From 720d752748b793a2f5cf3cc14cb75ad86e8919c0 Mon Sep 17 00:00:00 2001
From: sillylaird
Date: Tue, 3 Feb 2026 21:27:57 -0500
Subject: First commit

---
 tools/README.md                                   |  10 +
 tools/__pycache__/translate_pages.cpython-312.pyc | Bin 0 -> 9270 bytes
 tools/backup.sh                                   |  13 ++
 tools/generate_sitemap.py                         |  70 ++++++
 tools/html_audit.py                               |  94 ++++++++
 tools/link_check.py                               |  94 ++++++++
 tools/translate_pages.py                          | 258 ++++++++++++++++++++++
 tools/uptime_check.sh                             |   6 +
 8 files changed, 545 insertions(+)
 create mode 100644 tools/README.md
 create mode 100644 tools/__pycache__/translate_pages.cpython-312.pyc
 create mode 100644 tools/backup.sh
 create mode 100644 tools/generate_sitemap.py
 create mode 100644 tools/html_audit.py
 create mode 100644 tools/link_check.py
 create mode 100644 tools/translate_pages.py
 create mode 100644 tools/uptime_check.sh
(limited to 'tools')

diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 0000000..e7cd71f
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,10 @@
+# Tools
+
+Small maintenance helpers. No generator required.
+
+- `python tools/link_check.py` — check internal href/src targets exist
+- `python tools/html_audit.py` — quick a11y/markup audit (ids, alt, iframe titles, rel=noopener)
+- `python tools/generate_sitemap.py` — rebuild `sitemap.xml` with lastmod dates
+- `python tools/translate_pages.py` — generate `*_zh.html` and `*_jp.html` (except `startpage/`)
+- `./tools/backup.sh` — create a tar.gz backup (set `BACKUP_DIR` to override destination)
+- `./tools/uptime_check.sh` — curl-based uptime check (set `URL` to override target)
diff --git a/tools/__pycache__/translate_pages.cpython-312.pyc b/tools/__pycache__/translate_pages.cpython-312.pyc
new file mode 100644
index 0000000..dd9d69e
Binary files /dev/null and b/tools/__pycache__/translate_pages.cpython-312.pyc differ
diff --git a/tools/backup.sh b/tools/backup.sh
new file mode 100644
index 0000000..36790bf
--- /dev/null
+++ b/tools/backup.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+BACKUP_DIR="${BACKUP_DIR:-/tmp/www.sillylaird.ca-backups}"
+
+mkdir -p "$BACKUP_DIR"
+TS=$(date -u +"%Y%m%d-%H%M%S")
+ARCHIVE="$BACKUP_DIR/www.sillylaird.ca-$TS.tar.gz"
+
+tar -czf "$ARCHIVE" -C "$ROOT_DIR" .
+
+echo "Backup written to $ARCHIVE"
diff --git a/tools/generate_sitemap.py b/tools/generate_sitemap.py
new file mode 100644
index 0000000..4d34cd7
--- /dev/null
+++ b/tools/generate_sitemap.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""Generate sitemap.xml from local HTML files."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from datetime import datetime, timezone
+import re
+
+ROOT = Path(__file__).resolve().parents[1]
+SITE = "https://www.sillylaird.ca"
+
+EXCLUDE_FILES = {
+    "404.html",
+    "50x.html",
+}
+EXCLUDE_DIRS = {"partials", ".git"}
+
+
+def should_skip(path: Path) -> bool:
+    if any(part in EXCLUDE_DIRS for part in path.parts):
+        return True
+    if path.name.endswith("~"):
+        return True
+    if path.name in EXCLUDE_FILES:
+        return True
+    return False
+
+
+def url_for_path(path: Path) -> str:
+    rel = path.relative_to(ROOT).as_posix()
+    if rel == "index.html":
+        return SITE + "/"
+    if rel.endswith("/index.html"):
+        return SITE + "/" + rel[:-len("index.html")]
+    return SITE + "/" + rel
+
+
+def lastmod_for_path(path: Path) -> str:
+    ts = path.stat().st_mtime
+    dt = datetime.fromtimestamp(ts, tz=timezone.utc)
+    return dt.strftime("%Y-%m-%d")
+
+
+def main() -> int:
+    urls = []
+    for html in ROOT.rglob("*.html"):
+        if should_skip(html):
+            continue
+        urls.append((url_for_path(html), lastmod_for_path(html)))
+
+    urls.sort(key=lambda item: item[0])
+
+    lines = [
+        "<?xml version='1.0' encoding='UTF-8'?>",
+        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
+    ]
+    for loc, lastmod in urls:
+        lines.append("  <url>")
+        lines.append(f"    <loc>{loc}</loc>")
+        lines.append(f"    <lastmod>{lastmod}</lastmod>")
+        lines.append("  </url>")
+    lines.append("</urlset>")
+
+    (ROOT / "sitemap.xml").write_text("\n".join(lines) + "\n", encoding="utf-8")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/html_audit.py b/tools/html_audit.py
new file mode 100644
index 0000000..4e85c36
--- /dev/null
+++ b/tools/html_audit.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""Lightweight HTML audit for common a11y/markup issues."""
+
+from __future__ import annotations
+
+from html.parser import HTMLParser
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+
+SKIP_DIRS = {"partials", ".git"}
+SKIP_FILES = {
+    "test.html",
+    "test_jp.html",
+    "test_zh.html",
+    "startpage/test.html",
+}
+
+
+class AuditParser(HTMLParser):
+    def __init__(self) -> None:
+        super().__init__()
+        self.ids: dict[str, int] = {}
+        self.duplicate_ids: set[str] = set()
+        self.missing_alt: list[str] = []
+        self.missing_iframe_title: list[str] = []
+        self.blank_rel: list[str] = []
+
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+        attr_map = {k.lower(): (v or "") for k, v in attrs}
+
+        if "id" in attr_map:
+            ident = attr_map["id"]
+            if ident:
+                if ident in self.ids:
+                    self.duplicate_ids.add(ident)
+                self.ids[ident] = self.ids.get(ident, 0) + 1
+
+        if tag == "img":
+            if "alt" not in attr_map:
+                src = attr_map.get("src", "")
+                self.missing_alt.append(src)
+
+        if tag == "iframe":
+            if not attr_map.get("title", ""):
+                src = attr_map.get("src", "")
+                self.missing_iframe_title.append(src)
+
+        if tag == "a":
+            if attr_map.get("target", "") == "_blank":
+                rel = attr_map.get("rel", "")
+                if "noopener" not in rel:
+                    href = attr_map.get("href", "")
+                    self.blank_rel.append(href)
+
+
+def main() -> int:
+    issues = []
+
+    for html in ROOT.rglob("*.html"):
+        if any(part in SKIP_DIRS for part in html.parts):
+            continue
+        rel = html.relative_to(ROOT).as_posix()
+        if rel in SKIP_FILES:
+            continue
+
+        parser = AuditParser()
+        parser.feed(html.read_text(encoding="utf-8", errors="ignore"))
+
+        if parser.duplicate_ids:
+            issues.append((rel, "duplicate-ids", sorted(parser.duplicate_ids)))
+        if parser.missing_alt:
+            issues.append((rel, "img-missing-alt", parser.missing_alt))
+        if parser.missing_iframe_title:
+            issues.append((rel, "iframe-missing-title", parser.missing_iframe_title))
+        if parser.blank_rel:
+            issues.append((rel, "target-blank-missing-noopener", parser.blank_rel))
+
+    if not issues:
+        print("OK: no audit issues found")
+        return 0
+
+    print("HTML audit issues:")
+    for rel, kind, items in issues:
+        print(f"- {rel}: {kind}")
+        for item in items[:10]:
+            print(f"  - {item}")
+        if len(items) > 10:
+            print(f"  - ... ({len(items) - 10} more)")
+    return 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/link_check.py b/tools/link_check.py
new file mode 100644
index 0000000..246eaf8
--- /dev/null
+++ b/tools/link_check.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""Very small internal link checker.
+
+Checks:
+- href="/path" and src="/path" for local files
+- Only checks local paths (starting with / or relative), skips http(s), mailto, xmpp, onion, etc.
+
+Usage:
+    python tools/link_check.py
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+SKIP_DIRS = {".git"}
+SKIP_FILES = {
+    "test.html",
+    "test_jp.html",
+    "test_zh.html",
+    "startpage/test.html",
+}
+
+RE_URL = re.compile(r"\b(?:href|src)=(['\"])(.*?)\1", re.I)
+
+
+def is_external(u: str) -> bool:
+    u = u.strip()
+    return (
+        u.startswith("http://")
+        or u.startswith("https://")
+        or u.startswith("mailto:")
+        or u.startswith("xmpp:")
+        or u.startswith("signal:")
+        or u.startswith("data:")
+        or u.startswith("javascript:")
+        or u.startswith("#")
+        or u.startswith("//")
+        or u.endswith(".onion/")
+        or ".onion" in u
+    )
+
+
+def normalize(p: Path, url: str) -> Path | None:
+    url = url.split("#", 1)[0].split("?", 1)[0].strip()
+    if not url:
+        return None
+    if is_external(url):
+        return None
+
+    if url.startswith("/"):
+        return (ROOT / url.lstrip("/")).resolve()
+
+    # relative
+    return (p.parent / url).resolve()
+
+
+def main() -> int:
+    missing = []
+    for html in ROOT.rglob("*.html"):
+        if any(part in SKIP_DIRS for part in html.parts):
+            continue
+        rel = html.relative_to(ROOT).as_posix()
+        if rel in SKIP_FILES:
+            continue
+        text = html.read_text(encoding="utf-8", errors="ignore")
+        for m in RE_URL.finditer(text):
+            url = m.group(2)
+            target = normalize(html, url)
+            if not target:
+                continue
+            # if it points to a directory, allow index.html
+            if target.is_dir():
+                if (target / "index.html").exists():
+                    continue
+            if not target.exists():
+                missing.append((str(html.relative_to(ROOT)), url))
+
+    if missing:
+        print("Missing local links:")
+        for src, url in missing:
+            print(f"- {src}: {url}")
+        return 1
+
+    print("OK: no missing local href/src found")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/translate_pages.py b/tools/translate_pages.py
new file mode 100644
index 0000000..3127d66
--- /dev/null
+++ b/tools/translate_pages.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""Generate zh/jp copies of all HTML pages (except startpage/).
+
+This is a best-effort, offline translation helper.
+
+- It copies each *.html to *_zh.html and *_jp.html (same directory).
+- It preserves all HTML structure, links, ids, classes.
+- It translates only user-visible text nodes and some common attributes.
+- It skips anything under "startpage/".
+
+Notes:
+- This is not a static site generator. It only writes additional files.
+- Translation quality depends on the dictionaries below.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+SKIP_DIRS = {
+    "startpage",
+    "mstartpage",
+    "partials",
+}
+
+
+# Tags whose text content should not be translated.
+SKIP_TAGS = {
+    "script",
+    "style",
+    "code",
+    "pre",
+    "kbd",
+    "samp",
+}
+
+
+# Very small phrase dictionaries (hand-tuned for this repo).
+# For anything not in the dictionary, we leave the text as-is.
+ZH = {
+    "Skip to content": "跳至内容",
+    "Menu": "菜单",
+    "Language": "语言",
+    "Home": "首页",
+    "StartPage": "StartPage",
+    "Blog": "博客",
+    "Guestbook": "留言板",
+    "Journal": "日志",
+    "Diary": "日记",
+    "Gaming": "游戏",
+    "Bookmarks": "书签",
+    "Accounts": "账户",
+    "Computers": "电脑设备",
+    "Contact": "联系",
+    "Welcome": "欢迎",
+    "My Current Vibe": "当前氛围",
+    "Music": "音乐",
+    "Current Blog": "当前博客",
+    "Changelog": "更新日志",
+    "Friends": "朋友们",
+    "Games": "游戏",
+    "Countries": "国家",
+    "Sponsors / VPNs / Buttons": "赞助商 / VPN / 按钮",
+    "Open guestbook": "打开留言板",
+    "Loading…": "加载中…",
+    "Loading...": "加载中…",
+    "Licensed under": "采用",
+    "site": "网站",
+    "Error": "错误",
+}
+
+
+JA = {
+    "Skip to content": "本文へ移動",
+    "Menu": "メニュー",
+    "Language": "言語",
+    "Home": "ホーム",
+    "StartPage": "StartPage",
+    "Blog": "ブログ",
+    "Guestbook": "ゲストブック",
+    "Journal": "ジャーナル",
+    "Diary": "日記",
+    "Gaming": "ゲーム",
+    "Bookmarks": "ブックマーク",
+    "Accounts": "アカウント",
+    "Computers": "コンピューター",
+    "Contact": "連絡先",
+    "Welcome": "ようこそ",
+    "My Current Vibe": "今の雰囲気",
+    "Music": "音楽",
+    "Current Blog": "現在のブログ",
+    "Changelog": "更新履歴",
+    "Friends": "友達",
+    "Games": "ゲーム",
+    "Countries": "国",
+    "Sponsors / VPNs / Buttons": "スポンサー / VPN / ボタン",
+    "Open guestbook": "ゲストブックを開く",
+    "Loading…": "読み込み中…",
+    "Loading...": "読み込み中…",
+    "Licensed under": "ライセンス:",
+    "Error": "エラー",
+}
+
+
+ATTR_TRANSLATE = {
+    "title",
+    "aria-label",
+    "aria-labelledby",  # generally ids; don't translate
+    "alt",
+    "placeholder",
+}
+
+
+RE_TAG = re.compile(r"(<[^>]+>)")
+RE_TEXT_NODE = re.compile(r"^(\s*)(.*?)(\s*)$", re.S)
+RE_ATTR = re.compile(r'(\s)([a-zA-Z_:.-]+)=("[^"]*"|\'[\s\S]*?\')')
+
+
+def should_skip_path(p: Path) -> bool:
+    rel = p.relative_to(ROOT)
+    parts = set(rel.parts)
+    return any(d in parts for d in SKIP_DIRS)
+
+
+def translate_phrase(s: str, mapping: dict[str, str]) -> str:
+    # Exact match first
+    if s in mapping:
+        return mapping[s]
+
+    # Replace common UI tokens inside longer strings (simple, conservative)
+    out = s
+    for k, v in mapping.items():
+        if k and k in out:
+            out = out.replace(k, v)
+    return out
+
+
+def translate_text_node(text: str, mapping: dict[str, str]) -> str:
+    m = RE_TEXT_NODE.match(text)
+    if not m:
+        return text
+    lead, core, tail = m.group(1), m.group(2), m.group(3)
+
+    # Skip empty or purely whitespace
+    if not core.strip():
+        return text
+
+    # Skip if it's just punctuation/symbols
+    if not re.search(r"[A-Za-z]", core):
+        return text
+
+    translated = translate_phrase(core, mapping)
+    return f"{lead}{translated}{tail}"
+
+
+def tag_name(tag: str) -> str | None:
+    # tag is like <div ...> or </div>
+    t = tag.strip()[1:-1].strip()
+    if not t:
+        return None
+    if t.startswith("!") or t.startswith("?"):
+        return None
+    if t.startswith("/"):
+        t = t[1:].lstrip()
+    name = re.split(r"\s+", t, maxsplit=1)[0].lower()
+    return name
+
+
+def translate_attrs(tag: str, mapping: dict[str, str]) -> str:
+    # Don't touch aria-labelledby since it's usually an id.
+    def repl(m: re.Match[str]) -> str:
+        space, key, val = m.group(1), m.group(2), m.group(3)
+        k = key.lower()
+        if k not in ATTR_TRANSLATE or k == "aria-labelledby":
+            return m.group(0)
+        quote = val[0]
+        inner = val[1:-1]
+        new_inner = translate_phrase(inner, mapping)
+        if new_inner == inner:
+            return m.group(0)
+        return f"{space}{key}={quote}{new_inner}{quote}"
+
+    return RE_ATTR.sub(repl, tag)
+
+
+def translate_html(src: str, mapping: dict[str, str]) -> str:
+    parts = RE_TAG.split(src)
+    out: list[str] = []
+
+    skip_depth = 0
+    for part in parts:
+        if part.startswith("<") and part.endswith(">"):
+            name = tag_name(part)
+
+            # track skip tags nesting
+            if name in SKIP_TAGS:
+                if part.lstrip().startswith("</"):
+                    if skip_depth > 0:
+                        skip_depth -= 1
+                else:
+                    skip_depth += 1
+
+            out.append(translate_attrs(part, mapping))
+        else:
+            if skip_depth > 0:
+                out.append(part)
+            else:
+                out.append(translate_text_node(part, mapping))
+
+    return "".join(out)
+
+
+def write_if_changed(path: Path, content: str) -> None:
+    old = path.read_text(encoding="utf-8", errors="ignore") if path.exists() else None
+    if old == content:
+        return
+    path.write_text(content, encoding="utf-8")
+
+
+def main() -> int:
+    html_files = sorted(ROOT.rglob("*.html"))
+    for p in html_files:
+        if should_skip_path(p):
+            continue
+
+        # Skip already translated files
+        if p.name.endswith("_zh.html") or p.name.endswith("_jp.html"):
+            continue
+
+        # Only translate pages that look like they are part of the unified site
+        # (Keep legacy old HTML alone unless user explicitly wants all)
+        src = p.read_text(encoding="utf-8", errors="ignore")
+
+        # Output names
+        zh_path = p.with_name(p.stem + "_zh.html")
+        jp_path = p.with_name(p.stem + "_jp.html")
+
+        zh = translate_html(src, ZH)
+        jp = translate_html(src, JA)
+
+        # Set lang attribute if present
+        zh = re.sub(r"