author     sillylaird <sillylaird@fastmail.ca>  2026-02-03 21:27:57 -0500
committer  sillylaird <sillylaird@fastmail.ca>  2026-02-03 21:27:57 -0500
commit     720d752748b793a2f5cf3cc14cb75ad86e8919c0 (patch)
tree       29120103307cb17e7d6c283cc198ec2484f934cd /tools
First commit
Diffstat (limited to 'tools')
-rw-r--r--  tools/README.md                                      10
-rw-r--r--  tools/__pycache__/translate_pages.cpython-312.pyc   bin 0 -> 9270 bytes
-rw-r--r--  tools/backup.sh                                      13
-rw-r--r--  tools/generate_sitemap.py                            70
-rw-r--r--  tools/html_audit.py                                  94
-rw-r--r--  tools/link_check.py                                  94
-rw-r--r--  tools/translate_pages.py                            258
-rw-r--r--  tools/uptime_check.sh                                 6
8 files changed, 545 insertions, 0 deletions
diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 0000000..e7cd71f
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,10 @@
+# Tools
+
+Small maintenance helpers. No static site generator required.
+
+- `python tools/link_check.py` — check internal href/src targets exist
+- `python tools/html_audit.py` — quick a11y/markup audit (ids, alt, iframe titles, rel=noopener)
+- `python tools/generate_sitemap.py` — rebuild `sitemap.xml` with lastmod dates
+- `python tools/translate_pages.py` — generate `*_zh.html` and `*_jp.html` (skips `startpage/`, `mstartpage/`, and `partials/`)
+- `./tools/backup.sh` — create a tar.gz backup (set `BACKUP_DIR` to override destination)
+- `./tools/uptime_check.sh` — curl-based uptime check (set `URL` to override target)
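
As a rough illustration of the workflow the README describes, the two read-only checks could be chained from one small driver. This is only a sketch: the runner below is not part of this commit, and it assumes it is executed from the repository root.

```python
# Hypothetical runner (not in this commit): invoke the read-only checks
# listed in the README and exit non-zero if any of them reports problems.
import subprocess
import sys

CHECKS = [
    [sys.executable, "tools/link_check.py"],
    [sys.executable, "tools/html_audit.py"],
]

failed = False
for cmd in CHECKS:
    print("==>", " ".join(cmd))
    if subprocess.run(cmd).returncode != 0:  # each tool returns 1 on findings
        failed = True

sys.exit(1 if failed else 0)
```
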
diff --git a/tools/__pycache__/translate_pages.cpython-312.pyc b/tools/__pycache__/translate_pages.cpython-312.pyc
new file mode 100644
index 0000000..dd9d69e
--- /dev/null
+++ b/tools/__pycache__/translate_pages.cpython-312.pyc
Binary files differ
diff --git a/tools/backup.sh b/tools/backup.sh
new file mode 100644
index 0000000..36790bf
--- /dev/null
+++ b/tools/backup.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+BACKUP_DIR="${BACKUP_DIR:-/tmp/www.sillylaird.ca-backups}"
+
+mkdir -p "$BACKUP_DIR"
+TS=$(date -u +"%Y%m%d-%H%M%S")
+ARCHIVE="$BACKUP_DIR/www.sillylaird.ca-$TS.tar.gz"
+
+tar -czf "$ARCHIVE" -C "$ROOT_DIR" .
+
+echo "Backup written to $ARCHIVE"
diff --git a/tools/generate_sitemap.py b/tools/generate_sitemap.py
new file mode 100644
index 0000000..4d34cd7
--- /dev/null
+++ b/tools/generate_sitemap.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""Generate sitemap.xml from local HTML files."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from datetime import datetime, timezone
+import re
+
+ROOT = Path(__file__).resolve().parents[1]
+SITE = "https://www.sillylaird.ca"
+
+EXCLUDE_FILES = {
+ "404.html",
+ "50x.html",
+}
+EXCLUDE_DIRS = {"partials", ".git"}
+
+
+def should_skip(path: Path) -> bool:
+ if any(part in EXCLUDE_DIRS for part in path.parts):
+ return True
+ if path.name.endswith("~"):
+ return True
+ if path.name in EXCLUDE_FILES:
+ return True
+ return False
+
+
+def url_for_path(path: Path) -> str:
+ rel = path.relative_to(ROOT).as_posix()
+ if rel == "index.html":
+ return SITE + "/"
+ if rel.endswith("/index.html"):
+ return SITE + "/" + rel[:-len("index.html")]
+ return SITE + "/" + rel
+
+
+def lastmod_for_path(path: Path) -> str:
+ ts = path.stat().st_mtime
+ dt = datetime.fromtimestamp(ts, tz=timezone.utc)
+ return dt.strftime("%Y-%m-%d")
+
+
+def main() -> int:
+ urls = []
+ for html in ROOT.rglob("*.html"):
+ if should_skip(html):
+ continue
+ urls.append((url_for_path(html), lastmod_for_path(html)))
+
+ urls.sort(key=lambda item: item[0])
+
+ lines = [
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
+ '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
+ ]
+ for loc, lastmod in urls:
+ lines.append(" <url>")
+ lines.append(f" <loc>{loc}</loc>")
+ lines.append(f" <lastmod>{lastmod}</lastmod>")
+ lines.append(" </url>")
+ lines.append("</urlset>")
+
+ (ROOT / "sitemap.xml").write_text("\n".join(lines) + "\n", encoding="utf-8")
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
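
For reference, a minimal sketch of how the URL mapping above behaves. The `blog/` paths are invented, and the import assumes `tools/` has been added to `sys.path` from the repository root (there is no package `__init__.py`).

```python
# Sketch only: exercises url_for_path() with made-up paths under ROOT.
import sys

sys.path.insert(0, "tools")  # assumption: run from the repository root
from generate_sitemap import ROOT, url_for_path

print(url_for_path(ROOT / "index.html"))           # https://www.sillylaird.ca/
print(url_for_path(ROOT / "blog" / "index.html"))  # https://www.sillylaird.ca/blog/
print(url_for_path(ROOT / "blog" / "post.html"))   # https://www.sillylaird.ca/blog/post.html
```
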
diff --git a/tools/html_audit.py b/tools/html_audit.py
new file mode 100644
index 0000000..4e85c36
--- /dev/null
+++ b/tools/html_audit.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""Lightweight HTML audit for common a11y/markup issues."""
+
+from __future__ import annotations
+
+from html.parser import HTMLParser
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+
+SKIP_DIRS = {"partials", ".git"}
+SKIP_FILES = {
+ "test.html",
+ "test_jp.html",
+ "test_zh.html",
+ "startpage/test.html",
+}
+
+
+class AuditParser(HTMLParser):
+ def __init__(self) -> None:
+ super().__init__()
+ self.ids: dict[str, int] = {}
+ self.duplicate_ids: set[str] = set()
+ self.missing_alt: list[str] = []
+ self.missing_iframe_title: list[str] = []
+ self.blank_rel: list[str] = []
+
+ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+ attr_map = {k.lower(): (v or "") for k, v in attrs}
+
+ if "id" in attr_map:
+ ident = attr_map["id"]
+ if ident:
+ if ident in self.ids:
+ self.duplicate_ids.add(ident)
+ self.ids[ident] = self.ids.get(ident, 0) + 1
+
+ if tag == "img":
+ if "alt" not in attr_map:
+ src = attr_map.get("src", "")
+ self.missing_alt.append(src)
+
+ if tag == "iframe":
+ if not attr_map.get("title", ""):
+ src = attr_map.get("src", "")
+ self.missing_iframe_title.append(src)
+
+ if tag == "a":
+ if attr_map.get("target", "") == "_blank":
+ rel = attr_map.get("rel", "")
+ if "noopener" not in rel:
+ href = attr_map.get("href", "")
+ self.blank_rel.append(href)
+
+
+def main() -> int:
+ issues = []
+
+ for html in ROOT.rglob("*.html"):
+ if any(part in SKIP_DIRS for part in html.parts):
+ continue
+ rel = html.relative_to(ROOT).as_posix()
+ if rel in SKIP_FILES:
+ continue
+
+ parser = AuditParser()
+ parser.feed(html.read_text(encoding="utf-8", errors="ignore"))
+
+ if parser.duplicate_ids:
+ issues.append((rel, "duplicate-ids", sorted(parser.duplicate_ids)))
+ if parser.missing_alt:
+ issues.append((rel, "img-missing-alt", parser.missing_alt))
+ if parser.missing_iframe_title:
+ issues.append((rel, "iframe-missing-title", parser.missing_iframe_title))
+ if parser.blank_rel:
+ issues.append((rel, "target-blank-missing-noopener", parser.blank_rel))
+
+ if not issues:
+ print("OK: no audit issues found")
+ return 0
+
+ print("HTML audit issues:")
+ for rel, kind, items in issues:
+ print(f"- {rel}: {kind}")
+ for item in items[:10]:
+ print(f" - {item}")
+ if len(items) > 10:
+ print(f" - ... ({len(items) - 10} more)")
+ return 1
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
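
A tiny, hand-made fragment shows what AuditParser flags; the HTML below is invented, and the import again assumes `tools/` is on `sys.path`.

```python
# Sketch only: feed an invented fragment and inspect what the audit collects.
import sys

sys.path.insert(0, "tools")  # assumption: run from the repository root
from html_audit import AuditParser

p = AuditParser()
p.feed('<img src="cat.png">'
       '<a href="https://example.com" target="_blank">out</a>'
       '<div id="x"></div><span id="x"></span>')

print(p.missing_alt)    # ['cat.png']             -- <img> without alt
print(p.blank_rel)      # ['https://example.com'] -- target=_blank without rel=noopener
print(p.duplicate_ids)  # {'x'}                   -- same id used twice
```
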
diff --git a/tools/link_check.py b/tools/link_check.py
new file mode 100644
index 0000000..246eaf8
--- /dev/null
+++ b/tools/link_check.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""Very small internal link checker.
+
+Checks:
+- href="/path" and src="/path" for local files
+- Only checks local paths (starting with / or relative), skips http(s), mailto, xmpp, onion, etc.
+
+Usage:
+ python tools/link_check.py
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+SKIP_DIRS = {".git"}
+SKIP_FILES = {
+ "test.html",
+ "test_jp.html",
+ "test_zh.html",
+ "startpage/test.html",
+}
+
+RE_URL = re.compile(r"\b(?:href|src)=(['\"])(.*?)\1", re.I)
+
+
+def is_external(u: str) -> bool:
+ u = u.strip()
+ return (
+ u.startswith("http://")
+ or u.startswith("https://")
+ or u.startswith("mailto:")
+ or u.startswith("xmpp:")
+ or u.startswith("signal:")
+ or u.startswith("data:")
+ or u.startswith("javascript:")
+ or u.startswith("#")
+ or u.startswith("//")
+ or u.endswith(".onion/")
+ or ".onion" in u
+ )
+
+
+def normalize(p: Path, url: str) -> Path | None:
+ url = url.split("#", 1)[0].split("?", 1)[0].strip()
+ if not url:
+ return None
+ if is_external(url):
+ return None
+
+ if url.startswith("/"):
+ return (ROOT / url.lstrip("/")).resolve()
+
+ # relative
+ return (p.parent / url).resolve()
+
+
+def main() -> int:
+ missing = []
+ for html in ROOT.rglob("*.html"):
+ if any(part in SKIP_DIRS for part in html.parts):
+ continue
+ rel = html.relative_to(ROOT).as_posix()
+ if rel in SKIP_FILES:
+ continue
+ text = html.read_text(encoding="utf-8", errors="ignore")
+ for m in RE_URL.finditer(text):
+ url = m.group(2)
+ target = normalize(html, url)
+ if not target:
+ continue
+ # if it points to a directory, allow index.html
+ if target.is_dir():
+ if (target / "index.html").exists():
+ continue
+ if not target.exists():
+ missing.append((str(html.relative_to(ROOT)), url))
+
+ if missing:
+ print("Missing local links:")
+ for src, url in missing:
+ print(f"- {src}: {url}")
+ return 1
+
+ print("OK: no missing local href/src found")
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
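
A quick sketch of how normalize() and is_external() classify URLs. The page and the paths below are hypothetical, and the import assumes `tools/` is on `sys.path`.

```python
# Sketch only: how local targets are resolved and external schemes are skipped.
import sys

sys.path.insert(0, "tools")  # assumption: run from the repository root
from link_check import ROOT, normalize

page = ROOT / "blog" / "index.html"  # hypothetical source page

print(normalize(page, "/css/site.css"))        # <ROOT>/css/site.css  (root-relative)
print(normalize(page, "../img/cat.png"))       # <ROOT>/img/cat.png   (relative to the page)
print(normalize(page, "https://example.com"))  # None -- external, skipped
print(normalize(page, "#top"))                 # None -- fragment only
```
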
diff --git a/tools/translate_pages.py b/tools/translate_pages.py
new file mode 100644
index 0000000..3127d66
--- /dev/null
+++ b/tools/translate_pages.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""Generate zh/jp copies of all HTML pages (except startpage/).
+
+This is a best-effort, offline translation helper.
+
+- It copies each *.html to *_zh.html and *_jp.html (same directory).
+- It preserves all HTML structure, links, ids, classes.
+- It translates only user-visible text nodes and some common attributes.
+- It skips anything under "startpage/".
+
+Notes:
+- This is not a static site generator. It only writes additional files.
+- Translation quality depends on the dictionaries below.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+SKIP_DIRS = {
+ "startpage",
+ "mstartpage",
+ "partials",
+}
+
+
+# Tags whose text content should not be translated.
+SKIP_TAGS = {
+ "script",
+ "style",
+ "code",
+ "pre",
+ "kbd",
+ "samp",
+}
+
+
+# Very small phrase dictionaries (hand-tuned for this repo).
+# For anything not in the dictionary, we leave the text as-is.
+ZH = {
+ "Skip to content": "跳至内容",
+ "Menu": "菜单",
+ "Language": "语言",
+ "Home": "首页",
+ "StartPage": "StartPage",
+ "Blog": "博客",
+ "Guestbook": "留言板",
+ "Journal": "日志",
+ "Diary": "日记",
+ "Gaming": "游戏",
+ "Bookmarks": "书签",
+ "Accounts": "账户",
+ "Computers": "电脑设备",
+ "Contact": "联系",
+ "Welcome": "欢迎",
+ "My Current Vibe": "当前氛围",
+ "Music": "音乐",
+ "Current Blog": "当前博客",
+ "Changelog": "更新日志",
+ "Friends": "朋友们",
+ "Games": "游戏",
+ "Countries": "国家",
+ "Sponsors / VPNs / Buttons": "赞助商 / VPN / 按钮",
+ "Open guestbook": "打开留言板",
+ "Loading…": "加载中…",
+ "Loading...": "加载中…",
+ "Licensed under": "采用",
+ "site": "网站",
+ "Error": "错误",
+}
+
+
+JA = {
+ "Skip to content": "本文へ移動",
+ "Menu": "メニュー",
+ "Language": "言語",
+ "Home": "ホーム",
+ "StartPage": "StartPage",
+ "Blog": "ブログ",
+ "Guestbook": "ゲストブック",
+ "Journal": "ジャーナル",
+ "Diary": "日記",
+ "Gaming": "ゲーム",
+ "Bookmarks": "ブックマーク",
+ "Accounts": "アカウント",
+ "Computers": "コンピューター",
+ "Contact": "連絡先",
+ "Welcome": "ようこそ",
+ "My Current Vibe": "今の雰囲気",
+ "Music": "音楽",
+ "Current Blog": "現在のブログ",
+ "Changelog": "更新履歴",
+ "Friends": "友達",
+ "Games": "ゲーム",
+ "Countries": "国",
+ "Sponsors / VPNs / Buttons": "スポンサー / VPN / ボタン",
+ "Open guestbook": "ゲストブックを開く",
+ "Loading…": "読み込み中…",
+ "Loading...": "読み込み中…",
+ "Licensed under": "ライセンス:",
+ "Error": "エラー",
+}
+
+
+ATTR_TRANSLATE = {
+ "title",
+ "aria-label",
+ "aria-labelledby", # generally ids; don't translate
+ "alt",
+ "placeholder",
+}
+
+
+RE_TAG = re.compile(r"(<[^>]+>)")
+RE_TEXT_NODE = re.compile(r"^(\s*)(.*?)(\s*)$", re.S)
+RE_ATTR = re.compile(r'(\s)([a-zA-Z_:.-]+)=("[^"]*"|\'[\s\S]*?\')')
+
+
+def should_skip_path(p: Path) -> bool:
+    rel = p.relative_to(ROOT)
+    parts = set(rel.parts)
+    return any(d in parts for d in SKIP_DIRS)
+
+
+def translate_phrase(s: str, mapping: dict[str, str]) -> str:
+    # Exact match first
+    if s in mapping:
+        return mapping[s]
+
+    # Replace common UI tokens inside longer strings (simple, conservative)
+    out = s
+    for k, v in mapping.items():
+        if k and k in out:
+            out = out.replace(k, v)
+    return out
+
+
+def translate_text_node(text: str, mapping: dict[str, str]) -> str:
+    m = RE_TEXT_NODE.match(text)
+    if not m:
+        return text
+    lead, core, tail = m.group(1), m.group(2), m.group(3)
+
+    # Skip empty or purely whitespace
+    if not core.strip():
+        return text
+
+    # Skip if it's just punctuation/symbols
+    if not re.search(r"[A-Za-z]", core):
+        return text
+
+    translated = translate_phrase(core, mapping)
+    return f"{lead}{translated}{tail}"
+
+
+def tag_name(tag: str) -> str | None:
+    # tag is like <div ...> or </div>
+    t = tag.strip()[1:-1].strip()
+    if not t:
+        return None
+    if t.startswith("!") or t.startswith("?"):
+        return None
+    if t.startswith("/"):
+        t = t[1:].lstrip()
+    name = re.split(r"\s+", t, maxsplit=1)[0].lower()
+    return name
+
+
+def translate_attrs(tag: str, mapping: dict[str, str]) -> str:
+    # Don't touch aria-labelledby since it's usually an id.
+    def repl(m: re.Match[str]) -> str:
+        space, key, val = m.group(1), m.group(2), m.group(3)
+        k = key.lower()
+        if k not in ATTR_TRANSLATE or k == "aria-labelledby":
+            return m.group(0)
+        quote = val[0]
+        inner = val[1:-1]
+        new_inner = translate_phrase(inner, mapping)
+        if new_inner == inner:
+            return m.group(0)
+        return f"{space}{key}={quote}{new_inner}{quote}"
+
+    return RE_ATTR.sub(repl, tag)
+
+
+def translate_html(src: str, mapping: dict[str, str]) -> str:
+    parts = RE_TAG.split(src)
+    out: list[str] = []
+
+    skip_depth = 0
+    for part in parts:
+        if part.startswith("<") and part.endswith(">"):
+            name = tag_name(part)
+
+            # track skip tags nesting
+            if name in SKIP_TAGS:
+                if part.lstrip().startswith("</"):
+                    if skip_depth > 0:
+                        skip_depth -= 1
+                else:
+                    skip_depth += 1
+
+            out.append(translate_attrs(part, mapping))
+        else:
+            if skip_depth > 0:
+                out.append(part)
+            else:
+                out.append(translate_text_node(part, mapping))
+
+    return "".join(out)
+
+
+def write_if_changed(path: Path, content: str) -> None:
+    old = path.read_text(encoding="utf-8", errors="ignore") if path.exists() else None
+    if old == content:
+        return
+    path.write_text(content, encoding="utf-8")
+
+
+def main() -> int:
+    html_files = sorted(ROOT.rglob("*.html"))
+    for p in html_files:
+        if should_skip_path(p):
+            continue
+
+        # Skip already translated files
+        if p.name.endswith("_zh.html") or p.name.endswith("_jp.html"):
+            continue
+
+        # Every remaining page gets translated copies; only SKIP_DIRS and the
+        # already-suffixed *_zh/*_jp files above are excluded.
+        src = p.read_text(encoding="utf-8", errors="ignore")
+
+        # Output names
+        zh_path = p.with_name(p.stem + "_zh.html")
+        jp_path = p.with_name(p.stem + "_jp.html")
+
+        zh = translate_html(src, ZH)
+        jp = translate_html(src, JA)
+
+        # Set lang attribute if present
+        zh = re.sub(r"<html\s+lang=\"[^\"]*\"", '<html lang="zh"', zh, count=1)
+        jp = re.sub(r"<html\s+lang=\"[^\"]*\"", '<html lang="ja"', jp, count=1)
+
+        write_if_changed(zh_path, zh)
+        write_if_changed(jp_path, jp)
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
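
A small demonstration of translate_html() with the ZH dictionary. The markup below is invented, and the import assumes `tools/` is on `sys.path`.

```python
# Sketch only: link text and the title attribute are translated, the href is
# untouched, and text inside <code> is left alone (code is in SKIP_TAGS).
import sys

sys.path.insert(0, "tools")  # assumption: run from the repository root
from translate_pages import ZH, translate_html

src = '<a href="/guestbook/" title="Open guestbook">Guestbook</a><code>Menu</code>'
print(translate_html(src, ZH))
# -> <a href="/guestbook/" title="打开留言板">留言板</a><code>Menu</code>
```
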
diff --git a/tools/uptime_check.sh b/tools/uptime_check.sh
new file mode 100644
index 0000000..0bbc6e8
--- /dev/null
+++ b/tools/uptime_check.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+URL="${URL:-https://www.sillylaird.ca/}"
+
+curl -fsS -o /dev/null -w "HTTP %{http_code} in %{time_total}s\n" "$URL"