From 720d752748b793a2f5cf3cc14cb75ad86e8919c0 Mon Sep 17 00:00:00 2001
From: sillylaird
Date: Tue, 3 Feb 2026 21:27:57 -0500
Subject: First commit

---
 tools/README.md                                   |  10 +
 tools/__pycache__/translate_pages.cpython-312.pyc | Bin 0 -> 9270 bytes
 tools/backup.sh                                   |  13 ++
 tools/generate_sitemap.py                         |  70 ++++++
 tools/html_audit.py                               |  94 ++++++++
 tools/link_check.py                               |  94 ++++++++
 tools/translate_pages.py                          | 258 ++++++++++++++++++++++
 tools/uptime_check.sh                             |   6 +
 8 files changed, 545 insertions(+)
 create mode 100644 tools/README.md
 create mode 100644 tools/__pycache__/translate_pages.cpython-312.pyc
 create mode 100644 tools/backup.sh
 create mode 100644 tools/generate_sitemap.py
 create mode 100644 tools/html_audit.py
 create mode 100644 tools/link_check.py
 create mode 100644 tools/translate_pages.py
 create mode 100644 tools/uptime_check.sh
(limited to 'tools')

diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 0000000..e7cd71f
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,10 @@
+# Tools
+
+Small maintenance helpers. No generator required.
+
+- `python tools/link_check.py` — check internal href/src targets exist
+- `python tools/html_audit.py` — quick a11y/markup audit (ids, alt, iframe titles, rel=noopener)
+- `python tools/generate_sitemap.py` — rebuild `sitemap.xml` with lastmod dates
+- `python tools/translate_pages.py` — generate `*_zh.html` and `*_jp.html` (except `startpage/`)
+- `./tools/backup.sh` — create a tar.gz backup (set `BACKUP_DIR` to override destination)
+- `./tools/uptime_check.sh` — curl-based uptime check (set `URL` to override target)
diff --git a/tools/__pycache__/translate_pages.cpython-312.pyc b/tools/__pycache__/translate_pages.cpython-312.pyc
new file mode 100644
index 0000000..dd9d69e
Binary files /dev/null and b/tools/__pycache__/translate_pages.cpython-312.pyc differ
diff --git a/tools/backup.sh b/tools/backup.sh
new file mode 100644
index 0000000..36790bf
--- /dev/null
+++ b/tools/backup.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+BACKUP_DIR="${BACKUP_DIR:-/tmp/www.sillylaird.ca-backups}"
+
+mkdir -p "$BACKUP_DIR"
+TS=$(date -u +"%Y%m%d-%H%M%S")
+ARCHIVE="$BACKUP_DIR/www.sillylaird.ca-$TS.tar.gz"
+
+tar -czf "$ARCHIVE" -C "$ROOT_DIR" .
+
+echo "Backup written to $ARCHIVE"
diff --git a/tools/generate_sitemap.py b/tools/generate_sitemap.py
new file mode 100644
index 0000000..4d34cd7
--- /dev/null
+++ b/tools/generate_sitemap.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""Generate sitemap.xml from local HTML files."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from datetime import datetime, timezone
+import re
+
+ROOT = Path(__file__).resolve().parents[1]
+SITE = "https://www.sillylaird.ca"
+
+EXCLUDE_FILES = {
+    "404.html",
+    "50x.html",
+}
+EXCLUDE_DIRS = {"partials", ".git"}
+
+
+def should_skip(path: Path) -> bool:
+    if any(part in EXCLUDE_DIRS for part in path.parts):
+        return True
+    if path.name.endswith("~"):
+        return True
+    if path.name in EXCLUDE_FILES:
+        return True
+    return False
+
+
+def url_for_path(path: Path) -> str:
+    rel = path.relative_to(ROOT).as_posix()
+    if rel == "index.html":
+        return SITE + "/"
+    if rel.endswith("/index.html"):
+        return SITE + "/" + rel[:-len("index.html")]
+    return SITE + "/" + rel
+
+
+def lastmod_for_path(path: Path) -> str:
+    ts = path.stat().st_mtime
+    dt = datetime.fromtimestamp(ts, tz=timezone.utc)
+    return dt.strftime("%Y-%m-%d")
+
+
+def main() -> int:
+    urls = []
+    for html in ROOT.rglob("*.html"):
+        if should_skip(html):
+            continue
+        urls.append((url_for_path(html), lastmod_for_path(html)))
+
+    urls.sort(key=lambda item: item[0])
+
+    lines = [
+        "<?xml version='1.0' encoding='UTF-8'?>",
+        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
+    ]
+    for loc, lastmod in urls:
+        lines.append("  <url>")
+        lines.append(f"    <loc>{loc}</loc>")
+        lines.append(f"    <lastmod>{lastmod}</lastmod>")
+        lines.append("  </url>")
+    lines.append("</urlset>")
+
+    (ROOT / "sitemap.xml").write_text("\n".join(lines) + "\n", encoding="utf-8")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/html_audit.py b/tools/html_audit.py
new file mode 100644
index 0000000..4e85c36
--- /dev/null
+++ b/tools/html_audit.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""Lightweight HTML audit for common a11y/markup issues."""
+
+from __future__ import annotations
+
+from html.parser import HTMLParser
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+
+SKIP_DIRS = {"partials", ".git"}
+SKIP_FILES = {
+    "test.html",
+    "test_jp.html",
+    "test_zh.html",
+    "startpage/test.html",
+}
+
+
+class AuditParser(HTMLParser):
+    def __init__(self) -> None:
+        super().__init__()
+        self.ids: dict[str, int] = {}
+        self.duplicate_ids: set[str] = set()
+        self.missing_alt: list[str] = []
+        self.missing_iframe_title: list[str] = []
+        self.blank_rel: list[str] = []
+
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+        attr_map = {k.lower(): (v or "") for k, v in attrs}
+
+        if "id" in attr_map:
+            ident = attr_map["id"]
+            if ident:
+                if ident in self.ids:
+                    self.duplicate_ids.add(ident)
+                self.ids[ident] = self.ids.get(ident, 0) + 1
+
+        if tag == "img":
+            if "alt" not in attr_map:
+                src = attr_map.get("src", "")
+                self.missing_alt.append(src)
+
+        if tag == "iframe":
+            if not attr_map.get("title", ""):
+                src = attr_map.get("src", "")
+                self.missing_iframe_title.append(src)
+
+        if tag == "a":
+            if attr_map.get("target", "") == "_blank":
+                rel = attr_map.get("rel", "")
+                if "noopener" not in rel:
+                    href = attr_map.get("href", "")
+                    self.blank_rel.append(href)
+
+
+def main() -> int:
+    issues = []
+
+    for html in ROOT.rglob("*.html"):
+        if any(part in SKIP_DIRS for part in html.parts):
+            continue
+        rel = html.relative_to(ROOT).as_posix()
+        if rel in SKIP_FILES:
+            continue
+
+        parser = AuditParser()
+        parser.feed(html.read_text(encoding="utf-8", errors="ignore"))
+
+        if parser.duplicate_ids:
+            issues.append((rel, "duplicate-ids", sorted(parser.duplicate_ids)))
+        if parser.missing_alt:
+            issues.append((rel, "img-missing-alt", parser.missing_alt))
+        if parser.missing_iframe_title:
+            issues.append((rel, "iframe-missing-title", parser.missing_iframe_title))
+        if parser.blank_rel:
+            issues.append((rel, "target-blank-missing-noopener", parser.blank_rel))
+
+    if not issues:
+        print("OK: no audit issues found")
+        return 0
+
+    print("HTML audit issues:")
+    for rel, kind, items in issues:
+        print(f"- {rel}: {kind}")
+        for item in items[:10]:
+            print(f"  - {item}")
+        if len(items) > 10:
+            print(f"  - ... ({len(items) - 10} more)")
+    return 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/link_check.py b/tools/link_check.py
new file mode 100644
index 0000000..246eaf8
--- /dev/null
+++ b/tools/link_check.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""Very small internal link checker.
+
+Checks:
+- href="/path" and src="/path" for local files
+- Only checks local paths (starting with / or relative), skips http(s), mailto, xmpp, onion, etc.
+
+Usage:
+    python tools/link_check.py
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+SKIP_DIRS = {".git"}
+SKIP_FILES = {
+    "test.html",
+    "test_jp.html",
+    "test_zh.html",
+    "startpage/test.html",
+}
+
+RE_URL = re.compile(r"\b(?:href|src)=(['\"])(.*?)\1", re.I)
+
+
+def is_external(u: str) -> bool:
+    u = u.strip()
+    return (
+        u.startswith("http://")
+        or u.startswith("https://")
+        or u.startswith("mailto:")
+        or u.startswith("xmpp:")
+        or u.startswith("signal:")
+        or u.startswith("data:")
+        or u.startswith("javascript:")
+        or u.startswith("#")
+        or u.startswith("//")
+        or u.endswith(".onion/")
+        or ".onion" in u
+    )
+
+
+def normalize(p: Path, url: str) -> Path | None:
+    url = url.split("#", 1)[0].split("?", 1)[0].strip()
+    if not url:
+        return None
+    if is_external(url):
+        return None
+
+    if url.startswith("/"):
+        return (ROOT / url.lstrip("/")).resolve()
+
+    # relative
+    return (p.parent / url).resolve()
+
+
+def main() -> int:
+    missing = []
+    for html in ROOT.rglob("*.html"):
+        if any(part in SKIP_DIRS for part in html.parts):
+            continue
+        rel = html.relative_to(ROOT).as_posix()
+        if rel in SKIP_FILES:
+            continue
+        text = html.read_text(encoding="utf-8", errors="ignore")
+        for m in RE_URL.finditer(text):
+            url = m.group(2)
+            target = normalize(html, url)
+            if not target:
+                continue
+            # if it points to a directory, allow index.html
+            if target.is_dir():
+                if (target / "index.html").exists():
+                    continue
+            if not target.exists():
+                missing.append((str(html.relative_to(ROOT)), url))
+
+    if missing:
+        print("Missing local links:")
+        for src, url in missing:
+            print(f"- {src}: {url}")
+        return 1
+
+    print("OK: no missing local href/src found")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tools/translate_pages.py b/tools/translate_pages.py
new file mode 100644
index 0000000..3127d66
--- /dev/null
+++ b/tools/translate_pages.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""Generate zh/jp copies of all HTML pages (except startpage/).
+
+This is a best-effort, offline translation helper.
+
+- It copies each *.html to *_zh.html and *_jp.html (same directory).
+- It preserves all HTML structure, links, ids, classes.
+- It translates only user-visible text nodes and some common attributes.
+- It skips anything under "startpage/".
+
+Notes:
+- This is not a static site generator. It only writes additional files.
+- Translation quality depends on the dictionaries below.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+SKIP_DIRS = {
+    "startpage",
+    "mstartpage",
+    "partials",
+}
+
+
+# Tags whose text content should not be translated.
+SKIP_TAGS = {
+    "script",
+    "style",
+    "code",
+    "pre",
+    "kbd",
+    "samp",
+}
+
+
+# Very small phrase dictionaries (hand-tuned for this repo).
+# For anything not in the dictionary, we leave the text as-is.
+ZH = {
+    "Skip to content": "跳至内容",
+    "Menu": "菜单",
+    "Language": "语言",
+    "Home": "首页",
+    "StartPage": "StartPage",
+    "Blog": "博客",
+    "Guestbook": "留言板",
+    "Journal": "日志",
+    "Diary": "日记",
+    "Gaming": "游戏",
+    "Bookmarks": "书签",
+    "Accounts": "账户",
+    "Computers": "电脑设备",
+    "Contact": "联系",
+    "Welcome": "欢迎",
+    "My Current Vibe": "当前氛围",
+    "Music": "音乐",
+    "Current Blog": "当前博客",
+    "Changelog": "更新日志",
+    "Friends": "朋友们",
+    "Games": "游戏",
+    "Countries": "国家",
+    "Sponsors / VPNs / Buttons": "赞助商 / VPN / 按钮",
+    "Open guestbook": "打开留言板",
+    "Loading…": "加载中…",
+    "Loading...": "加载中…",
+    "Licensed under": "采用",
+    "site": "网站",
+    "Error": "错误",
+}
+
+
+JA = {
+    "Skip to content": "本文へ移動",
+    "Menu": "メニュー",
+    "Language": "言語",
+    "Home": "ホーム",
+    "StartPage": "StartPage",
+    "Blog": "ブログ",
+    "Guestbook": "ゲストブック",
+    "Journal": "ジャーナル",
+    "Diary": "日記",
+    "Gaming": "ゲーム",
+    "Bookmarks": "ブックマーク",
+    "Accounts": "アカウント",
+    "Computers": "コンピューター",
+    "Contact": "連絡先",
+    "Welcome": "ようこそ",
+    "My Current Vibe": "今の雰囲気",
+    "Music": "音楽",
+    "Current Blog": "現在のブログ",
+    "Changelog": "更新履歴",
+    "Friends": "友達",
+    "Games": "ゲーム",
+    "Countries": "国",
+    "Sponsors / VPNs / Buttons": "スポンサー / VPN / ボタン",
+    "Open guestbook": "ゲストブックを開く",
+    "Loading…": "読み込み中…",
+    "Loading...": "読み込み中…",
+    "Licensed under": "ライセンス:",
+    "Error": "エラー",
+}
+
+
+ATTR_TRANSLATE = {
+    "title",
+    "aria-label",
+    "aria-labelledby",  # generally ids; don't translate
+    "alt",
+    "placeholder",
+}
+
+
+RE_TAG = re.compile(r"(<[^>]+>)")
+RE_TEXT_NODE = re.compile(r"^(\s*)(.*?)(\s*)$", re.S)
+RE_ATTR = re.compile(r'(\s)([a-zA-Z_:.-]+)=("[^"]*"|\'[\s\S]*?\')')
+
+
+def should_skip_path(p: Path) -> bool:
+    rel = p.relative_to(ROOT)
+    parts = set(rel.parts)
+    return any(d in parts for d in SKIP_DIRS)
+
+
+def translate_phrase(s: str, mapping: dict[str, str]) -> str:
+    # Exact match first
+    if s in mapping:
+        return mapping[s]
+
+    # Replace common UI tokens inside longer strings (simple, conservative)
+    out = s
+    for k, v in mapping.items():
+        if k and k in out:
+            out = out.replace(k, v)
+    return out
+
+
+def translate_text_node(text: str, mapping: dict[str, str]) -> str:
+    m = RE_TEXT_NODE.match(text)
+    if not m:
+        return text
+    lead, core, tail = m.group(1), m.group(2), m.group(3)
+
+    # Skip empty or purely whitespace
+    if not core.strip():
+        return text
+
+    # Skip if it's just punctuation/symbols
+    if not re.search(r"[A-Za-z]", core):
+        return text
+
+    translated = translate_phrase(core, mapping)
+    return f"{lead}{translated}{tail}"
+
+
+def tag_name(tag: str) -> str | None:
+    # tag is like <div ...> or </div>
+    t = tag.strip()[1:-1].strip()
+    if not t:
+        return None
+    if t.startswith("!") or t.startswith("?"):
+        return None
+    if t.startswith("/"):
+        t = t[1:].lstrip()
+    name = re.split(r"\s+", t, maxsplit=1)[0].lower()
+    return name
+
+
+def translate_attrs(tag: str, mapping: dict[str, str]) -> str:
+    # Don't touch aria-labelledby since it's usually an id.
+    def repl(m: re.Match[str]) -> str:
+        space, key, val = m.group(1), m.group(2), m.group(3)
+        k = key.lower()
+        if k not in ATTR_TRANSLATE or k == "aria-labelledby":
+            return m.group(0)
+        quote = val[0]
+        inner = val[1:-1]
+        new_inner = translate_phrase(inner, mapping)
+        if new_inner == inner:
+            return m.group(0)
+        return f"{space}{key}={quote}{new_inner}{quote}"
+
+    return RE_ATTR.sub(repl, tag)
+
+
+def translate_html(src: str, mapping: dict[str, str]) -> str:
+    parts = RE_TAG.split(src)
+    out: list[str] = []
+
+    skip_depth = 0
+    for part in parts:
+        if part.startswith("<") and part.endswith(">"):
+            name = tag_name(part)
+
+            # track skip tags nesting
+            if name in SKIP_TAGS:
+                if part.lstrip().startswith("</"):
+                    if skip_depth > 0:
+                        skip_depth -= 1
+                else:
+                    skip_depth += 1
+
+            out.append(translate_attrs(part, mapping))
+        else:
+            if skip_depth > 0:
+                out.append(part)
+            else:
+                out.append(translate_text_node(part, mapping))
+
+    return "".join(out)
+
+
+def write_if_changed(path: Path, content: str) -> None:
+    old = path.read_text(encoding="utf-8", errors="ignore") if path.exists() else None
+    if old == content:
+        return
+    path.write_text(content, encoding="utf-8")
+
+
+def main() -> int:
+    html_files = sorted(ROOT.rglob("*.html"))
+    for p in html_files:
+        if should_skip_path(p):
+            continue
+
+        # Skip already translated files
+        if p.name.endswith("_zh.html") or p.name.endswith("_jp.html"):
+            continue
+
+        # Only translate pages that look like they are part of the unified site
+        # (Keep legacy old HTML alone unless user explicitly wants all)
+        src = p.read_text(encoding="utf-8", errors="ignore")
+
+        # Output names
+        zh_path = p.with_name(p.stem + "_zh.html")
+        jp_path = p.with_name(p.stem + "_jp.html")
+
+        zh = translate_html(src, ZH)
+        jp = translate_html(src, JA)
+
+        # Set lang attribute if present
+        zh = re.sub(r"