1 files changed, 258 insertions, 0 deletions
diff --git a/tools/translate_pages.py b/tools/translate_pages.py
new file mode 100644
index 0000000..3127d66
--- /dev/null
+++ b/tools/translate_pages.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""Generate zh/jp copies of all HTML pages outside the skipped directories.
+
+This is a best-effort, offline translation helper.
+
+- It copies each *.html to *_zh.html and *_jp.html (same directory).
+- It preserves all HTML structure, links, ids, classes.
+- It translates only user-visible text nodes and some common attributes.
+- It skips anything under "startpage/", "mstartpage/" or "partials/".
+
+Notes:
+- This is not a static site generator. It only writes additional files.
+- Translation quality depends on the dictionaries below.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+SKIP_DIRS = {
+    "startpage",
+    "mstartpage",
+    "partials",
+}
+
+
+# Tags whose text content should not be translated.
+SKIP_TAGS = {
+    "script",
+    "style",
+    "code",
+    "pre",
+    "kbd",
+    "samp",
+}
+
+
+# Very small phrase dictionaries (hand-tuned for this repo).
+# For anything not in the dictionary, we leave the text as-is.
+#
+# ZH maps English UI phrases (case-sensitive keys) to Chinese.  It is the
+# mapping main() passes to translate_html() for the "*_zh.html" output.
+ZH = {
+    "Skip to content": "跳至内容",
+    "Menu": "菜单",
+    "Language": "语言",
+    "Home": "首页",
+    "StartPage": "StartPage",  # identity entry: the name stays untranslated
+    "Blog": "博客",
+    "Guestbook": "留言板",
+    "Journal": "日志",
+    "Diary": "日记",
+    "Gaming": "游戏",
+    "Bookmarks": "书签",
+    "Accounts": "账户",
+    "Computers": "电脑设备",
+    "Contact": "联系",
+    "Welcome": "欢迎",
+    "My Current Vibe": "当前氛围",
+    "Music": "音乐",
+    "Current Blog": "当前博客",
+    "Changelog": "更新日志",
+    "Friends": "朋友们",
+    "Games": "游戏",
+    "Countries": "国家",
+    "Sponsors / VPNs / Buttons": "赞助商 / VPN / 按钮",
+    "Open guestbook": "打开留言板",
+    # Both the single-character ellipsis and three ASCII dots are covered.
+    "Loading…": "加载中…",
+    "Loading...": "加载中…",
+    "Licensed under": "采用",
+    # NOTE(review): short, generic lowercase key with no counterpart in JA;
+    # verify it cannot match inside longer words such as "website".
+    "site": "网站",
+    "Error": "错误",
+}
+
+
+# JA maps English UI phrases (case-sensitive keys) to Japanese.  It is the
+# mapping main() passes to translate_html() for the "*_jp.html" output, whose
+# <html lang> value is set to "ja".  Unlike ZH it has no "site" entry.
+JA = {
+    "Skip to content": "本文へ移動",
+    "Menu": "メニュー",
+    "Language": "言語",
+    "Home": "ホーム",
+    "StartPage": "StartPage",  # identity entry: the name stays untranslated
+    "Blog": "ブログ",
+    "Guestbook": "ゲストブック",
+    "Journal": "ジャーナル",
+    "Diary": "日記",
+    "Gaming": "ゲーム",
+    "Bookmarks": "ブックマーク",
+    "Accounts": "アカウント",
+    "Computers": "コンピューター",
+    "Contact": "連絡先",
+    "Welcome": "ようこそ",
+    "My Current Vibe": "今の雰囲気",
+    "Music": "音楽",
+    "Current Blog": "現在のブログ",
+    "Changelog": "更新履歴",
+    "Friends": "友達",
+    "Games": "ゲーム",
+    "Countries": "国",
+    "Sponsors / VPNs / Buttons": "スポンサー / VPN / ボタン",
+    "Open guestbook": "ゲストブックを開く",
+    # Both the single-character ellipsis and three ASCII dots are covered.
+    "Loading…": "読み込み中…",
+    "Loading...": "読み込み中…",
+    "Licensed under": "ライセンス:",
+    "Error": "エラー",
+}
+
+
+ATTR_TRANSLATE = {
+    "title",
+    "aria-label",
+    "aria-labelledby",  # generally ids; don't translate
+    "alt",
+    "placeholder",
+}
+
+
+# Matches one "<...>" run.  The capture group makes re.split() keep the tags
+# in its result, alternating with the text between them.  The match stops at
+# the first ">", including one that sits inside an attribute value or comment.
+RE_TAG = re.compile(r"(<[^>]+>)")
+# Splits a text node into (leading whitespace, core text, trailing
+# whitespace); re.S lets the core span several lines.
+RE_TEXT_NODE = re.compile(r"^(\s*)(.*?)(\s*)$", re.S)
+# Matches one quoted attribute as (preceding whitespace char, name, value
+# including its quotes).  Unquoted values and "=" surrounded by spaces are
+# not matched.
+RE_ATTR = re.compile(r'(\s)([a-zA-Z_:.-]+)=("[^"]*"|\'[\s\S]*?\')')
+
+
+def should_skip_path(p: Path) -> bool:
+    rel = p.relative_to(ROOT)
+    parts = set(rel.parts)
+    return any(d in parts for d in SKIP_DIRS)
+
+
+def translate_phrase(s: str, mapping: dict[str, str]) -> str:
+    # Exact match first
+    if s in mapping:
+        return mapping[s]
+
+    # Replace common UI tokens inside longer strings (simple, conservative)
+    out = s
+    for k, v in mapping.items():
+        if k and k in out:
+            out = out.replace(k, v)
+    return out
+
+
+def translate_text_node(text: str, mapping: dict[str, str]) -> str:
+    m = RE_TEXT_NODE.match(text)
+    if not m:
+        return text
+    lead, core, tail = m.group(1), m.group(2), m.group(3)
+
+    # Skip empty or purely whitespace
+    if not core.strip():
+        return text
+
+    # Skip if it's just punctuation/symbols
+    if not re.search(r"[A-Za-z]", core):
+        return text
+
+    translated = translate_phrase(core, mapping)
+    return f"{lead}{translated}{tail}"
+
+
+def tag_name(tag: str) -> str | None:
+    # tag is like <div ...> or </div>
+    t = tag.strip()[1:-1].strip()
+    if not t:
+        return None
+    if t.startswith("!") or t.startswith("?"):
+        return None
+    if t.startswith("/"):
+        t = t[1:].lstrip()
+    name = re.split(r"\s+", t, maxsplit=1)[0].lower()
+    return name
+
+
+def translate_attrs(tag: str, mapping: dict[str, str]) -> str:
+    # Don't touch aria-labelledby since it's usually an id.
+    def repl(m: re.Match[str]) -> str:
+        space, key, val = m.group(1), m.group(2), m.group(3)
+        k = key.lower()
+        if k not in ATTR_TRANSLATE or k == "aria-labelledby":
+            return m.group(0)
+        quote = val[0]
+        inner = val[1:-1]
+        new_inner = translate_phrase(inner, mapping)
+        if new_inner == inner:
+            return m.group(0)
+        return f"{space}{key}={quote}{new_inner}{quote}"
+
+    return RE_ATTR.sub(repl, tag)
+
+
+def translate_html(src: str, mapping: dict[str, str]) -> str:
+    parts = RE_TAG.split(src)
+    out: list[str] = []
+
+    skip_depth = 0
+    for part in parts:
+        if part.startswith("<") and part.endswith(">"):
+            name = tag_name(part)
+
+            # track skip tags nesting
+            if name in SKIP_TAGS:
+                if part.lstrip().startswith("</"):
+                    if skip_depth > 0:
+                        skip_depth -= 1
+                else:
+                    skip_depth += 1
+
+            out.append(translate_attrs(part, mapping))
+        else:
+            if skip_depth > 0:
+                out.append(part)
+            else:
+                out.append(translate_text_node(part, mapping))
+
+    return "".join(out)
+
+
+def write_if_changed(path: Path, content: str) -> None:
+    old = path.read_text(encoding="utf-8", errors="ignore") if path.exists() else None
+    if old == content:
+        return
+    path.write_text(content, encoding="utf-8")
+
+
+def main() -> int:
+    html_files = sorted(ROOT.rglob("*.html"))
+    for p in html_files:
+        if should_skip_path(p):
+            continue
+
+        # Skip already translated files
+        if p.name.endswith("_zh.html") or p.name.endswith("_jp.html"):
+            continue
+
+        # Only translate pages that look like they are part of the unified site
+        # (Keep legacy old HTML alone unless user explicitly wants all)
+        src = p.read_text(encoding="utf-8", errors="ignore")
+
+        # Output names
+        zh_path = p.with_name(p.stem + "_zh.html")
+        jp_path = p.with_name(p.stem + "_jp.html")
+
+        zh = translate_html(src, ZH)
+        jp = translate_html(src, JA)
+
+        # Set lang attribute if present
+        zh = re.sub(r"<html\s+lang=\"[^\"]*\"", '<html lang="zh"', zh, count=1)
+        jp = re.sub(r"<html\s+lang=\"[^\"]*\"", '<html lang="ja"', jp, count=1)
+
+        write_if_changed(zh_path, zh)
+        write_if_changed(jp_path, jp)
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())