#!/usr/bin/env python3
"""Generate zh/jp copies of all HTML pages (except startpage/).

This is a best-effort, offline translation helper.

- It copies each *.html to *_zh.html and *_jp.html (same directory).
- It preserves all HTML structure, links, ids, and classes.
- It translates only user-visible text nodes and some common attributes.
- It skips anything under "startpage/", "mstartpage/", and "partials/"
  (see SKIP_DIRS below).

Notes:
- This is not a static site generator. It only writes additional files.
- Translation quality depends on the dictionaries below.
"""

from __future__ import annotations

import re
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]

SKIP_DIRS = {
    "startpage",
    "mstartpage",
    "partials",
}

# Tags whose text content should not be translated.
SKIP_TAGS = {
    "script",
    "style",
    "code",
    "pre",
    "kbd",
    "samp",
}

# Very small phrase dictionaries (hand-tuned for this repo).
# For anything not in the dictionary, we leave the text as-is.
ZH = {
    "Skip to content": "跳至内容",
    "Menu": "菜单",
    "Language": "语言",
    "Home": "首页",
    "StartPage": "StartPage",
    "Blog": "博客",
    "Guestbook": "留言板",
    "Journal": "日志",
    "Diary": "日记",
    "Gaming": "游戏",
    "Bookmarks": "书签",
    "Accounts": "账户",
    "Computers": "电脑设备",
    "Contact": "联系",
    "Welcome": "欢迎",
    "My Current Vibe": "当前氛围",
    "Music": "音乐",
    "Current Blog": "当前博客",
    "Changelog": "更新日志",
    "Friends": "朋友们",
    "Games": "游戏",
    "Countries": "国家",
    "Sponsors / VPNs / Buttons": "赞助商 / VPN / 按钮",
    "Open guestbook": "打开留言板",
    "Loading…": "加载中…",
    "Loading...": "加载中…",
    "Licensed under": "采用",
    "site": "网站",
    "Error": "错误",
}

JA = {
    "Skip to content": "本文へ移動",
    "Menu": "メニュー",
    "Language": "言語",
    "Home": "ホーム",
    "StartPage": "StartPage",
    "Blog": "ブログ",
    "Guestbook": "ゲストブック",
    "Journal": "ジャーナル",
    "Diary": "日記",
    "Gaming": "ゲーム",
    "Bookmarks": "ブックマーク",
    "Accounts": "アカウント",
    "Computers": "コンピューター",
    "Contact": "連絡先",
    "Welcome": "ようこそ",
    "My Current Vibe": "今の雰囲気",
    "Music": "音楽",
    "Current Blog": "現在のブログ",
    "Changelog": "更新履歴",
    "Friends": "友達",
    "Games": "ゲーム",
    "Countries": "国",
    "Sponsors / VPNs / Buttons": "スポンサー / VPN / ボタン",
    "Open guestbook": "ゲストブックを開く",
    "Loading…": "読み込み中…",
    "Loading...": "読み込み中…",
    "Licensed under": "ライセンス:",
    "Error": "エラー",
}

ATTR_TRANSLATE = {
    "title",
    "aria-label",
    "aria-labelledby",  # generally ids; don't translate (filtered out below)
    "alt",
    "placeholder",
}

RE_TAG = re.compile(r"(<[^>]+>)")
RE_TEXT_NODE = re.compile(r"^(\s*)(.*?)(\s*)$", re.S)
RE_ATTR = re.compile(r'(\s)([a-zA-Z_:.-]+)=("[^"]*"|\'[\s\S]*?\')')


def should_skip_path(p: Path) -> bool:
    rel = p.relative_to(ROOT)
    parts = set(rel.parts)
    return any(d in parts for d in SKIP_DIRS)


def translate_phrase(s: str, mapping: dict[str, str]) -> str:
    # Exact match first.
    if s in mapping:
        return mapping[s]
    # Replace common UI tokens inside longer strings (simple, conservative).
    out = s
    for k, v in mapping.items():
        if k and k in out:
            out = out.replace(k, v)
    return out


def translate_text_node(text: str, mapping: dict[str, str]) -> str:
    m = RE_TEXT_NODE.match(text)
    if not m:
        return text
    lead, core, tail = m.group(1), m.group(2), m.group(3)
    # Skip empty or purely whitespace nodes.
    if not core.strip():
        return text
    # Skip if it's just punctuation/symbols.
    if not re.search(r"[A-Za-z]", core):
        return text
    translated = translate_phrase(core, mapping)
    return f"{lead}{translated}{tail}"

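
# A quick illustration (hypothetical examples, not part of the script's
# behavior): exact dictionary hits win, known phrases inside longer strings
# are swapped, and anything unknown passes through untouched.
#
#   >>> translate_phrase("Menu", ZH)
#   '菜单'
#   >>> translate_phrase("Open guestbook now", ZH)
#   '打开留言板 now'
#   >>> translate_text_node("  Unknown text  ", ZH)
#   '  Unknown text  '
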
def tag_name(tag: str) -> str | None:
    # `tag` is a full delimiter such as <div class="x"> or </div>.
    t = tag.strip()[1:-1].strip()
    if not t:
        return None
    if t.startswith("!") or t.startswith("?"):
        return None
    if t.startswith("/"):
        t = t[1:].lstrip()
    name = re.split(r"\s+", t, maxsplit=1)[0].lower()
    return name


def translate_attrs(tag: str, mapping: dict[str, str]) -> str:
    # Don't touch aria-labelledby since it's usually an id.
    def repl(m: re.Match[str]) -> str:
        space, key, val = m.group(1), m.group(2), m.group(3)
        k = key.lower()
        if k not in ATTR_TRANSLATE or k == "aria-labelledby":
            return m.group(0)
        quote = val[0]
        inner = val[1:-1]
        new_inner = translate_phrase(inner, mapping)
        if new_inner == inner:
            return m.group(0)
        return f"{space}{key}={quote}{new_inner}{quote}"

    return RE_ATTR.sub(repl, tag)

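
# Hypothetical illustration: only the whitelisted human-readable attributes
# are rewritten; ids, classes, and tag names survive untouched.
#
#   >>> tag_name('</DIV>')
#   'div'
#   >>> translate_attrs('<a class="nav" title="Guestbook">', ZH)
#   '<a class="nav" title="留言板">'
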
def translate_html(src: str, mapping: dict[str, str]) -> str:
    parts = RE_TAG.split(src)
    out: list[str] = []
    skip_depth = 0
    for part in parts:
        if part.startswith("<") and part.endswith(">"):
            name = tag_name(part)
            # Track nesting of skip tags so their text passes through verbatim.
            if name in SKIP_TAGS:
                if part.lstrip().startswith("</"):
                    if skip_depth > 0:
                        skip_depth -= 1
                else:
                    skip_depth += 1
            out.append(translate_attrs(part, mapping))
        else:
            if skip_depth > 0:
                out.append(part)
            else:
                out.append(translate_text_node(part, mapping))
    return "".join(out)


def write_if_changed(path: Path, content: str) -> None:
    old = path.read_text(encoding="utf-8", errors="ignore") if path.exists() else None
    if old == content:
        return
    path.write_text(content, encoding="utf-8")


def main() -> int:
    html_files = sorted(ROOT.rglob("*.html"))
    for p in html_files:
        if should_skip_path(p):
            continue
        # Skip already translated files.
        if p.name.endswith("_zh.html") or p.name.endswith("_jp.html"):
            continue
        # Only translate pages that look like they are part of the unified site
        # (keep legacy old HTML alone unless the user explicitly wants all).
        src = p.read_text(encoding="utf-8", errors="ignore")

        # Output names.
        zh_path = p.with_name(p.stem + "_zh.html")
        jp_path = p.with_name(p.stem + "_jp.html")

        zh = translate_html(src, ZH)
        jp = translate_html(src, JA)

        # Set the lang attribute if present; note "ja" (not "jp") is the
        # standard language code, even though the file suffix is _jp.
        zh = re.sub(r'(<html\b[^>]*\blang=")[^"]*(")', r"\1zh\2", zh, count=1)
        jp = re.sub(r'(<html\b[^>]*\blang=")[^"]*(")', r"\1ja\2", jp, count=1)

        write_if_changed(zh_path, zh)
        write_if_changed(jp_path, jp)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
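
# Usage sketch (assumptions: the script sits one directory below the repo
# root, which is what ROOT = parents[1] implies; the file name below is
# made up):
#
#   $ python3 tools/translate_pages.py
#
# A quick REPL check of the whole pass:
#
#   >>> translate_html('<a title="Menu">Home</a><code>Home</code>', ZH)
#   '<a title="菜单">首页</a><code>Home</code>'
#
# Markup, ids, and classes survive; only visible text and the whitelisted
# attributes change, and <code>/<pre>/<script> content is left alone.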