#!/usr/bin/env python3
"""Generate zh/jp copies of all HTML pages (except startpage/).

This is a best-effort, offline translation helper.

- It copies each *.html to *_zh.html and *_jp.html (same directory).
- It preserves all HTML structure, links, ids, and classes.
- It translates only user-visible text nodes and some common attributes.
- It skips anything under "startpage/", "mstartpage/", and "partials/"
  (see SKIP_DIRS below).

Notes:
- This is not a static site generator. It only writes additional files.
- Translation quality depends on the dictionaries below.
"""

from __future__ import annotations

import re
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]

SKIP_DIRS = {
    "startpage",
    "mstartpage",
    "partials",
}

# Tags whose text content should not be translated.
SKIP_TAGS = {
    "script",
    "style",
    "code",
    "pre",
    "kbd",
    "samp",
}

# Very small phrase dictionaries (hand-tuned for this repo).
# For anything not in the dictionary, we leave the text as-is.
ZH = {
    "Skip to content": "跳至内容",
    "Menu": "菜单",
    "Language": "语言",
    "Home": "首页",
    "StartPage": "StartPage",
    "Blog": "博客",
    "Guestbook": "留言板",
    "Journal": "日志",
    "Diary": "日记",
    "Gaming": "游戏",
    "Bookmarks": "书签",
    "Accounts": "账户",
    "Computers": "电脑设备",
    "Contact": "联系",
    "Welcome": "欢迎",
    "My Current Vibe": "当前氛围",
    "Music": "音乐",
    "Current Blog": "当前博客",
    "Changelog": "更新日志",
    "Friends": "朋友们",
    "Games": "游戏",
    "Countries": "国家",
    "Sponsors / VPNs / Buttons": "赞助商 / VPN / 按钮",
    "Open guestbook": "打开留言板",
    "Loading…": "加载中…",
    "Loading...": "加载中…",
    "Licensed under": "采用",
    "site": "网站",
    "Error": "错误",
}

JA = {
    "Skip to content": "本文へ移動",
    "Menu": "メニュー",
    "Language": "言語",
    "Home": "ホーム",
    "StartPage": "StartPage",
    "Blog": "ブログ",
    "Guestbook": "ゲストブック",
    "Journal": "ジャーナル",
    "Diary": "日記",
    "Gaming": "ゲーム",
    "Bookmarks": "ブックマーク",
    "Accounts": "アカウント",
    "Computers": "コンピューター",
    "Contact": "連絡先",
    "Welcome": "ようこそ",
    "My Current Vibe": "今の雰囲気",
    "Music": "音楽",
    "Current Blog": "現在のブログ",
    "Changelog": "更新履歴",
    "Friends": "友達",
    "Games": "ゲーム",
    "Countries": "国",
    "Sponsors / VPNs / Buttons": "スポンサー / VPN / ボタン",
    "Open guestbook": "ゲストブックを開く",
    "Loading…": "読み込み中…",
    "Loading...": "読み込み中…",
    "Licensed under": "ライセンス:",
    "Error": "エラー",
}

ATTR_TRANSLATE = {
    "title",
    "aria-label",
    "aria-labelledby",  # generally ids; don't translate (filtered out below)
    "alt",
    "placeholder",
}

RE_TAG = re.compile(r"(<[^>]+>)")
RE_TEXT_NODE = re.compile(r"^(\s*)(.*?)(\s*)$", re.S)
RE_ATTR = re.compile(r'(\s)([a-zA-Z_:.-]+)=("[^"]*"|\'[\s\S]*?\')')


def should_skip_path(p: Path) -> bool:
    rel = p.relative_to(ROOT)
    parts = set(rel.parts)
    return any(d in parts for d in SKIP_DIRS)


def translate_phrase(s: str, mapping: dict[str, str]) -> str:
    # Exact match first.
    if s in mapping:
        return mapping[s]
    # Replace common UI tokens inside longer strings (simple, conservative).
    out = s
    for k, v in mapping.items():
        if k and k in out:
            out = out.replace(k, v)
    return out


def translate_text_node(text: str, mapping: dict[str, str]) -> str:
    m = RE_TEXT_NODE.match(text)
    if not m:
        return text
    lead, core, tail = m.group(1), m.group(2), m.group(3)
    # Skip empty or purely whitespace nodes.
    if not core.strip():
        return text
    # Skip if it's just punctuation/symbols.
    if not re.search(r"[A-Za-z]", core):
        return text
    translated = translate_phrase(core, mapping)
    return f"{lead}{translated}{tail}"

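
# A quick illustration (hypothetical examples, not part of the script's
# behavior): exact dictionary hits win, known phrases inside longer strings
# are swapped, and anything unknown passes through untouched.
#
#   >>> translate_phrase("Menu", ZH)
#   '菜单'
#   >>> translate_phrase("Open guestbook now", ZH)
#   '打开留言板 now'
#   >>> translate_text_node("  Unknown text  ", ZH)
#   '  Unknown text  '
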
def tag_name(tag: str) -> str | None:
    # `tag` is a full delimiter such as <div class="x"> or </div>.
    t = tag.strip()[1:-1].strip()
    if not t:
        return None
    if t.startswith("!") or t.startswith("?"):
        return None
    if t.startswith("/"):
        t = t[1:].lstrip()
    name = re.split(r"\s+", t, maxsplit=1)[0].lower()
    return name


def translate_attrs(tag: str, mapping: dict[str, str]) -> str:
    # Don't touch aria-labelledby since it's usually an id.
    def repl(m: re.Match[str]) -> str:
        space, key, val = m.group(1), m.group(2), m.group(3)
        k = key.lower()
        if k not in ATTR_TRANSLATE or k == "aria-labelledby":
            return m.group(0)
        quote = val[0]
        inner = val[1:-1]
        new_inner = translate_phrase(inner, mapping)
        if new_inner == inner:
            return m.group(0)
        return f"{space}{key}={quote}{new_inner}{quote}"

    return RE_ATTR.sub(repl, tag)

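
# Hypothetical illustration: only the whitelisted human-readable attributes
# are rewritten; ids, classes, and tag names survive untouched.
#
#   >>> tag_name('</DIV>')
#   'div'
#   >>> translate_attrs('<a class="nav" title="Guestbook">', ZH)
#   '<a class="nav" title="留言板">'
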
def translate_html(src: str, mapping: dict[str, str]) -> str:
    parts = RE_TAG.split(src)
    out: list[str] = []
    skip_depth = 0
    for part in parts:
        if part.startswith("<") and part.endswith(">"):
            name = tag_name(part)
            # Track nesting of skip tags so their text passes through verbatim.
            if name in SKIP_TAGS:
                if part.lstrip().startswith("</"):
                    if skip_depth > 0:
                        skip_depth -= 1
                else:
                    skip_depth += 1
            out.append(translate_attrs(part, mapping))
        else:
            if skip_depth > 0:
                out.append(part)
            else:
                out.append(translate_text_node(part, mapping))
    return "".join(out)


def write_if_changed(path: Path, content: str) -> None:
    old = path.read_text(encoding="utf-8", errors="ignore") if path.exists() else None
    if old == content:
        return
    path.write_text(content, encoding="utf-8")


def main() -> int:
    html_files = sorted(ROOT.rglob("*.html"))
    for p in html_files:
        if should_skip_path(p):
            continue
        # Skip already translated files.
        if p.name.endswith("_zh.html") or p.name.endswith("_jp.html"):
            continue
        # Only translate pages that look like they are part of the unified site
        # (keep legacy old HTML alone unless the user explicitly wants all).
        src = p.read_text(encoding="utf-8", errors="ignore")

        # Output names.
        zh_path = p.with_name(p.stem + "_zh.html")
        jp_path = p.with_name(p.stem + "_jp.html")

        zh = translate_html(src, ZH)
        jp = translate_html(src, JA)

        # Set the lang attribute if present; note "ja" (not "jp") is the
        # standard language code, even though the file suffix is _jp.
        zh = re.sub(r'(<html\b[^>]*\blang=")[^"]*(")', r"\1zh\2", zh, count=1)
        jp = re.sub(r'(<html\b[^>]*\blang=")[^"]*(")', r"\1ja\2", jp, count=1)

        write_if_changed(zh_path, zh)
        write_if_changed(jp_path, jp)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
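
# Usage sketch (assumptions: the script sits one directory below the repo
# root, which is what ROOT = parents[1] implies; the file name below is
# made up):
#
#   $ python3 tools/translate_pages.py
#
# A quick REPL check of the whole pass:
#
#   >>> translate_html('<a title="Menu">Home</a><code>Home</code>', ZH)
#   '<a title="菜单">首页</a><code>Home</code>'
#
# Markup, ids, and classes survive; only visible text and the whitelisted
# attributes change, and <code>/<pre>/<script> content is left alone.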