#!/usr/bin/env python3
"""Very small internal link checker.

Checks:
- href="/path" and src="/path" for local files
- Only checks local paths (starting with / or relative), skips http(s), mailto, xmpp, onion, etc.

Usage:
  python tools/link_check.py
"""

from __future__ import annotations

import re
from pathlib import Path


# Site root: the directory one level above the folder containing this script.
ROOT = Path(__file__).resolve().parents[1]

# Directory names that are never scanned (compared against path components).
SKIP_DIRS = {".git"}

# HTML files excluded from checking, as POSIX-style paths relative to ROOT.
SKIP_FILES = {
    "startpage/test.html",
    "test.html",
    "test_jp.html",
    "test_zh.html",
}

# Matches quoted href="..." / src='...' attributes; group 2 is the raw URL.
RE_URL = re.compile(
    r"\b(?:href|src)=(['\"])(.*?)\1",
    re.IGNORECASE,
)


# URI scheme prefix as defined by RFC 3986 ("http:", "tel:", "magnet:", ...):
# a letter followed by letters, digits, "+", "-" or ".", ending with a colon.
_RE_SCHEME = re.compile(r"[a-z][a-z0-9+.\-]*:", re.IGNORECASE)


def is_external(u: str) -> bool:
    """Return True if *u* is not a local path and must not be checked on disk.

    A URL counts as external when, after stripping surrounding whitespace, it:

    - is a pure fragment reference (starts with ``#``),
    - is protocol-relative (starts with ``//``),
    - starts with any URI scheme (``http:``, ``mailto:``, ``tel:``, ...),
      matched case-insensitively, or
    - mentions a ``.onion`` host anywhere in the string.

    Args:
        u: Raw attribute value taken from an ``href``/``src`` attribute.

    Returns:
        True for external/non-file references, False for local paths
        (including the empty string).
    """
    u = u.strip()
    if u.startswith(("#", "//")):
        return True
    # Any scheme means "not a file in this tree"; a fixed allow-list would
    # misclassify tel:, sms:, ftp:, magnet:, upper-case HTTP://, etc.
    if _RE_SCHEME.match(u):
        return True
    # Scheme-less onion addresses such as "abc.onion/page".
    return ".onion" in u.lower()


def normalize(p: Path, url: str) -> Path | None:
    """Map a link found in HTML file *p* to the filesystem path it refers to.

    The fragment (``#...``) and query string (``?...``) are removed, the
    remaining path is percent-decoded, and the result is resolved either
    against ``ROOT`` (for site-absolute URLs starting with ``/``) or against
    the directory containing *p* (for relative URLs).

    Args:
        p: Path of the HTML file in which the link appears.
        url: Raw ``href``/``src`` attribute value.

    Returns:
        The resolved target path, or None when the URL is empty after
        stripping or is classified as external by ``is_external``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import unquote

    url = url.split("#", 1)[0].split("?", 1)[0].strip()
    if not url or is_external(url):
        return None

    # Decode only after cutting fragment/query so that an encoded "%23" or
    # "%3F" stays part of the file name; "my%20file.html" -> "my file.html".
    path = unquote(url)

    if path.startswith("/"):
        # Site-absolute: anchor at the repository root, not the filesystem root.
        return (ROOT / path.lstrip("/")).resolve()

    # Relative to the directory of the referencing file.
    return (p.parent / path).resolve()


def _target_exists(target: Path) -> bool:
    """Return True if *target* is an existing file or a directory with index.html."""
    if target.is_dir():
        # A directory link is only valid when it has an index page to serve.
        return (target / "index.html").exists()
    return target.exists()


def main() -> int:
    """Scan every HTML file under ROOT and report local links whose target is missing.

    Files inside a directory listed in ``SKIP_DIRS`` and files listed in
    ``SKIP_FILES`` are not scanned. Each ``href``/``src`` value matched by
    ``RE_URL`` is mapped to a path with ``normalize``; external or empty
    links are ignored.

    Returns:
        1 if at least one missing target was found (a report is printed),
        otherwise 0.
    """
    missing: list[tuple[str, str]] = []
    # Sorted so the report order is stable across runs and platforms.
    for html in sorted(ROOT.rglob("*.html")):
        rel_path = html.relative_to(ROOT)
        # Test only components below ROOT, so a checkout that itself lives
        # under a directory named like a skip dir is still scanned.
        if any(part in SKIP_DIRS for part in rel_path.parts):
            continue
        if rel_path.as_posix() in SKIP_FILES:
            continue
        # Guard against directories whose name happens to end in ".html".
        if not html.is_file():
            continue

        text = html.read_text(encoding="utf-8", errors="ignore")
        for m in RE_URL.finditer(text):
            url = m.group(2)
            target = normalize(html, url)
            if target is None:
                continue
            if not _target_exists(target):
                missing.append((str(rel_path), url))

    if missing:
        print("Missing local links:")
        for src, url in missing:
            print(f"- {src}: {url}")
        return 1

    print("OK: no missing local href/src found")
    return 0


# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)