#!/usr/bin/env python3
"""Very small internal link checker.

Checks:
- href="/path" and src="/path" for local files
- Only checks local paths (starting with / or relative),
  skips http(s), mailto, xmpp, onion, etc.

Usage:
  python tools/link_check.py
"""

from __future__ import annotations

import re
from html import unescape
from pathlib import Path
from urllib.parse import unquote

# The script is expected to live one directory below the site root
# (e.g. <root>/tools/link_check.py), so the root is the parent of its folder.
ROOT = Path(__file__).resolve().parents[1]

# Directory names that are never scanned, wherever they appear below ROOT.
SKIP_DIRS = {".git"}
# Files (POSIX paths relative to ROOT) that are never scanned.
SKIP_FILES = {
    "test.html",
    "test_jp.html",
    "test_zh.html",
    "startpage/test.html",
}

# Matches href="..." / src='...' attributes; group 2 is the attribute value.
RE_URL = re.compile(r"\b(?:href|src)=(['\"])(.*?)\1", re.I)

# URI scheme prefix as defined by RFC 3986: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) ":"
_RE_SCHEME = re.compile(r"[a-z][a-z0-9+.\-]*:", re.I)


def is_external(u: str) -> bool:
    """Return True if *u* does not refer to a file inside the local site.

    A URL counts as external when it carries any URI scheme (``http:``,
    ``mailto:``, ``tel:``, ``data:``, ``javascript:`` ...; matched
    case-insensitively), is a pure fragment (``#...``), is protocol-relative
    (``//host/...``), or mentions a ``.onion`` host.
    """
    u = u.strip()
    return (
        u.startswith(("#", "//"))
        or _RE_SCHEME.match(u) is not None
        or ".onion" in u
    )


def normalize(p: Path, url: str) -> Path | None:
    """Map the raw attribute value *url* found in page *p* to a filesystem path.

    Args:
        p: Path of the HTML file that contains the link; relative URLs are
            resolved against its parent directory.
        url: Raw attribute value exactly as it appears in the HTML source.

    Returns:
        The resolved local path the link points to, or ``None`` when the URL
        is empty, fragment-only, or external (see ``is_external``).
    """
    # Attribute values are HTML-escaped in the source (e.g. "&amp;").
    url = unescape(url)
    url = url.split("#", 1)[0].split("?", 1)[0].strip()
    if not url or is_external(url):
        return None

    # Percent-decode only after the fragment/query split so that an encoded
    # "%23" or "%3F" remains part of the file name.
    path = unquote(url)
    if "\x00" in path:
        # A NUL byte can never be part of a filesystem path and would make
        # resolve() raise; keep the raw text so the link is reported instead.
        path = url

    if path.startswith("/"):
        return (ROOT / path.lstrip("/")).resolve()
    # Relative link: resolve against the directory of the containing page.
    return (p.parent / path).resolve()


def _target_exists(target: Path) -> bool:
    """Return True if *target* is an existing file or a directory with index.html."""
    if target.is_dir():
        # A directory link is only servable when it has an index page.
        return (target / "index.html").exists()
    return target.exists()


def main() -> int:
    """Scan every ``*.html`` file below ROOT and report broken local links.

    Returns:
        ``0`` when every local href/src target exists, ``1`` otherwise
        (after printing the list of missing links).
    """
    missing: list[tuple[str, str]] = []
    # Sorted so the report is stable between runs and platforms.
    for page in sorted(ROOT.rglob("*.html")):
        rel_path = page.relative_to(ROOT)
        # Compare against parts *below* ROOT so that the location of the
        # checkout itself can never trigger a skip.
        if any(part in SKIP_DIRS for part in rel_path.parts):
            continue
        rel = rel_path.as_posix()
        if rel in SKIP_FILES or not page.is_file():
            continue

        text = page.read_text(encoding="utf-8", errors="ignore")
        for m in RE_URL.finditer(text):
            url = m.group(2)
            target = normalize(page, url)
            if target is not None and not _target_exists(target):
                missing.append((rel, url))

    if missing:
        print("Missing local links:")
        for src, url in missing:
            print(f"- {src}: {url}")
        return 1

    print("OK: no missing local href/src found")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())