diff options
Diffstat (limited to 'tools/link_check.py')
| -rw-r--r-- | tools/link_check.py | 94 |
1 files changed, 94 insertions, 0 deletions
#!/usr/bin/env python3
"""Very small internal link checker.

Checks:
- href="/path" and src="/path" for local files
- Only checks local paths (starting with / or relative), skips anything with
  a URL scheme (http(s), mailto, xmpp, tel, ...), protocol-relative URLs,
  pure fragments and onion addresses.

Usage:
  python tools/link_check.py
"""

from __future__ import annotations

import re
from pathlib import Path
from urllib.parse import unquote


# Site root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Directory names (anywhere below ROOT) whose HTML files are never scanned.
SKIP_DIRS = {".git"}
# Files, given as POSIX paths relative to ROOT, that are never scanned.
SKIP_FILES = {
    "test.html",
    "test_jp.html",
    "test_zh.html",
    "startpage/test.html",
}

# Quoted href=/src= attribute values; group 2 is the URL.
RE_URL = re.compile(r"\b(?:href|src)=(['\"])(.*?)\1", re.I)

# RFC 3986 scheme prefix: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) ":".
_RE_SCHEME = re.compile(r"[A-Za-z][A-Za-z0-9+.\-]*:")


def is_external(u: str) -> bool:
    """Return True if *u* does not refer to a file inside the site tree.

    A URL is considered external when, after stripping surrounding
    whitespace, it

    - is a pure fragment (``#...``) or protocol-relative (``//host/...``),
    - starts with any URL scheme (``http:``, ``mailto:``, ``tel:``, ...;
      matched case-insensitively), or
    - contains ``.onion`` anywhere (onion addresses written without a scheme).
    """
    u = u.strip()
    if u.startswith(("#", "//")):
        return True
    if _RE_SCHEME.match(u):
        return True
    return ".onion" in u


def normalize(p: Path, url: str) -> Path | None:
    """Map *url*, as found in the HTML file *p*, to a filesystem path.

    The fragment and query string are dropped first.  Root-relative URLs
    (``/x``) are resolved against ``ROOT``; everything else is resolved
    against the directory containing *p*.  Percent-escapes are decoded so
    that ``my%20file.html`` is looked up as ``my file.html``.

    Returns None when there is nothing to check: the URL is empty once the
    fragment/query are removed, or it is external (see ``is_external``).
    """
    path_part = url.split("#", 1)[0].split("?", 1)[0].strip()
    if not path_part or is_external(path_part):
        return None

    # Decode only after the external check and the '#'/'?' split, so an
    # escaped '%23' or '%3F' ends up in the file name instead of being
    # mistaken for a delimiter.
    path_part = unquote(path_part)

    if path_part.startswith("/"):
        return (ROOT / path_part.lstrip("/")).resolve()

    # relative
    return (p.parent / path_part).resolve()


def main() -> int:
    """Scan every ``*.html`` file below ``ROOT`` and report broken local links.

    Files located in a ``SKIP_DIRS`` directory or listed in ``SKIP_FILES``
    are ignored.  Prints one line per missing target.

    Returns:
        1 if at least one local href/src target does not exist, else 0.
    """
    missing = []
    # Sorted so the report order does not depend on directory listing order.
    for html in sorted(ROOT.rglob("*.html")):
        rel = html.relative_to(ROOT)
        # Only look at path components below ROOT; where the checkout itself
        # lives must not influence which files get skipped.
        if any(part in SKIP_DIRS for part in rel.parts):
            continue
        if rel.as_posix() in SKIP_FILES:
            continue

        text = html.read_text(encoding="utf-8", errors="ignore")
        for m in RE_URL.finditer(text):
            url = m.group(2)
            target = normalize(html, url)
            if not target:
                continue
            # Anything that exists on disk is accepted.  That includes
            # directories, with or without an index.html inside them.
            if not target.exists():
                missing.append((str(rel), url))

    if missing:
        print("Missing local links:")
        for src, url in missing:
            print(f"- {src}: {url}")
        return 1

    print("OK: no missing local href/src found")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
