summary refs log tree commit diff
path: root/tools/link_check.py
diff options
context:
space:
mode:
author sillylaird <sillylaird@fastmail.ca> 2026-02-03 21:27:57 -0500
committer sillylaird <sillylaird@fastmail.ca> 2026-02-03 21:27:57 -0500
commit 720d752748b793a2f5cf3cc14cb75ad86e8919c0 (patch)
tree 29120103307cb17e7d6c283cc198ec2484f934cd /tools/link_check.py
First commit
Diffstat (limited to 'tools/link_check.py')
-rw-r--r-- tools/link_check.py 94
1 files changed, 94 insertions, 0 deletions
diff --git a/tools/link_check.py b/tools/link_check.py
new file mode 100644
index 0000000..246eaf8
--- /dev/null
+++ b/tools/link_check.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""Very small internal link checker.
+
+Checks:
+- href="/path" and src="/path" for local files
+- Only checks local paths (starting with / or relative), skips http(s), mailto, xmpp, onion, etc.
+
+Usage:
+ python tools/link_check.py
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+
# Site root: one directory above the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Directory names that are never scanned for HTML files.
SKIP_DIRS = {".git"}
# HTML files exempt from checking, as POSIX-style paths relative to ROOT.
SKIP_FILES = {
    "test.html",
    "test_jp.html",
    "test_zh.html",
    "startpage/test.html",
}

# Matches a quoted href/src attribute (either quote style, any letter case).
# Group 1 is the quote character, group 2 the raw attribute value.
# HTML permits whitespace on both sides of "=", so it is tolerated here;
# otherwise links written as `href = "/x"` would silently go unchecked.
RE_URL = re.compile(r"\b(?:href|src)\s*=\s*(['\"])(.*?)\1", re.I)
+
+
def is_external(u: str) -> bool:
    """Return True when *u* must not be checked against the local file tree.

    A reference counts as external when, after stripping surrounding
    whitespace, it is one of:

    - a fragment-only link (``#...``) or a protocol-relative URL (``//host``);
    - anything containing ``.onion``;
    - anything that starts with a URI scheme (``http:``, ``mailto:``,
      ``tel:``, ``data:``, ``javascript:`` ...). Schemes are matched
      case-insensitively, so ``HTTP://`` is recognised as well.
    """
    u = u.strip()

    # Same-page anchors and protocol-relative URLs never map to a local file.
    if u.startswith(("#", "//")):
        return True

    # Substring test: also catches onion hosts that appear without a scheme.
    if ".onion" in u:
        return True

    # Generic "scheme:" prefix (letter, then letters/digits/+/./-) instead of
    # a fixed list, so unlisted schemes are not mistaken for relative paths.
    return re.match(r"[A-Za-z][A-Za-z0-9+.\-]*:", u) is not None
+
+
def normalize(p: Path, url: str) -> Path | None:
    """Map a link found in HTML file *p* to the local path it refers to.

    Args:
        p: Path of the HTML file that contains the link.
        url: Raw attribute value of the href/src.

    Returns:
        The resolved filesystem path, or ``None`` when there is nothing to
        check: the value is empty once its fragment and query are removed,
        or ``is_external`` classifies it as non-local.
    """
    from urllib.parse import unquote

    # Only the path component can name a file; drop "#fragment" and "?query".
    url = url.split("#", 1)[0].split("?", 1)[0].strip()
    if not url or is_external(url):
        return None

    # Decode percent-escapes so "/my%20file.html" finds "my file.html".
    # Done after the split above so an encoded "%23"/"%3F" stays in the name.
    url = unquote(url)

    if url.startswith("/"):
        # Site-absolute: anchor at the site root, not the filesystem root.
        return (ROOT / url.lstrip("/")).resolve()

    # Relative: resolve against the directory of the containing file.
    return (p.parent / url).resolve()
+
+
def main() -> int:
    """Scan every HTML file under ROOT and report broken local links.

    Files inside a directory listed in SKIP_DIRS, and files listed in
    SKIP_FILES, are not scanned. Each href/src value matched by RE_URL is
    mapped to a local path with ``normalize``; a link that resolves to a
    directory is satisfied only by an ``index.html`` inside that directory.

    Returns:
        1 if at least one local target is missing (after printing the list),
        otherwise 0.
    """
    missing = []
    # Sorted because rglob order is unspecified; keeps the report stable.
    for html in sorted(ROOT.rglob("*.html")):
        rel_path = html.relative_to(ROOT)
        # Compare parts relative to ROOT so that a skip-named directory
        # *above* the site root cannot exclude the whole tree.
        if any(part in SKIP_DIRS for part in rel_path.parts):
            continue
        rel = rel_path.as_posix()
        if rel in SKIP_FILES:
            continue

        # Undecodable bytes are dropped rather than aborting the whole run.
        text = html.read_text(encoding="utf-8", errors="ignore")
        for m in RE_URL.finditer(text):
            url = m.group(2)
            target = normalize(html, url)
            if target is None:
                continue
            # A directory link is only valid if it has an index.html; a bare
            # existing directory must not count as a satisfied link.
            if target.is_dir():
                target = target / "index.html"
            if not target.exists():
                missing.append((rel, url))

    if missing:
        print("Missing local links:")
        for src, url in missing:
            print(f"- {src}: {url}")
        return 1

    print("OK: no missing local href/src found")
    return 0


# Use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())