1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
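#!/usr/bin/env python3
"""Very small internal link checker.

Scans every *.html file below the project root (the parent of the directory
that contains this script) and reports href/src attributes whose local
target does not exist.

Checks:
  - href="/path" and src="/path" values that point at local files
  - only local paths (starting with / or relative); external targets such as
    http(s), mailto, xmpp, onion, etc. are skipped

Usage:
    python tools/link_check.py

The exit status is 1 when at least one missing target is found, otherwise 0.
"""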
from __future__ import annotations
import re
from pathlib import Path
# Base directory for the scan and for root-relative ("/...") URLs: the parent
# of the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]
# Path components that exclude an HTML file from the scan.
SKIP_DIRS = {".git"}
# HTML files excluded from the scan, as POSIX-style paths relative to ROOT.
SKIP_FILES = {
    "test.html",
    "test_jp.html",
    "test_zh.html",
    "startpage/test.html",
}
# Matches quoted href/src attribute values, case-insensitively. Group 1 is the
# quote character and group 2 the URL. "." does not match newlines here, so a
# value must sit on a single line to be found.
RE_URL = re.compile(r"\b(?:href|src)=(['\"])(.*?)\1", re.I)
# A leading URI scheme such as "https:", "mailto:" or "tel:": a letter followed
# by letters, digits, "+", "." or "-", terminated by ":" (RFC 3986, sec. 3.1).
_RE_SCHEME = re.compile(r"[A-Za-z][A-Za-z0-9+.\-]*:")


def is_external(u: str) -> bool:
    """Return True when *u* must not be checked against the local file tree.

    A URL counts as external when, after stripping surrounding whitespace, it

    - starts with any URI scheme (http:, https:, mailto:, xmpp:, signal:,
      data:, javascript:, tel:, ...), matched case-insensitively,
    - is a pure fragment reference ("#..."),
    - is protocol-relative ("//host/..."), or
    - contains ".onion" anywhere.

    Everything else (root-relative and document-relative paths, as well as
    the empty string) is treated as local.
    """
    u = u.strip()
    return (
        u.startswith(("#", "//"))
        # Generic scheme detection instead of a hard-coded list, so schemes
        # like tel: or ftp: are not mistaken for relative file paths.
        or _RE_SCHEME.match(u) is not None
        or ".onion" in u
    )
def normalize(p: Path, url: str) -> Path | None:
    """Map a link found in an HTML file to the filesystem path it refers to.

    Args:
        p: Path of the HTML file that contains the link. Relative URLs are
            resolved against its parent directory.
        url: Raw attribute value as it appears in the markup.

    Returns:
        The resolved target path, or None when nothing is left after removing
        the fragment and query string, or when ``is_external`` classifies the
        URL as external.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import unquote

    # Fragment and query string never influence which file is addressed.
    url = url.split("#", 1)[0].split("?", 1)[0].strip()
    if not url or is_external(url):
        return None

    # Decode percent-escapes ("my%20file.html" -> "my file.html") so the path
    # matches the on-disk name. This runs after the split above so an encoded
    # "%23" or "%3F" stays part of the file name.
    url = unquote(url)

    if url.startswith("/"):
        # Root-relative: anchor at the project root.
        return (ROOT / url.lstrip("/")).resolve()
    # Document-relative: anchor at the directory of the linking file.
    return (p.parent / url).resolve()
def main() -> int:
    """Scan all HTML files below ROOT and report missing local link targets.

    Every ``*.html`` file under ROOT is read, except files with a path
    component listed in SKIP_DIRS and files listed in SKIP_FILES. Each
    href/src value matched by RE_URL is resolved with ``normalize``; targets
    that do not exist are collected and printed.

    A link that resolves to a directory is satisfied only by an
    ``index.html`` inside that directory.

    Returns:
        1 when at least one missing target was found, otherwise 0.
    """
    missing: list[tuple[str, str]] = []

    # rglob order is unspecified; sorting keeps the report stable across runs.
    for html in sorted(ROOT.rglob("*.html")):
        rel_path = html.relative_to(ROOT)
        # Match skip directories against the ROOT-relative parts only, so the
        # location of the checkout itself cannot trigger a skip.
        if any(part in SKIP_DIRS for part in rel_path.parts):
            continue
        if rel_path.as_posix() in SKIP_FILES:
            continue

        text = html.read_text(encoding="utf-8", errors="ignore")
        for m in RE_URL.finditer(text):
            url = m.group(2)
            target = normalize(html, url)
            if target is None:
                continue
            # A directory link is served by its index.html; a directory
            # without one is a broken link even though the directory exists.
            if target.is_dir():
                target = target / "index.html"
            if not target.exists():
                missing.append((str(rel_path), url))

    if missing:
        print("Missing local links:")
        for src, url in missing:
            print(f"- {src}: {url}")
        return 1

    print("OK: no missing local href/src found")
    return 0
# Script entry point: run the check and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|