summary refs log tree commit diff
path: root/tools/html_audit.py
diff options
context:
space:
mode:
author sillylaird <sillylaird@fastmail.ca> 2026-02-03 21:27:57 -0500
committer sillylaird <sillylaird@fastmail.ca> 2026-02-03 21:27:57 -0500
commit 720d752748b793a2f5cf3cc14cb75ad86e8919c0 (patch)
tree 29120103307cb17e7d6c283cc198ec2484f934cd /tools/html_audit.py
First commit
Diffstat (limited to 'tools/html_audit.py')
-rw-r--r-- tools/html_audit.py 94
1 files changed, 94 insertions, 0 deletions
diff --git a/tools/html_audit.py b/tools/html_audit.py
new file mode 100644
index 0000000..4e85c36
--- /dev/null
+++ b/tools/html_audit.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""Lightweight HTML audit for common a11y/markup issues."""
+
+from __future__ import annotations
+
+from html.parser import HTMLParser
+from pathlib import Path
+
# Directory two levels up from this file, i.e. the parent of the directory
# that contains this script; every page is located relative to it.
ROOT = Path(__file__).resolve().parents[1]

# Directory names whose contents are never audited.
SKIP_DIRS = {".git", "partials"}

# Individual pages excluded from the audit, written as POSIX-style paths
# relative to ROOT.
SKIP_FILES = {
    "startpage/test.html",
    "test.html",
    "test_jp.html",
    "test_zh.html",
}
+
+
class AuditParser(HTMLParser):
    """Collect common accessibility/markup problems from one HTML document.

    Feed a document with ``feed()`` and then read the result attributes:

    * ``ids`` -- occurrence count for every non-empty ``id`` value seen.
    * ``duplicate_ids`` -- ``id`` values that occurred more than once.
    * ``missing_alt`` -- ``src`` of each ``<img>`` that has no ``alt``
      attribute at all (an empty ``alt=""`` is accepted).
    * ``missing_iframe_title`` -- ``src`` of each ``<iframe>`` whose
      ``title`` is absent or blank.
    * ``blank_rel`` -- ``href`` of each ``<a target="_blank">`` whose ``rel``
      lists neither ``noopener`` nor ``noreferrer``.
    """

    # Link types that stop the opened page from reaching ``window.opener``;
    # ``noreferrer`` implies ``noopener``.
    _SAFE_REL_TOKENS = frozenset({"noopener", "noreferrer"})

    def __init__(self) -> None:
        super().__init__()
        self.ids: dict[str, int] = {}
        self.duplicate_ids: set[str] = set()
        self.missing_alt: list[str] = []
        self.missing_iframe_title: list[str] = []
        self.blank_rel: list[str] = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Inspect one start tag and record any audit findings.

        Args:
            tag: Tag name as delivered by ``HTMLParser``.
            attrs: ``(name, value)`` pairs; ``value`` is ``None`` for
                attributes written without a value.
        """
        attr_map: dict[str, str] = {}
        for name, value in attrs:
            # HTML keeps the first occurrence of a duplicated attribute and
            # ignores later ones, so do not let a repeat overwrite it.
            attr_map.setdefault(name.lower(), value or "")

        ident = attr_map.get("id", "")
        if ident:
            if ident in self.ids:
                self.duplicate_ids.add(ident)
            self.ids[ident] = self.ids.get(ident, 0) + 1

        if tag == "img":
            # Only a completely absent attribute is reported.
            if "alt" not in attr_map:
                self.missing_alt.append(attr_map.get("src", ""))
        elif tag == "iframe":
            # A whitespace-only title is treated the same as no title.
            if not attr_map.get("title", "").strip():
                self.missing_iframe_title.append(attr_map.get("src", ""))
        elif tag == "a":
            # Compare target and rel tokens case-insensitively.
            if attr_map.get("target", "").lower() == "_blank":
                # rel is a whitespace-separated token list; compare whole
                # tokens rather than searching for a substring.
                rel_tokens = set(attr_map.get("rel", "").lower().split())
                if self._SAFE_REL_TOKENS.isdisjoint(rel_tokens):
                    self.blank_rel.append(attr_map.get("href", ""))
+
+
def main() -> int:
    """Audit every HTML page under ROOT and print a report to stdout.

    Pages inside a directory listed in SKIP_DIRS, and pages whose
    ROOT-relative POSIX path is listed in SKIP_FILES, are not audited.
    At most ten items are printed per finding; the remainder is summarised
    as a count.

    Returns:
        0 when no issues were found, 1 otherwise, so the value can be used
        directly as the process exit status.
    """
    issues = []

    # Sorted so the report order is stable from run to run.
    for page in sorted(ROOT.rglob("*.html")):
        rel_path = page.relative_to(ROOT)
        # Match skip directories against the path inside ROOT only; testing
        # the absolute path would skip every file whenever the checkout
        # itself lives below a directory named like an entry of SKIP_DIRS.
        if any(part in SKIP_DIRS for part in rel_path.parts):
            continue
        rel = rel_path.as_posix()
        if rel in SKIP_FILES:
            continue
        # The glob matches on name only; a directory called "x.html" cannot
        # be read as text.
        if not page.is_file():
            continue

        parser = AuditParser()
        # Undecodable bytes are dropped on purpose: the audit is best-effort.
        parser.feed(page.read_text(encoding="utf-8", errors="ignore"))
        # Force processing of any input the parser is still buffering.
        parser.close()

        findings = (
            ("duplicate-ids", sorted(parser.duplicate_ids)),
            ("img-missing-alt", parser.missing_alt),
            ("iframe-missing-title", parser.missing_iframe_title),
            ("target-blank-missing-noopener", parser.blank_rel),
        )
        issues.extend((rel, kind, items) for kind, items in findings if items)

    if not issues:
        print("OK: no audit issues found")
        return 0

    print("HTML audit issues:")
    for rel, kind, items in issues:
        print(f"- {rel}: {kind}")
        for item in items[:10]:
            print(f"  - {item}")
        if len(items) > 10:
            print(f"  - ... ({len(items) - 10} more)")
    return 1
+
+
# Run the audit only when executed as a script, and hand main()'s return
# value to the interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)