diff options
Diffstat (limited to 'tools/html_audit.py')
| -rw-r--r-- | tools/html_audit.py | 94 |
1 files changed, 94 insertions, 0 deletions
#!/usr/bin/env python3
"""Lightweight HTML audit for common a11y/markup issues.

Walks every ``*.html`` file below the repository root and reports:

* duplicated ``id`` attribute values within a single file,
* ``<img>`` elements without an ``alt`` attribute,
* ``<iframe>`` elements without a non-empty ``title``,
* ``<a target="_blank">`` links lacking ``rel="noopener"`` (or
  ``noreferrer``, which implies ``noopener``).

The process exit status is 0 when nothing is found and 1 otherwise.
"""

from __future__ import annotations

from html.parser import HTMLParser
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]

SKIP_DIRS = {"partials", ".git"}
SKIP_FILES = {
    "test.html",
    "test_jp.html",
    "test_zh.html",
    "startpage/test.html",
}

# Maximum number of offending items printed per issue before truncating.
_MAX_ITEMS_SHOWN = 10

# rel tokens that stop a target=_blank page from reaching window.opener.
_SAFE_REL_TOKENS = frozenset({"noopener", "noreferrer"})


class AuditParser(HTMLParser):
    """Collect markup issues from one HTML document.

    Feed the document with ``feed()`` (then ``close()``) and read the
    result attributes afterwards.
    """

    def __init__(self) -> None:
        super().__init__()
        # Occurrence count for every non-empty id value seen.
        self.ids: dict[str, int] = {}
        # id values that occurred more than once.
        self.duplicate_ids: set[str] = set()
        # src of every <img> that has no alt attribute at all.
        self.missing_alt: list[str] = []
        # src of every <iframe> whose title is missing or empty.
        self.missing_iframe_title: list[str] = []
        # href of every target=_blank link without noopener/noreferrer.
        self.blank_rel: list[str] = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Inspect one start tag and record any issue it exhibits.

        Args:
            tag: Lower-cased element name as supplied by ``HTMLParser``.
            attrs: ``(name, value)`` pairs; ``value`` is ``None`` for
                valueless attributes.
        """
        # On duplicated attributes the first occurrence wins, matching how
        # HTML parsers treat them; valueless attributes become "".
        attr_map: dict[str, str] = {}
        for name, value in attrs:
            attr_map.setdefault(name.lower(), value or "")

        ident = attr_map.get("id", "")
        if ident:
            if ident in self.ids:
                self.duplicate_ids.add(ident)
            self.ids[ident] = self.ids.get(ident, 0) + 1

        if tag == "img":
            # An empty alt="" is valid (decorative image); only a missing
            # attribute is reported.
            if "alt" not in attr_map:
                self.missing_alt.append(attr_map.get("src", ""))
        elif tag == "iframe":
            if not attr_map.get("title", ""):
                self.missing_iframe_title.append(attr_map.get("src", ""))
        elif tag == "a":
            # The _blank keyword and rel tokens are ASCII case-insensitive,
            # and rel is a whitespace-separated token list, so compare
            # lower-cased tokens rather than substrings.
            if attr_map.get("target", "").lower() == "_blank":
                rel_tokens = attr_map.get("rel", "").lower().split()
                if _SAFE_REL_TOKENS.isdisjoint(rel_tokens):
                    self.blank_rel.append(attr_map.get("href", ""))


def _iter_html_files(root: Path):
    """Yield ``(path, posix_relative_name)`` for each auditable file under *root*."""
    # Sorted so the report order does not depend on filesystem order.
    for path in sorted(root.rglob("*.html")):
        rel_path = path.relative_to(root)
        # Only directories *below* root count; an ancestor directory that
        # happens to be called e.g. "partials" must not hide every file.
        if any(part in SKIP_DIRS for part in rel_path.parts[:-1]):
            continue
        rel = rel_path.as_posix()
        if rel in SKIP_FILES:
            continue
        if not path.is_file():
            continue
        yield path, rel


def _audit_file(path: Path) -> AuditParser:
    """Parse *path* and return the parser holding its findings."""
    parser = AuditParser()
    # Undecodable bytes are dropped on purpose: the audit is best-effort and
    # should not abort on a single badly encoded file.
    parser.feed(path.read_text(encoding="utf-8", errors="ignore"))
    # Flush whatever the parser is still buffering.
    parser.close()
    return parser


def main(root: Path = ROOT) -> int:
    """Audit every HTML file under *root* and print a report.

    Args:
        root: Directory to scan recursively; defaults to the repository root.

    Returns:
        0 when no issues were found, 1 otherwise (suitable as exit status).
    """
    issues: list[tuple[str, str, list[str]]] = []

    for path, rel in _iter_html_files(root):
        parser = _audit_file(path)
        findings = (
            ("duplicate-ids", sorted(parser.duplicate_ids)),
            ("img-missing-alt", parser.missing_alt),
            ("iframe-missing-title", parser.missing_iframe_title),
            ("target-blank-missing-noopener", parser.blank_rel),
        )
        issues.extend((rel, kind, items) for kind, items in findings if items)

    if not issues:
        print("OK: no audit issues found")
        return 0

    print("HTML audit issues:")
    for rel, kind, items in issues:
        print(f"- {rel}: {kind}")
        for item in items[:_MAX_ITEMS_SHOWN]:
            print(f" - {item}")
        if len(items) > _MAX_ITEMS_SHOWN:
            print(f" - ... ({len(items) - _MAX_ITEMS_SHOWN} more)")
    return 1


if __name__ == "__main__":
    raise SystemExit(main())
