#!/usr/bin/env python3
"""Generate sitemap.xml from local HTML files.

Walks ``ROOT`` (the parent of the directory containing this script),
collects every ``*.html`` file that is not excluded, and writes
``ROOT/sitemap.xml`` using the sitemaps.org 0.9 schema.
"""

from __future__ import annotations

import re
from datetime import datetime, timezone
from pathlib import Path
from xml.sax.saxutils import escape

ROOT = Path(__file__).resolve().parents[1]
SITE = "https://www.sillylaird.ca"

# File names that are never listed (error pages).
EXCLUDE_FILES = {
    "404.html",
    "50x.html",
}
# Directory names whose contents are never listed.
EXCLUDE_DIRS = {"partials", ".git"}

# XML namespace required on the <urlset> root element by the sitemap protocol.
SITEMAP_NS = "http://www.sitemaps.org/schemas/sitemap/0.9"


def should_skip(path: Path) -> bool:
    """Return True if *path* must be left out of the sitemap.

    A path is skipped when any of its components is in ``EXCLUDE_DIRS``,
    when its name ends with ``~``, or when its name is in ``EXCLUDE_FILES``.
    """
    # Only look at components below ROOT: an ancestor directory of the
    # checkout that happens to be called "partials" or ".git" must not
    # exclude the whole site.
    try:
        parts = path.relative_to(ROOT).parts
    except ValueError:
        # Path is not under ROOT (e.g. a relative path); check it as given.
        parts = path.parts
    if any(part in EXCLUDE_DIRS for part in parts):
        return True
    if path.name.endswith("~"):
        return True
    return path.name in EXCLUDE_FILES


def url_for_path(path: Path) -> str:
    """Return the public URL for an HTML file located under ``ROOT``.

    ``index.html`` files map to their directory URL (with a trailing
    slash); every other file maps to ``SITE`` plus its POSIX-style path
    relative to ``ROOT``.

    Raises:
        ValueError: if *path* is not located under ``ROOT``.
    """
    rel = path.relative_to(ROOT).as_posix()
    if rel == "index.html":
        return SITE + "/"
    if rel.endswith("/index.html"):
        # Drop only the file name so the trailing "/" is kept.
        return SITE + "/" + rel[: -len("index.html")]
    return SITE + "/" + rel


def lastmod_for_path(path: Path) -> str:
    """Return the file's modification date in UTC as ``YYYY-MM-DD``."""
    ts = path.stat().st_mtime
    dt = datetime.fromtimestamp(ts, tz=timezone.utc)
    return dt.strftime("%Y-%m-%d")


def render_sitemap(urls: list[tuple[str, str]]) -> str:
    """Return the sitemap XML document for ``(loc, lastmod)`` pairs.

    Entries are emitted in the order given. The ``loc`` value is
    XML-escaped so characters such as ``&`` cannot produce malformed XML.
    The returned text ends with a newline.
    """
    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        f'<urlset xmlns="{SITEMAP_NS}">',
    ]
    for loc, lastmod in urls:
        lines.append("  <url>")
        lines.append(f"    <loc>{escape(loc)}</loc>")
        lines.append(f"    <lastmod>{lastmod}</lastmod>")
        lines.append("  </url>")
    lines.append("</urlset>")
    return "\n".join(lines) + "\n"


def main() -> int:
    """Scan ``ROOT`` for HTML files and write ``ROOT/sitemap.xml``.

    Returns:
        0, used as the process exit status.
    """
    urls = [
        (url_for_path(html), lastmod_for_path(html))
        for html in ROOT.rglob("*.html")
        if not should_skip(html)
    ]
    # Sort by URL so the generated file is stable between runs.
    urls.sort(key=lambda item: item[0])
    (ROOT / "sitemap.xml").write_text(render_sitemap(urls), encoding="utf-8")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())