From 720d752748b793a2f5cf3cc14cb75ad86e8919c0 Mon Sep 17 00:00:00 2001 From: sillylaird Date: Tue, 3 Feb 2026 21:27:57 -0500 Subject: First commit --- tools/generate_sitemap.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 tools/generate_sitemap.py (limited to 'tools/generate_sitemap.py') diff --git a/tools/generate_sitemap.py b/tools/generate_sitemap.py new file mode 100644 index 0000000..4d34cd7 --- /dev/null +++ b/tools/generate_sitemap.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +"""Generate sitemap.xml from local HTML files.""" + +from __future__ import annotations + +from pathlib import Path +from datetime import datetime, timezone +import re + +ROOT = Path(__file__).resolve().parents[1] +SITE = "https://www.sillylaird.ca" + +EXCLUDE_FILES = { + "404.html", + "50x.html", +} +EXCLUDE_DIRS = {"partials", ".git"} + + +def should_skip(path: Path) -> bool: + if any(part in EXCLUDE_DIRS for part in path.parts): + return True + if path.name.endswith("~"): + return True + if path.name in EXCLUDE_FILES: + return True + return False + + +def url_for_path(path: Path) -> str: + rel = path.relative_to(ROOT).as_posix() + if rel == "index.html": + return SITE + "/" + if rel.endswith("/index.html"): + return SITE + "/" + rel[:-len("index.html")] + return SITE + "/" + rel + + +def lastmod_for_path(path: Path) -> str: + ts = path.stat().st_mtime + dt = datetime.fromtimestamp(ts, tz=timezone.utc) + return dt.strftime("%Y-%m-%d") + + +def main() -> int: + urls = [] + for html in ROOT.rglob("*.html"): + if should_skip(html): + continue + urls.append((url_for_path(html), lastmod_for_path(html))) + + urls.sort(key=lambda item: item[0]) + + lines = [ + "", + '', + ] + for loc, lastmod in urls: + lines.append(" ") + lines.append(f" {loc}") + lines.append(f" {lastmod}") + lines.append(" ") + lines.append("") + + (ROOT / "sitemap.xml").write_text("\n".join(lines) + "\n", encoding="utf-8") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) -- cgit v1.2.3