summaryrefslogtreecommitdiff
path: root/tools/generate_sitemap.py
diff options
context:
space:
mode:
author: sillylaird <sillylaird@fastmail.ca> 2026-02-03 21:27:57 -0500
committer: sillylaird <sillylaird@fastmail.ca> 2026-02-03 21:27:57 -0500
commit: 720d752748b793a2f5cf3cc14cb75ad86e8919c0 (patch)
tree: 29120103307cb17e7d6c283cc198ec2484f934cd /tools/generate_sitemap.py
First commit
Diffstat (limited to 'tools/generate_sitemap.py')
-rw-r--r-- tools/generate_sitemap.py 70
1 files changed, 70 insertions, 0 deletions
diff --git a/tools/generate_sitemap.py b/tools/generate_sitemap.py
new file mode 100644
index 0000000..4d34cd7
--- /dev/null
+++ b/tools/generate_sitemap.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""Generate sitemap.xml from local HTML files."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from datetime import datetime, timezone
+import re
+
# Origin (scheme + host, no trailing slash) that every sitemap URL starts with.
SITE = "https://www.sillylaird.ca"

# Directory that is scanned for pages: the parent of the directory holding
# this script, computed from the script's resolved location.
ROOT = Path(__file__).resolve().parents[1]

# Any path with one of these names among its components is left out.
EXCLUDE_DIRS = {".git", "partials"}

# Files with exactly these names are left out, wherever they live.
EXCLUDE_FILES = {"50x.html", "404.html"}
+
+
def should_skip(path: Path) -> bool:
    """Return True when *path* must be left out of the sitemap.

    A path is skipped when any of the following holds:

    * one of its components is named in ``EXCLUDE_DIRS``;
    * its file name ends with ``~``;
    * its file name is listed in ``EXCLUDE_FILES``.

    Args:
        path: Candidate file, normally an absolute path below ``ROOT``.

    Returns:
        True if the file should be ignored, False if it should be listed.
    """
    # Only look at the components *below* ROOT. Checking the absolute path
    # would exclude every page whenever the checkout itself happens to live
    # under a directory called e.g. "partials".
    try:
        parts = path.relative_to(ROOT).parts
    except ValueError:
        # Not under ROOT (e.g. a relative path): fall back to all components.
        parts = path.parts

    if any(part in EXCLUDE_DIRS for part in parts):
        return True
    return path.name.endswith("~") or path.name in EXCLUDE_FILES
+
+
def url_for_path(path: Path) -> str:
    """Return the public URL under ``SITE`` for the file *path*.

    The path is taken relative to ``ROOT``. An ``index.html`` file maps to
    the URL of its directory (with a trailing slash); every other file maps
    to ``SITE`` plus its relative POSIX path.

    Args:
        path: A file located under ``ROOT``.

    Returns:
        The absolute URL, with the path portion percent-encoded.

    Raises:
        ValueError: If *path* is not located under ``ROOT`` (raised by
            ``Path.relative_to``).
    """
    # Local import so this function does not depend on a file-level import.
    from urllib.parse import quote

    # Percent-encode everything except "/" so that names containing spaces,
    # non-ASCII characters, "&", "?" or "#" still yield a valid URL. Plain
    # ASCII names are returned unchanged.
    rel = quote(path.relative_to(ROOT).as_posix())

    if rel == "index.html" or rel.endswith("/index.html"):
        # Keep the directory part (including its trailing slash) only.
        rel = rel[: -len("index.html")]
    return SITE + "/" + rel
+
+
def lastmod_for_path(path: Path) -> str:
    """Return the modification date of *path* as ``YYYY-MM-DD`` in UTC.

    Args:
        path: An existing file; its ``st_mtime`` is read via ``Path.stat``.

    Returns:
        The file's modification date, formatted with ``%Y-%m-%d``.
    """
    modified_at = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc)
    return f"{modified_at:%Y-%m-%d}"
+
+
def main() -> int:
    """Write ``sitemap.xml`` into ``ROOT`` from the HTML files found below it.

    Recursively collects ``*.html`` files under ``ROOT``, drops those that
    ``should_skip`` rejects, and writes one ``<url>`` entry (``<loc>`` and
    ``<lastmod>``) per remaining file, ordered by URL so the output is
    stable from run to run.

    Returns:
        ``0`` once the file has been written; used as the exit status.
    """
    # Local import so this function does not depend on a file-level import.
    from xml.sax.saxutils import escape

    pages = (
        page
        for page in ROOT.rglob("*.html")
        # rglob matches on the name alone, so a directory called
        # "something.html" would otherwise be listed as a page.
        if page.is_file() and not should_skip(page)
    )
    entries = sorted(
        ((url_for_path(page), lastmod_for_path(page)) for page in pages),
        key=lambda entry: entry[0],
    )

    # The sitemap protocol asks for these two in addition to &, < and >,
    # which escape() handles by default.
    quote_entities = {"'": "&apos;", '"': "&quot;"}

    lines = [
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]
    for loc, lastmod in entries:
        lines.append(" <url>")
        # Entity-escape the URL: a bare "&" would make the XML ill-formed.
        lines.append(f" <loc>{escape(loc, quote_entities)}</loc>")
        lines.append(f" <lastmod>{lastmod}</lastmod>")
        lines.append(" </url>")
    lines.append("</urlset>")

    (ROOT / "sitemap.xml").write_text("\n".join(lines) + "\n", encoding="utf-8")
    return 0
+
+
# Run the generator only when executed as a script, and hand main()'s return
# value to the interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)