summaryrefslogtreecommitdiff
path: root/tools/generate_sitemap.py
blob: 4d34cd7079741d94e49f6ce5b93cba3715f9924a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python3
"""Generate sitemap.xml from local HTML files."""

from __future__ import annotations

import re
from datetime import datetime, timezone
from pathlib import Path
from urllib.parse import quote
from xml.sax.saxutils import escape

# Directory one level above the folder that contains this script; every
# scanned file and the generated sitemap.xml live underneath it.
ROOT = Path(__file__).resolve().parents[1]
# Origin prepended to each site-relative path (kept without a trailing slash).
SITE = "https://www.sillylaird.ca"

# File names that never receive a sitemap entry.
EXCLUDE_FILES = {"404.html", "50x.html"}
# Component names that cause a path to be skipped.
EXCLUDE_DIRS = {".git", "partials"}


def should_skip(path: Path) -> bool:
    """Return True when *path* must be left out of the sitemap.

    A path is skipped when any of its components is listed in
    ``EXCLUDE_DIRS``, when its file name ends with ``~``, or when its file
    name is listed in ``EXCLUDE_FILES``.

    Components are examined relative to ``ROOT`` whenever *path* lies under
    it, so the names of directories *above* the site root (for example a
    checkout stored in ``/srv/partials/site``) cannot exclude every file.
    """
    try:
        parts = path.relative_to(ROOT).parts
    except ValueError:
        # Not located under ROOT (e.g. a relative path): judge it as given.
        parts = path.parts
    if not EXCLUDE_DIRS.isdisjoint(parts):
        return True
    name = path.name
    return name.endswith("~") or name in EXCLUDE_FILES


def url_for_path(path: Path) -> str:
    """Return the public URL under ``SITE`` for an HTML file below ``ROOT``.

    ``index.html`` files map to their directory URL (with a trailing slash);
    every other file maps to ``SITE`` plus its POSIX-style relative path.
    The relative path is percent-encoded so that names containing spaces,
    non-ASCII characters or characters such as ``&`` still yield a valid URL.

    Raises:
        ValueError: if *path* is not located under ``ROOT``.
    """
    # quote() leaves "/" and unreserved characters untouched, so plain ASCII
    # paths come out exactly as they went in.
    rel = quote(path.relative_to(ROOT).as_posix())
    index_name = "index.html"
    if rel == index_name or rel.endswith("/" + index_name):
        # Drop the file name but keep the directory's trailing slash.
        rel = rel[: -len(index_name)]
    return f"{SITE}/{rel}"


def lastmod_for_path(path: Path) -> str:
    """Return the file's modification date in UTC, formatted ``YYYY-MM-DD``."""
    modified = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc)
    return f"{modified:%Y-%m-%d}"


def main() -> int:
    """Write ``sitemap.xml`` into ``ROOT`` and return the process exit code.

    Every ``*.html`` file found recursively under ``ROOT`` that is not
    rejected by ``should_skip`` contributes one ``<url>`` element holding its
    public URL and last-modification date. Entries are ordered by URL.

    Returns:
        Always ``0``.
    """
    urls = [
        (url_for_path(html), lastmod_for_path(html))
        for html in ROOT.rglob("*.html")
        if not should_skip(html)
    ]
    urls.sort(key=lambda entry: entry[0])

    lines = [
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]
    for loc, lastmod in urls:
        lines.append("  <url>")
        # Entity-escape the URL: "&", "<" and ">" are not legal in XML text.
        lines.append(f"    <loc>{escape(loc)}</loc>")
        lines.append(f"    <lastmod>{lastmod}</lastmod>")
        lines.append("  </url>")
    lines.append("</urlset>")

    (ROOT / "sitemap.xml").write_text("\n".join(lines) + "\n", encoding="utf-8")
    return 0


# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())