#!/usr/bin/env python3
# Regenerates the static status site from the status repository whenever the
# live branch is updated.
#
# NOTE(review): the ref list is read from standard input, which matches how
# git feeds a `post-receive` hook — confirm this is how the script is
# installed.
from datetime import datetime as dt, timezone
from glob import iglob
from itertools import starmap
import json
from os import mkdir
from os.path import exists
from pathlib import Path
import re
from shutil import copy2, rmtree
from subprocess import run
from sys import stdin
from warnings import warn
from xml.dom import XHTML_NAMESPACE
from xml.dom.minidom import getDOMImplementation, parseString

GIT_DIRECTORY = "/home/USERNAME/Status.git"
BUILD_DIRECTORY = "/home/USERNAME/status.site.example/.build"
PUBLIC_DIRECTORY = "/home/USERNAME/status.site.example/public"
PUBLIC_URL = "https://status.site.example"
LANG = "en"
LIVE_BRANCH = "live"

UTC = timezone.utc
# Current time in UTC, formatted as `YYYY-MM-DDTHH:MM:SSZ`.
CURRENT_DATETIME = f"{dt.now(UTC).replace(tzinfo=None).isoformat(timespec='seconds')}Z"
ATOM_NAMESPACE = "http://www.w3.org/2005/Atom"

# Only regenerate when the live branch is among the refs named on standard
# input. Every whitespace-separated token is checked (not just the last
# one), so the branch is found wherever it appears in the input, and empty
# input exits quietly instead of raising IndexError.
if f"refs/heads/{LIVE_BRANCH}" not in stdin.read().split():
    raise SystemExit(0)

print(f"This is an update to the '{LIVE_BRANCH}' branch; regenerating site…")

# Set up the build directory.
if exists(BUILD_DIRECTORY):
    rmtree(BUILD_DIRECTORY)
cloneresult = run(
    ["git", "clone", "--local", "--branch", LIVE_BRANCH, GIT_DIRECTORY, BUILD_DIRECTORY],
    capture_output=True,
    encoding="utf-8",
)
# Only surface git's stderr when the clone actually failed; the captured
# output is otherwise discarded.
if cloneresult.returncode != 0 and cloneresult.stderr:
    print(cloneresult.stderr)
cloneresult.check_returncode()

# Set up various containers.
irimap = {}  # status identifier → public IRI of the status
months = {}  # "YYYY-MM" → listing of non-topic statuses for that month
topics = {}  # topic name → listing of statuses for that topic

# Create an XML representation of the provided status text.
# Tokenizer for status text. Alternatives, in order of preference:
#
# - a link `<scheme:rest>` optionally followed by `="label"`;
# - a single newline;
# - a run of text containing neither `<` nor a newline;
# - a lone `<` which does not begin a link.
#
# The IRI part excludes `>` so that a link always ends at its first closing
# bracket; otherwise adjacent links with no whitespace between them (or a
# stray trailing `>`) would be swallowed into one match.
STATUS_COMPONENT = re.compile(
    r'<(?P<href>[a-z]+:[^\s>]*)>(?:="(?P<label>[^\n"]+)")?'
    r'|\n'
    r'|[^<\n]+'
    r'|<'
)


def statusxml(text, version="1.0"):
    """Return `text` serialized as an XHTML `<article>` element.

    Blank lines (`"\\n\\n"`) separate paragraphs, each of which becomes a
    `<p>`. Within a paragraph, a single newline becomes `<br/>`, and
    `<scheme:rest>` becomes an `<a rel="noreferrer">` pointing at
    `scheme:rest`. The link text is the label when the link is written as
    `<scheme:rest>="label"`, and the bracketed IRI itself otherwise.
    Everything else is emitted as text.

    `version` is accepted for interface compatibility but is not used.
    """
    doc = getDOMImplementation().createDocument(None, "article", None)
    article = doc.documentElement
    article.setAttribute("xmlns", XHTML_NAMESPACE)
    article.setAttribute("lang", LANG)
    for para in text.split("\n\n"):
        para_elt = article.appendChild(doc.createElement("p"))
        for match in STATUS_COMPONENT.finditer(para):
            component = match[0]
            href = match["href"]
            if href is not None:
                label = match["label"]
                anchor = para_elt.appendChild(doc.createElement("a"))
                anchor.setAttribute("href", href)
                anchor.setAttribute("rel", "noreferrer")
                # Unlabelled links display the IRI, brackets included.
                anchor.appendChild(doc.createTextNode(component if label is None else label))
            elif component == "\n":
                para_elt.appendChild(doc.createElement("br"))
            else:
                para_elt.appendChild(doc.createTextNode(component))
    return article.toxml()


# Map status paths to status objects, or None if there is an error.
#
# The provided path must be to a `text` object.
def _read_stripped(path):
    """Return the UTF-8 contents of `path` with surrounding whitespace removed."""
    with path.open("r", encoding="utf-8") as text:
        return text.read().strip()


def statusmap(topic, path):
    """Build the status object for the `text` file at `path`.

    Metadata is read from sibling files of `path`, selected by name prefix:

    - `0=…`: version marker; any name other than `0=x_status_git_1.0`
      causes the status to be skipped.
    - `1=…`: author; the name after the prefix is the author name and the
      file contents are the author's `@id`.
    - `2=…`: title (file contents).
    - `3=…`: creation date (file contents); required.
    - `4=…`: identifier; the name after the prefix forms the last segment
      of the public IRI and the file contents are the identifier, which is
      also recorded in `irimap`; required.

    `topic` is the topic name, or a falsy value for a non-topic status.

    Returns a `(datetime, identifier, status)` tuple, or None (after a
    warning) when the version is unrecognized or a required file is
    missing.
    """
    status = {"@type": "MicroblogPost"}
    status_dir = path.parent

    version_path = next(status_dir.glob("0=*"), None)
    if version_path and version_path.name != "0=x_status_git_1.0":
        warn(f"Unrecognized version for {path}; skipping.")
        return None

    if topic:
        status["subject"] = topic

    author_path = next(status_dir.glob("1=*"), None)
    if author_path:
        status["author"] = {"name": author_path.name[2:]}
        status["author"]["@id"] = _read_stripped(author_path)

    title_path = next(status_dir.glob("2=*"), None)
    if title_path:
        status["title"] = _read_stripped(title_path)

    date_path = next(status_dir.glob("3=*"), None)
    if not date_path:
        warn(f"Missing date for {path}; skipping.")
        return None
    datetime = _read_stripped(date_path)
    status["created"] = datetime

    identifier_path = next(status_dir.glob("4=*"), None)
    if not identifier_path:
        warn(f"Missing identifier for {path}; skipping.")
        return None
    identifier = identifier_path.name[2:]
    if topic:
        status["@id"] = f"{PUBLIC_URL}/topics/{topic}/{identifier}"
    else:
        # Non-topic statuses are grouped by the first seven characters of
        # their date (used elsewhere as `yyyy_mm`).
        status["@id"] = f"{PUBLIC_URL}/statuses/{datetime[0:7]}/{identifier}"
    status["identifier"] = _read_stripped(identifier_path)
    irimap[status["identifier"]] = status["@id"]

    status["content"] = statusxml(_read_stripped(path))
    return (datetime, identifier, status)


def _content_summary(content_root, limit=27):
    """Return a plain-text title derived from a parsed status `<article>`.

    Text nodes are concatenated (with `<p>` and `<br>` boundaries treated
    as spaces), whitespace is collapsed, and the result is cut to `limit`
    characters with a trailing `…` only when something was actually cut.
    """
    parts = []

    def collect(node):
        for child in node.childNodes:
            if child.nodeType == child.TEXT_NODE:
                parts.append(child.data)
            elif child.nodeType == child.ELEMENT_NODE:
                collect(child)
                if child.tagName in ("p", "br"):
                    parts.append(" ")

    collect(content_root)
    summary = " ".join("".join(parts).split())
    return summary if len(summary) <= limit else f"{summary[:limit]}…"


def atomForLD(ld):
    """Build an Atom feed for the listing `ld`.

    `ld` is one of the listing dicts built for a month or a topic: it must
    have `@id`, `@type` and `items`, and (for `OrderedCollectionPage`
    listings) `current`, with optional `prev`/`next`.

    Returns a `(self_link, xml)` tuple, where `self_link` is the feed's own
    `rel="self"` URL and `xml` is the serialized `<feed>` element.
    """
    doc = getDOMImplementation().createDocument(None, "feed", None)
    feed = doc.documentElement
    feed.setAttribute("xmlns", ATOM_NAMESPACE)
    feed.setAttribute("xml:lang", LANG)

    def add(parent, name, text=None):
        # Append a new `name` element to `parent`, optionally with text.
        elt = parent.appendChild(doc.createElement(name))
        if text is not None:
            elt.appendChild(doc.createTextNode(text))
        return elt

    subject = ld.get("subject", "Statuses")
    add(feed, "title", f"{subject} @ {PUBLIC_URL}")
    add(feed, "updated", CURRENT_DATETIME)
    generator = add(feed, "generator", "x_status_git")
    generator.setAttribute("uri", "https://git.ladys.computer/x_status_git")

    atom_links = {}
    if "OrderedCollectionPage" in ld["@type"]:
        # Month pages all belong to the one `/statuses` feed; the page for
        # the current month is published as `/statuses.atom` itself and the
        # others are linked as its archives.
        add(feed, "id", f"{PUBLIC_URL}/statuses")
        atom_links["alternate"] = f"{PUBLIC_URL}/statuses"
        atom_links["current"] = f"{PUBLIC_URL}/statuses.atom"
        if ld["@id"] == ld["current"]:
            atom_links["self"] = atom_links["current"]
        else:
            atom_links["self"] = f"{ld['@id']}.atom"
        if "prev" in ld:
            atom_links["prev-archive"] = f"{ld['prev']}.atom"
        if "next" in ld and ld["next"] != ld["current"]:
            atom_links["next-archive"] = f"{ld['next']}.atom"
    else:
        add(feed, "id", ld["@id"])
        atom_links["alternate"] = ld["@id"]
        atom_links["self"] = f"{ld['@id']}.atom"
    for rel, href in atom_links.items():
        link = add(feed, "link")
        link.setAttribute("rel", rel)
        link.setAttribute("href", href)

    for item in ld["items"]:
        content_root = parseString(item["content"]).documentElement
        entry = add(feed, "entry")
        # `item["content"]` is serialized XML, so an untitled entry takes
        # its title from the text of the parsed content rather than from
        # the first characters of the markup.
        title = item["title"] if "title" in item else _content_summary(content_root)
        add(entry, "title", title)
        add(entry, "id", item["@id"])
        add(entry, "updated", CURRENT_DATETIME)
        if "created" in item:
            add(entry, "published", item["created"])
        author = add(entry, "author")
        if "author" in item:
            add(author, "name", item["author"]["name"])
            add(author, "uri", item["author"]["@id"])
        else:
            add(author, "name", "Anonymous")
        content = add(entry, "content")
        content.setAttribute("type", "xhtml")
        content_div = add(content, "div")
        content_div.setAttribute("xmlns", XHTML_NAMESPACE)
        content_div.setAttribute("lang", LANG)
        for child in content_root.childNodes:
            # Import (deep-copy) each node into the feed document instead
            # of moving nodes between documents.
            content_div.appendChild(doc.importNode(child, True))

    return (atom_links["self"], feed.toxml())


# Get status paths.
#
# Non-topic statuses live at `YYYY/MM/DD/*/text`; topic statuses live at
# `topic/<name>/h/h/h/h/*/text`, where each `h` is one lowercase hex digit.
status_paths = []
build_root = Path(f"{BUILD_DIRECTORY}/")
for statuspath in build_root.glob("[0-9][0-9][0-9][0-9]/[0-9][0-9]/[0-9][0-9]/*/text"):
    status_paths.append((None, statuspath))
for topicpath in build_root.glob("topic/*"):
    for statuspath in topicpath.glob("[0-9a-f]/[0-9a-f]/[0-9a-f]/[0-9a-f]/*/text"):
        status_paths.append((topicpath.name, statuspath))

# Build status objects and listings.
def _topic_listing(topic):
    """Return a new, empty JSON-LD listing for `topic`."""
    return {
        "@context": {
            "@language": LANG,
            "activity": "https://www.w3.org/ns/activitystreams#",
            "dct": "http://purl.org/dc/terms/",
            "foaf": "http://xmlns.com/foaf/0.1/",
            "sioc": "http://rdfs.org/sioc/ns#",
            "sioct": "http://rdfs.org/sioc/types#",
            "OrderedCollection": "activity:OrderedCollection",
            "Thread": "sioc:Thread",
            "MicroblogPost": "sioct:MicroblogPost",
            "items": {"@id": "activity:items", "@type": "@id", "@container": "@list"},
            "created": {"@id": "dct:created", "@type": "http://www.w3.org/2001/XMLSchema#dateTime"},
            "creator": {"@id": "dct:creator", "@type": "@id"},
            "identifier": {"@id": "dct:identifier", "@type": "http://www.w3.org/2001/XMLSchema#anyURI"},
            "subject": "dct:subject",
            "name": "foaf:name",
            "title": "dct:title",
            "content": {"@id": "sioc:content", "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"},
        },
        "@id": f"{PUBLIC_URL}/topics/{topic}",
        "@type": ["OrderedCollection", "Thread"],
        "items": [],
        "subject": topic,
    }


def _month_listing(yyyy_mm):
    """Return a new, empty JSON-LD listing page for the month `yyyy_mm`."""
    return {
        "@context": {
            "@language": LANG,
            "activity": "https://www.w3.org/ns/activitystreams#",
            "dct": "http://purl.org/dc/terms/",
            "foaf": "http://xmlns.com/foaf/0.1/",
            "sioc": "http://rdfs.org/sioc/ns#",
            "sioct": "http://rdfs.org/sioc/types#",
            "OrderedCollectionPage": "activity:OrderedCollectionPage",
            "Thread": "sioc:Thread",
            "MicroblogPost": "sioct:MicroblogPost",
            "current": {"@id": "activity:current", "@type": "@id"},
            "first": {"@id": "activity:first", "@type": "@id"},
            "items": {"@id": "activity:items", "@type": "@id", "@container": "@list"},
            "partOf": {"@id": "activity:partOf", "@type": "@id"},
            "prev": {"@id": "activity:prev", "@type": "@id"},
            "next": {"@id": "activity:next", "@type": "@id"},
            "created": {"@id": "dct:created", "@type": "http://www.w3.org/2001/XMLSchema#dateTime"},
            "creator": {"@id": "dct:creator", "@type": "@id"},
            "identifier": {"@id": "dct:identifier", "@type": "http://www.w3.org/2001/XMLSchema#anyURI"},
            "name": "foaf:name",
            "title": "dct:title",
            "content": {"@id": "sioc:content", "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"},
        },
        "@id": f"{PUBLIC_URL}/statuses/{yyyy_mm}",
        "@type": ["OrderedCollectionPage", "Thread"],
        "items": [],
        "partOf": f"{PUBLIC_URL}/statuses",
    }


def _ensure_directory(path):
    """Create the directory `path` unless something already exists there."""
    if not exists(path):
        mkdir(path)


def _write_jsonld(path, data):
    """Serialize `data` as JSON to the file at `path`."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, allow_nan=False)


def _write_atom(ld):
    """Write the Atom feed for listing `ld` beneath the public directory."""
    atomlink, atomxml = atomForLD(ld)
    # The feed's self link is `{PUBLIC_URL}/some/path.atom`; it is written
    # to `some/path/index.atom` under the public directory.
    with open(f"{PUBLIC_DIRECTORY}/{atomlink[len(PUBLIC_URL):-5]}/index.atom", "w", encoding="utf-8") as f:
        f.write(atomxml)


# Sort on the date and identifier only, so that two statuses which tie on
# both never fall through to comparing the status dicts (a TypeError).
for (datetime, identifier, status) in sorted(
    filter(None, starmap(statusmap, status_paths)),
    key=lambda entry: entry[:2],
):
    if "subject" in status:
        topic = status["subject"]
        if topic not in topics:
            topics[topic] = _topic_listing(topic)
        topics[topic]["items"].append(status)
    else:
        yyyy_mm = datetime[0:7]
        if yyyy_mm not in months:
            months[yyyy_mm] = _month_listing(yyyy_mm)
        months[yyyy_mm]["items"].append(status)

# Set up the public directory.
if exists(PUBLIC_DIRECTORY):
    rmtree(PUBLIC_DIRECTORY)
mkdir(PUBLIC_DIRECTORY)

# Copy HTML files to their expected locations.
for source_name, public_name in (
    ("index.html", "index.html"),
    ("about.html", ".about.html"),
    ("status.html", ".status.html"),
    ("statuses.html", ".statuses.html"),
    ("topic.html", ".topic.html"),
    ("topics.html", ".topics.html"),
):
    copy2(f"{BUILD_DIRECTORY}/{source_name}", f"{PUBLIC_DIRECTORY}/{public_name}")

# Output “about” metadata.
_ensure_directory(f"{PUBLIC_DIRECTORY}/about")
_write_jsonld(f"{PUBLIC_DIRECTORY}/about/index.jsonld", {
    "@context": {
        "@language": LANG,
        "activity": "https://www.w3.org/ns/activitystreams#",
        "sioc": "http://rdfs.org/sioc/ns#",
        "sioct": "http://rdfs.org/sioc/types#",
        "Forum": "sioc:Forum",
        "Thread": "sioc:Thread",
        "Microblog": "sioct:Microblog",
        "streams": {"@id": "activity:streams", "@type": "@id"},
    },
    "@id": f"{PUBLIC_URL}",
    "@type": "Microblog",
    "streams": [
        {"@id": f"{PUBLIC_URL}/statuses", "@type": "Thread"},
        {"@id": f"{PUBLIC_URL}/topics", "@type": "Forum"},
    ],
})

# Output month‐based listings and the non‐topic index.
_ensure_directory(f"{PUBLIC_DIRECTORY}/statuses")
month_keys = list(months)
for (index, (yyyy_mm, ld)) in enumerate(months.items()):
    _ensure_directory(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}")
    ld["first"] = f"{PUBLIC_URL}/statuses/{month_keys[0]}"
    ld["current"] = f"{PUBLIC_URL}/statuses/{month_keys[-1]}"
    if index > 0:
        ld["prev"] = f"{PUBLIC_URL}/statuses/{month_keys[index - 1]}"
    if index < len(month_keys) - 1:
        ld["next"] = f"{PUBLIC_URL}/statuses/{month_keys[index + 1]}"
    _write_jsonld(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}/index.jsonld", ld)
    _write_atom(ld)
statuses_index = {
    "@context": {
        "@language": LANG,
        "activity": "https://www.w3.org/ns/activitystreams#",
        "sioc": "http://rdfs.org/sioc/ns#",
        "OrderedCollection": "activity:OrderedCollection",
        "Thread": "sioc:Thread",
        "current": {"@id": "activity:current", "@type": "@id"},
        "first": {"@id": "activity:first", "@type": "@id"},
        # `"@type": "@id"` marks the value as an IRI reference.
        "has_parent": {"@id": "sioc:has_parent", "@type": "@id"},
    },
    "@id": f"{PUBLIC_URL}/statuses",
    "@type": ["OrderedCollection", "Thread"],
}
if month_keys:
    # With no non-topic statuses there is no first or current page to
    # point at, so these links are simply left out.
    statuses_index["first"] = f"{PUBLIC_URL}/statuses/{month_keys[0]}"
    statuses_index["current"] = f"{PUBLIC_URL}/statuses/{month_keys[-1]}"
statuses_index["has_parent"] = f"{PUBLIC_URL}"
_write_jsonld(f"{PUBLIC_DIRECTORY}/statuses/index.jsonld", statuses_index)

# Output topic‐based listings and the topic index.
_ensure_directory(f"{PUBLIC_DIRECTORY}/topics")
for (topic, ld) in topics.items():
    _ensure_directory(f"{PUBLIC_DIRECTORY}/topics/{topic}")
    _write_jsonld(f"{PUBLIC_DIRECTORY}/topics/{topic}/index.jsonld", ld)
    _write_atom(ld)
_write_jsonld(f"{PUBLIC_DIRECTORY}/topics/index.jsonld", {
    "@context": {
        "@language": LANG,
        "activity": "https://www.w3.org/ns/activitystreams#",
        "dct": "http://purl.org/dc/terms/",
        "sioc": "http://rdfs.org/sioc/ns#",
        "Collection": "activity:Collection",
        "Forum": "sioc:Forum",
        "items": {"@id": "activity:items", "@type": "@id"},
        "has_parent": {"@id": "sioc:has_parent", "@type": "@id"},
        "subject": "dct:subject",
    },
    "@id": f"{PUBLIC_URL}/topics",
    "@type": ["Collection", "Forum"],
    "items": [{"@id": ld["@id"], "subject": ld["subject"]} for ld in topics.values()],
    "has_parent": f"{PUBLIC_URL}",
})

# Output the IRI redirection page.
with open(f"{PUBLIC_DIRECTORY}/.lookup.xhtml", "w", encoding="utf-8") as f:
    # The root element of an XHTML document is `html`.
    doc = getDOMImplementation().createDocument(None, "html", None)
    htmlElt = doc.documentElement
    htmlElt.setAttribute("xmlns", XHTML_NAMESPACE)
    htmlElt.setAttribute("lang", LANG)
    headElt = htmlElt.appendChild(doc.createElement("head"))
    titleElt = headElt.appendChild(doc.createElement("title"))
    titleElt.appendChild(doc.createTextNode("Redirecting…"))
    scriptElt = headElt.appendChild(doc.createElement("script"))
    scriptElt.setAttribute("type", "text/javascript")
    # The script looks the request path (minus its leading slash) up in the
    # identifier → IRI map and navigates there, or to `/` when not found.
    scriptElt.appendChild(doc.createTextNode(f"location={json.dumps(irimap)}[location.pathname.substring(1)]??`/`"))
    bodyElt = htmlElt.appendChild(doc.createElement("body"))
    bodyElt.appendChild(doc.createTextNode("Attempting to redirect to the proper page… (Requires Javascript.)"))
    f.write(doc.toxml())

# Remove the build directory.
rmtree(BUILD_DIRECTORY)