Capture source and use it to generate Atom titles

[x_status_git] / post-receive
diff --git a/post-receive b/post-receive

index e1da847d92113611b02bc89ef9f700f7a667982d..9698ba44dab7d6aa30c0bd5d86847fb612596ab8 100755 (executable)
--- a/post-receive
+++ b/post-receive
@@ -1,4 +1,5 @@
  #!/usr/bin/env python3
+from datetime import datetime as dt, timezone
  from glob import iglob
  from itertools import starmap
  import json
@@ -11,7 +12,7 @@ from subprocess import run
  from sys import stdin
  from warnings import warn
  from xml.dom import XHTML_NAMESPACE
-from xml.dom.minidom import getDOMImplementation
+from xml.dom.minidom import getDOMImplementation, parseString
  
  GIT_DIRECTORY = "/home/USERNAME/Status.git"
  BUILD_DIRECTORY = "/home/USERNAME/status.site.example/.build"
@@ -20,6 +21,10 @@ PUBLIC_URL = "https://status.site.example"
  LANG = "en"
  LIVE_BRANCH = "live"
  
+UTC = timezone.utc
+CURRENT_DATETIME = f"{dt.now(UTC).replace(tzinfo=None).isoformat(timespec='seconds')}Z"
+ATOM_NAMESPACE = "http://www.w3.org/2005/Atom"
+
  if stdin.read().split()[-1] == f"refs/heads/{LIVE_BRANCH}":
  
         print(f"This is an update to the '{LIVE_BRANCH}' branch; regenerating site…")
@@ -27,9 +32,13 @@ if stdin.read().split()[-1] == f"refs/heads/{LIVE_BRANCH}":
         # Set up the build directory.
         if exists(BUILD_DIRECTORY):
                 rmtree(BUILD_DIRECTORY)
-       run(["git", "clone", "--local", "--branch", "live", GIT_DIRECTORY, BUILD_DIRECTORY], capture_output=True, encoding="utf-8")
+       cloneresult = run(["git", "clone", "--local", "--branch", LIVE_BRANCH, GIT_DIRECTORY, BUILD_DIRECTORY], capture_output=True, encoding="utf-8")
+       # if cloneresult.stderr:
+               # print(cloneresult.stderr)
+       cloneresult.check_returncode()
  
         # Set up various containers.
+       irimap = {}
         months = {}
         topics = {}
  
@@ -59,23 +68,23 @@ if stdin.read().split()[-1] == f"refs/heads/{LIVE_BRANCH}":
         # The provided path must be to a `text` object.
         def statusmap (topic, path):
                 status = { "@type": "MicroblogPost" }
-               version_path = next(path.parent.glob("0=*"))
+               version_path = next(path.parent.glob("0=*"), None)
                 if version_path and version_path.name != "0=x_status_git_1.0":
                         warn(f"Unrecognized version for {path}; skipping.")
                         return None
                 if topic:
                         status["subject"] = topic
-               author_path = next(path.parent.glob("1=*"))
+               author_path = next(path.parent.glob("1=*"), None)
                 if author_path:
                         status["author"] = { "name": author_path.name[2:] }
                         with author_path.open("r", encoding="utf-8") as text:
                                 status["author"]["@id"] = text.read().strip()
-               title_path = next(path.parent.glob("2=*"))
+               title_path = next(path.parent.glob("2=*"), None)
                 if title_path:
                         with title_path.open("r", encoding="utf-8") as text:
                                 title = text.read().strip()
                                 status["title"] = title
-               date_path = next(path.parent.glob("3=*"))
+               date_path = next(path.parent.glob("3=*"), None)
                 datetime = ""
                 if date_path:
                         with date_path.open("r", encoding="utf-8") as text:
@@ -84,20 +93,90 @@ if stdin.read().split()[-1] == f"refs/heads/{LIVE_BRANCH}":
                 else:
                         warn(f"Missing date for {path}; skipping.")
                         return None
-               identifier_path = next(path.parent.glob("4=*"))
+               identifier_path = next(path.parent.glob("4=*"), None)
                 identifier = ""
                 if identifier_path:
                         identifier = identifier_path.name[2:]
-                       status["@id"] = f"{PUBLIC_URL}/topics/{topic}/{identifier}" if topic else f"{PUBLIC_URL}/{datetime[0:7]}/{identifier}"
+                       status["@id"] = f"{PUBLIC_URL}/topics/{topic}/{identifier}" if topic else f"{PUBLIC_URL}/statuses/{datetime[0:7]}/{identifier}"
                         with identifier_path.open("r", encoding="utf-8") as text:
                                 status["identifier"] = text.read().strip()
+                       irimap[status["identifier"]] = status["@id"]
                 else:
                         warn(f"Missing identifier for {path}; skipping.")
                         return None
                 with path.open("r", encoding="utf-8") as text:
-                       status["content"] = statusxml(text.read().strip())
+                       source = text.read().strip()
+                       status["content"] = statusxml(source)
+                       status["source"] = { "content": source, "mediaType": "text/plain" }
                 return (datetime, identifier, status)
  
+       def atomForLD (ld):  # Build an Atom feed from one JSON-LD collection dict `ld` (a month page or a topic); returns a tuple of (the feed's "self" URL, the serialized feed XML string).
+               doc = getDOMImplementation().createDocument(None, "feed", None)
+               atomElt = doc.documentElement
+               atomElt.setAttribute("xmlns", ATOM_NAMESPACE)
+               atomElt.setAttribute("xml:lang", LANG)
+               subject = ld["subject"] if "subject" in ld else "Statuses"  # collections without a "subject" key get the generic feed title "Statuses"
+               titleElt = atomElt.appendChild(doc.createElement("title"))
+               titleElt.appendChild(doc.createTextNode(f"{subject} @ {PUBLIC_URL}"))
+               updatedElt = atomElt.appendChild(doc.createElement("updated"))
+               updatedElt.appendChild(doc.createTextNode(CURRENT_DATETIME))  # feed-level timestamp is the time this script run started, not a per-status time
+               generatorElt = atomElt.appendChild(doc.createElement("generator"))
+               generatorElt.appendChild(doc.createTextNode("x_status_git"))
+               generatorElt.setAttribute("uri", "https://git.ladys.computer/x_status_git")
+               atomLinks = {}  # maps link rel -> href; emitted as <link> elements below, and "self" is also returned to the caller
+               if "OrderedCollectionPage" in ld["@type"]:  # paged collections: every page shares the `/statuses` feed id and alternate link
+                       idElt = atomElt.appendChild(doc.createElement("id"))
+                       idElt.appendChild(doc.createTextNode(f"{PUBLIC_URL}/statuses"))
+                       atomLinks["alternate"] = f"{PUBLIC_URL}/statuses"
+                       atomLinks["current"] = f"{PUBLIC_URL}/statuses.atom"
+                       atomLinks["self"] = atomLinks["current"] if ld["@id"] == ld["current"] else f"{ld['@id']}.atom"  # the page that is `current` is published as statuses.atom; other pages use their own @id plus ".atom"
+                       if "prev" in ld:
+                               atomLinks["prev-archive"] = f"{ld['prev']}.atom"
+                       if "next" in ld and ld["next"] != ld["current"]:  # no next-archive link when the next page is the current one
+                               atomLinks["next-archive"] = f"{ld['next']}.atom"
+               else:  # any other collection: the feed is identified by the collection's own @id
+                       idElt = atomElt.appendChild(doc.createElement("id"))
+                       idElt.appendChild(doc.createTextNode(ld["@id"]))
+                       atomLinks["alternate"] = ld["@id"]
+                       atomLinks["self"] = f"{ld['@id']}.atom"
+               for (rel, href) in atomLinks.items():
+                       linkElt = atomElt.appendChild(doc.createElement("link"))
+                       linkElt.setAttribute("rel", rel)
+                       linkElt.setAttribute("href", href)
+               for item in ld["items"]:  # one <entry> per status in the collection
+                       entryElt = atomElt.appendChild(doc.createElement("entry"))
+                       title = item["source"]["content"].partition("\n")[0]  # default title: first line of the status's plain-text source
+                       if "title" in item:  # an explicit title always wins and is never truncated
+                               title = item["title"]
+                       elif len(title) >= 28:  # derived titles of 28+ characters are cut to 27 characters plus an ellipsis
+                               title = title[0:27] + "…"
+                       titleElt = entryElt.appendChild(doc.createElement("title"))
+                       titleElt.appendChild(doc.createTextNode(title))
+                       idElt = entryElt.appendChild(doc.createElement("id"))
+                       idElt.appendChild(doc.createTextNode(item["@id"]))
+                       updatedElt = entryElt.appendChild(doc.createElement("updated"))
+                       updatedElt.appendChild(doc.createTextNode(CURRENT_DATETIME))  # every entry is stamped with the run time, so all entries change on each regeneration
+                       if "created" in item:
+                               publishedElt = entryElt.appendChild(doc.createElement("published"))
+                               publishedElt.appendChild(doc.createTextNode(item["created"]))
+                       authorElt = entryElt.appendChild(doc.createElement("author"))
+                       if "author" in item:  # NOTE(review): reads both "name" and "@id" from the author dict — confirm every producer sets both
+                               nameElt = authorElt.appendChild(doc.createElement("name"))
+                               nameElt.appendChild(doc.createTextNode(item["author"]["name"]))
+                               uriElt = authorElt.appendChild(doc.createElement("uri"))
+                               uriElt.appendChild(doc.createTextNode(item["author"]["@id"]))
+                       else:  # an <author> element is always emitted; statuses without an author are attributed to "Anonymous"
+                               nameElt = authorElt.appendChild(doc.createElement("name"))
+                               nameElt.appendChild(doc.createTextNode("Anonymous"))
+                       contentElt = entryElt.appendChild(doc.createElement("content"))
+                       contentElt.setAttribute("type", "xhtml")
+                       contentDiv = contentElt.appendChild(doc.createElement("div"))  # wrapper <div> in the XHTML namespace that receives the status markup
+                       contentDiv.setAttribute("xmlns", XHTML_NAMESPACE)
+                       contentDiv.setAttribute("lang", LANG)
+                       for child in list(parseString(item["content"]).documentElement.childNodes):  # item["content"] is parsed as an XML string; the child list is copied first, presumably because appendChild reparents each node out of the list being iterated
+                               contentDiv.appendChild(child)
+               return (atomLinks["self"], atomElt.toxml())  # serializes the root element itself rather than the Document
+
         # Get status paths.
         status_paths = []
         for yearpath in Path(f"{BUILD_DIRECTORY}/").glob("[0-9][0-9][0-9][0-9]"):
@@ -118,12 +197,12 @@ if stdin.read().split()[-1] == f"refs/heads/{LIVE_BRANCH}":
                 if "subject" in status:
                         topic = status["subject"]
                         if topic not in topics:
-                               topics[topic] = { "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "foaf": "http://xmlns.com/foaf/0.1/", "sioc": "http://rdfs.org/sioc/ns#", "sioct": "http://rdfs.org/sioc/types#", "OrderedCollection": "activity:OrderedCollection", "Thread": "sioc:Thread", "MicroblogPost": "sioct:MicroblogPost", "items": { "@id": "activity:items", "@type": "@id", "@container": "@list" }, "created": { "@id": "dct:created", "@type": "http://www.w3.org/2001/XMLSchema#dateTime" }, "creator": { "@id": "dct:creator", "@type": "@id" }, "identifier": { "@id":  "dct:identifier", "@type": "http://www.w3.org/2001/XMLSchema#anyURI" }, "subject": "dct:subject", "name": "foaf:name", "content": { "@id": "sioc:content", "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" } }, "@id": f"{PUBLIC_URL}/topics/{topic}", "@type": ["OrderedCollection", "Thread"], "items": [], "subject": topic }
+                               topics[topic] = { "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "foaf": "http://xmlns.com/foaf/0.1/", "sioc": "http://rdfs.org/sioc/ns#", "sioct": "http://rdfs.org/sioc/types#", "OrderedCollection": "activity:OrderedCollection", "Thread": "sioc:Thread", "MicroblogPost": "sioct:MicroblogPost", "items": { "@id": "activity:items", "@type": "@id", "@container": "@list" }, "source": { "@id": "activity:source", "@type": "@id", "@context": { "content": { "@id": "activity:content", "@type": "http://www.w3.org/2001/XMLSchema#string" }, "mediaType": "activity:mediaType" } }, "created": { "@id": "dct:created", "@type": "http://www.w3.org/2001/XMLSchema#dateTime" }, "creator": { "@id": "dct:creator", "@type": "@id" }, "identifier": { "@id":  "dct:identifier", "@type": "http://www.w3.org/2001/XMLSchema#anyURI" }, "subject": "dct:subject", "title": "dct:title", "name": "foaf:name", "content": { "@id": "sioc:content", "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" }, "feed": { "@id": "sioc:feed", "@type": "@id" } }, "@id": f"{PUBLIC_URL}/topics/{topic}", "@type": ["OrderedCollection", "Thread"], "feed": f"{PUBLIC_URL}/topics/{topic}.atom", "items": [], "subject": topic }
                         topics[topic]["items"].append(status)
                 else:
                         yyyy_mm = datetime[0:7]
                         if yyyy_mm not in months:
-                               months[yyyy_mm] = { "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "foaf": "http://xmlns.com/foaf/0.1/", "sioc": "http://rdfs.org/sioc/ns#", "sioct": "http://rdfs.org/sioc/types#", "OrderedCollectionPage": "activity:OrderedCollectionPage", "Thread": "sioc:Thread", "MicroblogPost": "sioct:MicroblogPost", "current": { "@id": "activity:current", "@type": "@id" }, "first": { "@id": "activity:first", "@type": "@id" }, "items": { "@id": "activity:items", "@type": "@id", "@container": "@list" }, "partOf": { "@id": "activity:partOf", "@type": "@id" }, "prev": { "@id": "activity:prev", "@type": "@id" }, "next": { "@id": "activity:next", "@type": "@id" }, "created": { "@id": "dct:created", "@type": "http://www.w3.org/2001/XMLSchema#dateTime" }, "creator": { "@id": "dct:creator", "@type": "@id" }, "identifier": { "@id":  "dct:identifier", "@type": "http://www.w3.org/2001/XMLSchema#anyURI" }, "name": "foaf:name", "content": { "@id": "sioc:content", "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" } }, "@id": f"{PUBLIC_URL}/{yyyy_mm}", "@type": ["OrderedCollectionPage", "Thread"], "items": [], "partOf": f"{PUBLIC_URL}/statuses" }
+                               months[yyyy_mm] = { "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "foaf": "http://xmlns.com/foaf/0.1/", "sioc": "http://rdfs.org/sioc/ns#", "sioct": "http://rdfs.org/sioc/types#", "OrderedCollectionPage": "activity:OrderedCollectionPage", "Thread": "sioc:Thread", "MicroblogPost": "sioct:MicroblogPost", "current": { "@id": "activity:current", "@type": "@id" }, "first": { "@id": "activity:first", "@type": "@id" }, "items": { "@id": "activity:items", "@type": "@id", "@container": "@list" }, "partOf": { "@id": "activity:partOf", "@type": "@id" }, "prev": { "@id": "activity:prev", "@type": "@id" }, "next": { "@id": "activity:next", "@type": "@id" }, "source": { "@id": "activity:source", "@type": "@id", "@context": { "content": { "@id": "activity:content", "@type": "http://www.w3.org/2001/XMLSchema#string" }, "mediaType": "activity:mediaType" } }, "created": { "@id": "dct:created", "@type": "http://www.w3.org/2001/XMLSchema#dateTime" }, "creator": { "@id": "dct:creator", "@type": "@id" }, "identifier": { "@id":  "dct:identifier", "@type": "http://www.w3.org/2001/XMLSchema#anyURI" }, "title": "dct:title", "name": "foaf:name", "content": { "@id": "sioc:content", "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" } }, "@id": f"{PUBLIC_URL}/statuses/{yyyy_mm}", "@type": ["OrderedCollectionPage", "Thread"], "items": [], "partOf": f"{PUBLIC_URL}/statuses" }
                         months[yyyy_mm]["items"].append(status)
  
         # Set up the public directory.
@@ -146,22 +225,25 @@ if stdin.read().split()[-1] == f"refs/heads/{LIVE_BRANCH}":
                 json.dump({ "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "sioc": "http://rdfs.org/sioc/ns#", "sioct": "http://rdfs.org/sioc/types#", "Forum": "sioc:Forum", "Thread": "sioc:Thread", "Microblog": "sioct:Microblog", "streams": { "@id": "activity:streams", "@type": "@id" } }, "@id": f"{PUBLIC_URL}", "@type": "Microblog", "streams": [{ "@id": f"{PUBLIC_URL}/statuses", "@type": "Thread" }, { "@id": f"{PUBLIC_URL}/topics", "@type": "Forum" }] }, f, ensure_ascii=False, allow_nan=False)
  
         # Output month‐based listings and the non‐topic index
+       if not exists(f"{PUBLIC_DIRECTORY}/statuses"):
+               mkdir(f"{PUBLIC_DIRECTORY}/statuses")
         statuspairs = list(enumerate(months.items()))
         for (index, (yyyy_mm, ld)) in statuspairs:
-               if not exists(f"{PUBLIC_DIRECTORY}/{yyyy_mm}"):
-                       mkdir(f"{PUBLIC_DIRECTORY}/{yyyy_mm}")
-               ld["first"] = f"{PUBLIC_URL}/{statuspairs[0][1][0]}"
-               ld["current"] = f"{PUBLIC_URL}/{statuspairs[-1][1][0]}"
+               if not exists(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}"):
+                       mkdir(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}")
+               ld["first"] = f"{PUBLIC_URL}/statuses/{statuspairs[0][1][0]}"
+               ld["current"] = f"{PUBLIC_URL}/statuses/{statuspairs[-1][1][0]}"
                 if index > 0:
-                       ld["prev"] = f"{PUBLIC_URL}/{statuspairs[index - 1][1][0]}"
+                       ld["prev"] = f"{PUBLIC_URL}/statuses/{statuspairs[index - 1][1][0]}"
                 if index < len(statuspairs) - 1:
-                       ld["next"] = f"{PUBLIC_URL}/{statuspairs[index + 1][1][0]}"
-               with open(f"{PUBLIC_DIRECTORY}/{yyyy_mm}/index.jsonld", "w", encoding="utf-8") as f:
+                       ld["next"] = f"{PUBLIC_URL}/statuses/{statuspairs[index + 1][1][0]}"
+               with open(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}/index.jsonld", "w", encoding="utf-8") as f:
                         json.dump(ld, f, ensure_ascii=False, allow_nan=False)
-       if not exists(f"{PUBLIC_DIRECTORY}/statuses"):
-               mkdir(f"{PUBLIC_DIRECTORY}/statuses")
+               atomlink, atomxml = atomForLD(ld)
+               with open(f"{PUBLIC_DIRECTORY}/{atomlink[len(PUBLIC_URL):-5]}/index.atom", "w", encoding="utf-8") as f:
+                       f.write(atomxml)
         with open(f"{PUBLIC_DIRECTORY}/statuses/index.jsonld", "w", encoding="utf-8") as f:
-               json.dump({ "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "sioc": "http://rdfs.org/sioc/ns#", "OrderedCollection": "activity:OrderedCollection", "Thread": "sioc:Thread", "current": { "@id": "activity:current", "@type": "@id" }, "first": { "@id": "activity:first", "@type": "@id" }, "has_parent": { "@id": "sioc:has_parent", "@type": "id" } }, "@id": f"{PUBLIC_URL}/statuses", "@type": ["OrderedCollection", "Thread"], "first": f"{PUBLIC_URL}/{statuspairs[0][1][0]}", "current": f"{PUBLIC_URL}/{statuspairs[-1][1][0]}", "has_parent": f"{PUBLIC_URL}" }, f, ensure_ascii=False, allow_nan=False)
+               json.dump({ "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "sioc": "http://rdfs.org/sioc/ns#", "OrderedCollection": "activity:OrderedCollection", "Thread": "sioc:Thread", "current": { "@id": "activity:current", "@type": "@id" }, "first": { "@id": "activity:first", "@type": "@id" }, "has_parent": { "@id": "sioc:has_parent", "@type": "@id" }, "feed": { "@id": "sioc:feed", "@type": "@id" } }, "@id": f"{PUBLIC_URL}/statuses", "@type": ["OrderedCollection", "Thread"], "feed": f"{PUBLIC_URL}/statuses.atom", "first": f"{PUBLIC_URL}/statuses/{statuspairs[0][1][0]}", "current": f"{PUBLIC_URL}/statuses/{statuspairs[-1][1][0]}", "has_parent": f"{PUBLIC_URL}" }, f, ensure_ascii=False, allow_nan=False)
  
         # Output topic‐based listings and the topic index
         if not exists(f"{PUBLIC_DIRECTORY}/topics"):
@@ -171,8 +253,27 @@ if stdin.read().split()[-1] == f"refs/heads/{LIVE_BRANCH}":
                         mkdir(f"{PUBLIC_DIRECTORY}/topics/{topic}")
                 with open(f"{PUBLIC_DIRECTORY}/topics/{topic}/index.jsonld", "w", encoding="utf-8") as f:
                         json.dump(ld, f, ensure_ascii=False, allow_nan=False)
+               atomlink, atomxml = atomForLD(ld)
+               with open(f"{PUBLIC_DIRECTORY}/{atomlink[len(PUBLIC_URL):-5]}/index.atom", "w", encoding="utf-8") as f:
+                       f.write(atomxml)
         with open(f"{PUBLIC_DIRECTORY}/topics/index.jsonld", "w", encoding="utf-8") as f:
-               json.dump({ "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "sioc": "http://rdfs.org/sioc/ns#", "Collection": "activity:Collection", "Forum": "sioc:Forum", "items": { "@id": "activity:items", "@type": "@id" }, "has_parent": { "@id": "sioc:has_parent", "@type": "id" }, "subject": "dct:subject" }, "@id": f"{PUBLIC_URL}/topics", "@type": ["Collection", "Forum"], "items": list(map(lambda a: { "@id": a["@id"], "subject": a["subject"] }, topics.values())), "has_parent": f"{PUBLIC_URL}" }, f, ensure_ascii=False, allow_nan=False)
+               json.dump({ "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "sioc": "http://rdfs.org/sioc/ns#", "Collection": "activity:Collection", "Forum": "sioc:Forum", "items": { "@id": "activity:items", "@type": "@id" }, "has_parent": { "@id": "sioc:has_parent", "@type": "@id" }, "subject": "dct:subject" }, "@id": f"{PUBLIC_URL}/topics", "@type": ["Collection", "Forum"], "items": list(map(lambda a: { "@id": a["@id"], "subject": a["subject"] }, topics.values())), "has_parent": f"{PUBLIC_URL}" }, f, ensure_ascii=False, allow_nan=False)
+
+       # Output the I·R·I redirection page: a static XHTML file whose script looks up the requested path in `irimap` and redirects to the matching status URL
+       with open(f"{PUBLIC_DIRECTORY}/.lookup.xhtml", "w", encoding="utf-8") as f:
+               doc = getDOMImplementation().createDocument(None, "html", None)  # root element must be <html> (it was "xml"), since it carries the XHTML namespace and holds <head>/<body>
+               htmlElt = doc.documentElement
+               htmlElt.setAttribute("xmlns", XHTML_NAMESPACE)
+               htmlElt.setAttribute("lang", LANG)
+               headElt = htmlElt.appendChild(doc.createElement("head"))
+               titleElt = headElt.appendChild(doc.createElement("title"))
+               titleElt.appendChild(doc.createTextNode("Redirecting…"))
+               scriptElt = headElt.appendChild(doc.createElement("script"))
+               scriptElt.setAttribute("type", "text/javascript")
+               scriptElt.appendChild(doc.createTextNode(f"location={json.dumps(irimap)}[location.pathname.substring(1)]??`/`"))  # the identifier→URL map is embedded as a JSON object literal; the key is the request path minus its leading slash, and unknown paths fall back to `/`
+               bodyElt = htmlElt.appendChild(doc.createElement("body"))
+               bodyElt.appendChild(doc.createTextNode("Attempting to redirect to the proper page… (Requires Javascript.)"))  # visible fallback text for clients that do not run the script
+               f.write(doc.toxml())
  
         # Remove the build directory.
         rmtree(BUILD_DIRECTORY)