Lady’s Gitweb - x_status_git/blob - post-receive

   1 #!/usr/bin/env python3
   2 from datetime import datetime as dt, timezone
   3 from glob import iglob
   4 from itertools import starmap
   5 import json
   6 from os import mkdir
   7 from os.path import exists
   8 from pathlib import Path
   9 import re
  10 from shutil import copy2, rmtree
  11 from subprocess import run
  12 from sys import stdin
  13 from warnings import warn
  14 from xml.dom import XHTML_NAMESPACE
  15 from xml.dom.minidom import getDOMImplementation, parseString
  16
  17 GIT_DIRECTORY = "/home/USERNAME/Status.git"
  18 BUILD_DIRECTORY = "/home/USERNAME/status.site.example/.build"
  19 PUBLIC_DIRECTORY = "/home/USERNAME/status.site.example/public"
  20 PUBLIC_URL = "https://status.site.example"
  21 LANG = "en"
  22 LIVE_BRANCH = "live"
  23
  24 UTC = timezone.utc
  25 CURRENT_DATETIME = f"{dt.now(UTC).replace(tzinfo=None).isoformat(timespec='seconds')}Z"
  26 ATOM_NAMESPACE = "http://www.w3.org/2005/Atom"
  27
  28 if stdin.read().split()[-1] == f"refs/heads/{LIVE_BRANCH}":
  29
  30         print(f"This is an update to the '{LIVE_BRANCH}' branch; regenerating site…")
  31
  32         # Set up the build directory.
  33         if exists(BUILD_DIRECTORY):
  34                 rmtree(BUILD_DIRECTORY)
  35         cloneresult = run(["git", "clone", "--local", "--branch", LIVE_BRANCH, GIT_DIRECTORY, BUILD_DIRECTORY], capture_output=True, encoding="utf-8")
  36         # if cloneresult.stderr:
  37                 # print(cloneresult.stderr)
  38         cloneresult.check_returncode()
  39
  40         # Set up various containers.
  41         irimap = {}
  42         months = {}
  43         topics = {}
  44
  45         # Create an XML representation of the provided status text.
  46         def statusxml (text, version="1.0"):
  47                 doc = getDOMImplementation().createDocument(None, "article", None)
  48                 articleElt = doc.documentElement
  49                 articleElt.setAttribute("xmlns", XHTML_NAMESPACE)
  50                 articleElt.setAttribute("lang", LANG)
  51                 for para in text.split("\n\n"):
  52                         paraElt = articleElt.appendChild(doc.createElement("p"))
  53                         for component in re.findall(r'<[a-z]+:[^\s]*>(?:="[^\n"]+")?|\n|[^<\n]+|<(?![a-z]+:[^\s]*>)', para):
  54                                 if component == "\n":
  55                                         paraElt.appendChild(doc.createElement("br"))
  56                                 elif re.fullmatch(r'<[a-z]+:[^\s]*>(?:="[^\n"]+")?', component):
  57                                         href = component.split(">", maxsplit=1)[0][1:]
  58                                         anchorElt = paraElt.appendChild(doc.createElement("a"))
  59                                         anchorElt.setAttribute("href", href)
  60                                         anchorElt.setAttribute("rel", "noreferrer")
  61                                         anchorElt.appendChild(doc.createTextNode(component if len(href) == len(component) - 2 else component[len(href)+4:-1]))
  62                                 else:
  63                                         paraElt.appendChild(doc.createTextNode(component))
  64                 return articleElt.toxml()
  65
  66         # Map status paths to status objects, or None if there is an error.
  67         #
  68         # The provided path must be to a `text` object.
  69         def statusmap (topic, path):
  70                 status = { "@type": "MicroblogPost" }
  71                 version_path = next(path.parent.glob("0=*"), None)
  72                 if version_path and version_path.name != "0=x_status_git_1.0":
  73                         warn(f"Unrecognized version for {path}; skipping.")
  74                         return None
  75                 if topic:
  76                         status["subject"] = topic
  77                 author_path = next(path.parent.glob("1=*"), None)
  78                 if author_path:
  79                         status["creator"] = { "name": author_path.name[2:] }
  80                         with author_path.open("r", encoding="utf-8") as text:
  81                                 status["creator"]["@id"] = text.read().strip()
  82                 title_path = next(path.parent.glob("2=*"), None)
  83                 if title_path:
  84                         with title_path.open("r", encoding="utf-8") as text:
  85                                 title = text.read().strip()
  86                                 status["title"] = title
  87                 date_path = next(path.parent.glob("3=*"), None)
  88                 datetime = ""
  89                 if date_path:
  90                         with date_path.open("r", encoding="utf-8") as text:
  91                                 datetime = text.read().strip()
  92                                 status["created"] = datetime
  93                 else:
  94                         warn(f"Missing date for {path}; skipping.")
  95                         return None
  96                 identifier_path = next(path.parent.glob("4=*"), None)
  97                 identifier = ""
  98                 if identifier_path:
  99                         identifier = identifier_path.name[2:]
 100                         status["@id"] = f"{PUBLIC_URL}/topics/{topic}/{identifier}" if topic else f"{PUBLIC_URL}/statuses/{datetime[0:7]}/{identifier}"
 101                         with identifier_path.open("r", encoding="utf-8") as text:
 102                                 status["identifier"] = text.read().strip()
 103                         irimap[status["identifier"]] = status["@id"]
 104                 else:
 105                         warn(f"Missing identifier for {path}; skipping.")
 106                         return None
 107                 with path.open("r", encoding="utf-8") as text:
 108                         source = text.read().strip()
 109                         status["content"] = statusxml(source)
 110                         status["source"] = { "content": source, "mediaType": "text/plain" }
 111                 return (datetime, identifier, status)
 112
 113         def atomForLD (ld):
 114                 doc = getDOMImplementation().createDocument(None, "feed", None)
 115                 atomElt = doc.documentElement
 116                 atomElt.setAttribute("xmlns", ATOM_NAMESPACE)
 117                 atomElt.setAttribute("xml:lang", LANG)
 118                 subject = ld["subject"] if "subject" in ld else "Statuses"
 119                 titleElt = atomElt.appendChild(doc.createElement("title"))
 120                 titleElt.appendChild(doc.createTextNode(f"{subject} @ {PUBLIC_URL}"))
 121                 updatedElt = atomElt.appendChild(doc.createElement("updated"))
 122                 updatedElt.appendChild(doc.createTextNode(CURRENT_DATETIME))
 123                 generatorElt = atomElt.appendChild(doc.createElement("generator"))
 124                 generatorElt.appendChild(doc.createTextNode("x_status_git"))
 125                 generatorElt.setAttribute("uri", "https://git.ladys.computer/x_status_git")
 126                 atomLinks = {}
 127                 if "OrderedCollectionPage" in ld["@type"]:
 128                         idElt = atomElt.appendChild(doc.createElement("id"))
 129                         idElt.appendChild(doc.createTextNode(f"{PUBLIC_URL}/statuses"))
 130                         atomLinks["alternate"] = f"{PUBLIC_URL}/statuses"
 131                         atomLinks["current"] = f"{PUBLIC_URL}/statuses.atom"
 132                         atomLinks["self"] = atomLinks["current"] if ld["@id"] == ld["current"] else f"{ld['@id']}.atom"
 133                         if "prev" in ld:
 134                                 atomLinks["prev-archive"] = f"{ld['prev']}.atom"
 135                         if "next" in ld and ld["next"] != ld["current"]:
 136                                 atomLinks["next-archive"] = f"{ld['next']}.atom"
 137                 else:
 138                         idElt = atomElt.appendChild(doc.createElement("id"))
 139                         idElt.appendChild(doc.createTextNode(ld["@id"]))
 140                         atomLinks["alternate"] = ld["@id"]
 141                         atomLinks["self"] = f"{ld['@id']}.atom"
 142                 for (rel, href) in atomLinks.items():
 143                         linkElt = atomElt.appendChild(doc.createElement("link"))
 144                         linkElt.setAttribute("rel", rel)
 145                         linkElt.setAttribute("href", href)
 146                 for item in ld["items"]:
 147                         entryElt = atomElt.appendChild(doc.createElement("entry"))
 148                         title = item["source"]["content"].partition("\n")[0]
 149                         if "title" in item:
 150                                 title = item["title"]
 151                         elif len(title) >= 28:
 152                                 title = title[0:27] + "…"
 153                         titleElt = entryElt.appendChild(doc.createElement("title"))
 154                         titleElt.appendChild(doc.createTextNode(title))
 155                         idElt = entryElt.appendChild(doc.createElement("id"))
 156                         idElt.appendChild(doc.createTextNode(item["@id"]))
 157                         updatedElt = entryElt.appendChild(doc.createElement("updated"))
 158                         updatedElt.appendChild(doc.createTextNode(CURRENT_DATETIME))
 159                         if "created" in item:
 160                                 publishedElt = entryElt.appendChild(doc.createElement("published"))
 161                                 publishedElt.appendChild(doc.createTextNode(item["created"]))
 162                         authorElt = entryElt.appendChild(doc.createElement("author"))
 163                         if "creator" in item:
 164                                 nameElt = authorElt.appendChild(doc.createElement("name"))
 165                                 nameElt.appendChild(doc.createTextNode(item["creator"]["name"]))
 166                                 uriElt = authorElt.appendChild(doc.createElement("uri"))
 167                                 uriElt.appendChild(doc.createTextNode(item["creator"]["@id"]))
 168                         else:
 169                                 nameElt = authorElt.appendChild(doc.createElement("name"))
 170                                 nameElt.appendChild(doc.createTextNode("Anonymous"))
 171                         contentElt = entryElt.appendChild(doc.createElement("content"))
 172                         contentElt.setAttribute("type", "xhtml")
 173                         contentDiv = contentElt.appendChild(doc.createElement("div"))
 174                         contentDiv.setAttribute("xmlns", XHTML_NAMESPACE)
 175                         contentDiv.setAttribute("lang", LANG)
 176                         for child in list(parseString(item["content"]).documentElement.childNodes):
 177                                 contentDiv.appendChild(child)
 178                 return (atomLinks["self"], atomElt.toxml())
 179
 180         # Get status paths.
 181         status_paths = []
 182         for yearpath in Path(f"{BUILD_DIRECTORY}/").glob("[0-9][0-9][0-9][0-9]"):
 183                 for monthpath in yearpath.glob("[0-9][0-9]"):
 184                         for daypath in monthpath.glob("[0-9][0-9]"):
 185                                 for statuspath in daypath.glob("*/text"):
 186                                         status_paths.append((None, statuspath))
 187         for topicpath in Path(f"{BUILD_DIRECTORY}/").glob("topic/*"):
 188                 for hash0path in topicpath.glob("[0-9a-f]"):
 189                         for hash1path in hash0path.glob("[0-9a-f]"):
 190                                 for hash2path in hash1path.glob("[0-9a-f]"):
 191                                         for hash3path in hash2path.glob("[0-9a-f]"):
 192                                                 for statuspath in hash3path.glob("*/text"):
 193                                                         status_paths.append((topicpath.name, statuspath))
 194
 195         # Build status objects and listings.
 196         for (datetime, identifier, status) in sorted(filter(None, starmap(statusmap, status_paths))):
 197                 if "subject" in status:
 198                         topic = status["subject"]
 199                         if topic not in topics:
 200                                 topics[topic] = { "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "foaf": "http://xmlns.com/foaf/0.1/", "sioc": "http://rdfs.org/sioc/ns#", "sioct": "http://rdfs.org/sioc/types#", "OrderedCollection": "activity:OrderedCollection", "Thread": "sioc:Thread", "MicroblogPost": "sioct:MicroblogPost", "items": { "@id": "activity:items", "@type": "@id", "@container": "@list" }, "source": { "@id": "activity:source", "@type": "@id", "@context": { "content": { "@id": "activity:content", "@type": "http://www.w3.org/2001/XMLSchema#string" }, "mediaType": "activity:mediaType" } }, "created": { "@id": "dct:created", "@type": "http://www.w3.org/2001/XMLSchema#dateTime" }, "creator": { "@id": "dct:creator", "@type": "@id" }, "identifier": { "@id":  "dct:identifier", "@type": "http://www.w3.org/2001/XMLSchema#anyURI" }, "subject": "dct:subject", "title": "dct:title", "name": "foaf:name", "content": { "@id": "sioc:content", "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" }, "feed": { "@id": "sioc:feed", "@type": "@id" } }, "@id": f"{PUBLIC_URL}/topics/{topic}", "@type": ["OrderedCollection", "Thread"], "feed": f"{PUBLIC_URL}/topics/{topic}.atom", "items": [], "subject": topic }
 201                         topics[topic]["items"].append(status)
 202                 else:
 203                         yyyy_mm = datetime[0:7]
 204                         if yyyy_mm not in months:
 205                                 months[yyyy_mm] = { "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "foaf": "http://xmlns.com/foaf/0.1/", "sioc": "http://rdfs.org/sioc/ns#", "sioct": "http://rdfs.org/sioc/types#", "OrderedCollectionPage": "activity:OrderedCollectionPage", "Thread": "sioc:Thread", "MicroblogPost": "sioct:MicroblogPost", "current": { "@id": "activity:current", "@type": "@id" }, "first": { "@id": "activity:first", "@type": "@id" }, "items": { "@id": "activity:items", "@type": "@id", "@container": "@list" }, "partOf": { "@id": "activity:partOf", "@type": "@id" }, "prev": { "@id": "activity:prev", "@type": "@id" }, "next": { "@id": "activity:next", "@type": "@id" }, "source": { "@id": "activity:source", "@type": "@id", "@context": { "content": { "@id": "activity:content", "@type": "http://www.w3.org/2001/XMLSchema#string" }, "mediaType": "activity:mediaType" } }, "created": { "@id": "dct:created", "@type": "http://www.w3.org/2001/XMLSchema#dateTime" }, "creator": { "@id": "dct:creator", "@type": "@id" }, "identifier": { "@id":  "dct:identifier", "@type": "http://www.w3.org/2001/XMLSchema#anyURI" }, "title": "dct:title", "name": "foaf:name", "content": { "@id": "sioc:content", "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" } }, "@id": f"{PUBLIC_URL}/statuses/{yyyy_mm}", "@type": ["OrderedCollectionPage", "Thread"], "items": [], "partOf": f"{PUBLIC_URL}/statuses" }
 206                         months[yyyy_mm]["items"].append(status)
 207
 208         # Set up the public directory.
 209         if exists(PUBLIC_DIRECTORY):
 210                 rmtree(PUBLIC_DIRECTORY)
 211         mkdir(PUBLIC_DIRECTORY)
 212
 213         # Copy H·T·M·L files to their expected locations.
 214         copy2(f"{BUILD_DIRECTORY}/index.html", f"{PUBLIC_DIRECTORY}/index.html")
 215         copy2(f"{BUILD_DIRECTORY}/about.html", f"{PUBLIC_DIRECTORY}/.about.html")
 216         copy2(f"{BUILD_DIRECTORY}/status.html", f"{PUBLIC_DIRECTORY}/.status.html")
 217         copy2(f"{BUILD_DIRECTORY}/statuses.html", f"{PUBLIC_DIRECTORY}/.statuses.html")
 218         copy2(f"{BUILD_DIRECTORY}/topic.html", f"{PUBLIC_DIRECTORY}/.topic.html")
 219         copy2(f"{BUILD_DIRECTORY}/topics.html", f"{PUBLIC_DIRECTORY}/.topics.html")
 220
 221         # Output “about” metadata
 222         if not exists(f"{PUBLIC_DIRECTORY}/about"):
 223                 mkdir(f"{PUBLIC_DIRECTORY}/about")
 224         with open(f"{PUBLIC_DIRECTORY}/about/index.jsonld", "w", encoding="utf-8") as f:
 225                 json.dump({ "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "sioc": "http://rdfs.org/sioc/ns#", "sioct": "http://rdfs.org/sioc/types#", "Forum": "sioc:Forum", "Thread": "sioc:Thread", "Microblog": "sioct:Microblog", "streams": { "@id": "activity:streams", "@type": "@id" } }, "@id": f"{PUBLIC_URL}", "@type": "Microblog", "streams": [{ "@id": f"{PUBLIC_URL}/statuses", "@type": "Thread" }, { "@id": f"{PUBLIC_URL}/topics", "@type": "Forum" }] }, f, ensure_ascii=False, allow_nan=False)
 226
 227         # Output month‐based listings and the non‐topic index
 228         if not exists(f"{PUBLIC_DIRECTORY}/statuses"):
 229                 mkdir(f"{PUBLIC_DIRECTORY}/statuses")
 230         statuspairs = list(enumerate(months.items()))
 231         for (index, (yyyy_mm, ld)) in statuspairs:
 232                 if not exists(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}"):
 233                         mkdir(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}")
 234                 ld["first"] = f"{PUBLIC_URL}/statuses/{statuspairs[0][1][0]}"
 235                 ld["current"] = f"{PUBLIC_URL}/statuses/{statuspairs[-1][1][0]}"
 236                 if index > 0:
 237                         ld["prev"] = f"{PUBLIC_URL}/statuses/{statuspairs[index - 1][1][0]}"
 238                 if index < len(statuspairs) - 1:
 239                         ld["next"] = f"{PUBLIC_URL}/statuses/{statuspairs[index + 1][1][0]}"
 240                 with open(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}/index.jsonld", "w", encoding="utf-8") as f:
 241                         json.dump(ld, f, ensure_ascii=False, allow_nan=False)
 242                 atomlink, atomxml = atomForLD(ld)
 243                 with open(f"{PUBLIC_DIRECTORY}/{atomlink[len(PUBLIC_URL):-5]}/index.atom", "w", encoding="utf-8") as f:
 244                         f.write(atomxml)
 245         with open(f"{PUBLIC_DIRECTORY}/statuses/index.jsonld", "w", encoding="utf-8") as f:
 246                 json.dump({ "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "sioc": "http://rdfs.org/sioc/ns#", "OrderedCollection": "activity:OrderedCollection", "Thread": "sioc:Thread", "current": { "@id": "activity:current", "@type": "@id" }, "first": { "@id": "activity:first", "@type": "@id" }, "has_parent": { "@id": "sioc:has_parent", "@type": "@id" }, "feed": { "@id": "sioc:feed", "@type": "@id" } }, "@id": f"{PUBLIC_URL}/statuses", "@type": ["OrderedCollection", "Thread"], "feed": f"{PUBLIC_URL}/statuses.atom", "first": f"{PUBLIC_URL}/statuses/{statuspairs[0][1][0]}", "current": f"{PUBLIC_URL}/statuses/{statuspairs[-1][1][0]}", "has_parent": f"{PUBLIC_URL}" }, f, ensure_ascii=False, allow_nan=False)
 247
 248         # Output topic‐based listings and the topic index
 249         if not exists(f"{PUBLIC_DIRECTORY}/topics"):
 250                 mkdir(f"{PUBLIC_DIRECTORY}/topics")
 251         for (topic, ld) in topics.items():
 252                 if not exists(f"{PUBLIC_DIRECTORY}/topics/{topic}"):
 253                         mkdir(f"{PUBLIC_DIRECTORY}/topics/{topic}")
 254                 with open(f"{PUBLIC_DIRECTORY}/topics/{topic}/index.jsonld", "w", encoding="utf-8") as f:
 255                         json.dump(ld, f, ensure_ascii=False, allow_nan=False)
 256                 atomlink, atomxml = atomForLD(ld)
 257                 with open(f"{PUBLIC_DIRECTORY}/{atomlink[len(PUBLIC_URL):-5]}/index.atom", "w", encoding="utf-8") as f:
 258                         f.write(atomxml)
 259         with open(f"{PUBLIC_DIRECTORY}/topics/index.jsonld", "w", encoding="utf-8") as f:
 260                 json.dump({ "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "sioc": "http://rdfs.org/sioc/ns#", "Collection": "activity:Collection", "Forum": "sioc:Forum", "items": { "@id": "activity:items", "@type": "@id" }, "has_parent": { "@id": "sioc:has_parent", "@type": "@id" }, "subject": "dct:subject" }, "@id": f"{PUBLIC_URL}/topics", "@type": ["Collection", "Forum"], "items": list(map(lambda a: { "@id": a["@id"], "subject": a["subject"] }, topics.values())), "has_parent": f"{PUBLIC_URL}" }, f, ensure_ascii=False, allow_nan=False)
 261
 262         # Output the I·R·I redirection page
 263         with open(f"{PUBLIC_DIRECTORY}/.lookup.xhtml", "w", encoding="utf-8") as f:
 264                 doc = getDOMImplementation().createDocument(None, "xml", None)
 265                 htmlElt = doc.documentElement
 266                 htmlElt.setAttribute("xmlns", XHTML_NAMESPACE)
 267                 htmlElt.setAttribute("lang", LANG)
 268                 headElt = htmlElt.appendChild(doc.createElement("head"))
 269                 titleElt = headElt.appendChild(doc.createElement("title"))
 270                 titleElt.appendChild(doc.createTextNode("Redirecting…"))
 271                 scriptElt = headElt.appendChild(doc.createElement("script"))
 272                 scriptElt.setAttribute("type", "text/javascript")
 273                 scriptElt.appendChild(doc.createTextNode(f"location={json.dumps(irimap)}[location.pathname.substring(1)]??`/`"))
 274                 bodyElt = htmlElt.appendChild(doc.createElement("body"))
 275                 bodyElt.appendChild(doc.createTextNode("Attempting to redirect to the proper page… (Requires Javascript.)"))
 276                 f.write(doc.toxml())
 277
 278         # Remove the build directory.
 279         rmtree(BUILD_DIRECTORY)