#!/usr/bin/env python3
from datetime import datetime as dt, timezone
from glob import iglob
from itertools import starmap
import json
from os import mkdir
from os.path import exists
from pathlib import Path
import re
from shutil import copy2, rmtree
from subprocess import run
from sys import stdin
from warnings import warn
from xml.dom import XHTML_NAMESPACE
from xml.dom.minidom import getDOMImplementation, parseString

# Git repository which the live branch is cloned from.
GIT_DIRECTORY = "/home/USERNAME/Status.git"
# Scratch directory the live branch is cloned into. It is deleted before
# the clone if it already exists, and again once the site has been built.
BUILD_DIRECTORY = "/home/USERNAME/status.site.example/.build"
# Directory the generated site is written to. It is deleted and
# recreated on every rebuild.
PUBLIC_DIRECTORY = "/home/USERNAME/status.site.example/public"
# Base (no trailing slash) of every I·R·I generated for the site.
PUBLIC_URL = "https://status.site.example"
# Language tag written into the generated X·M·L and J·S·O·N‐L·D.
LANG = "en"
# Only an update to this branch triggers a rebuild.
LIVE_BRANCH = "live"

UTC = timezone.utc
# Time of this run in U·T·C, to the second, with a literal `Z` suffix
# (for example `2023-01-31T12:00:00Z`). Used for every Atom `updated`
# element.
CURRENT_DATETIME = f"{dt.now(UTC).replace(tzinfo=None).isoformat(timespec='seconds')}Z"
ATOM_NAMESPACE = "http://www.w3.org/2005/Atom"

# Standard input is expected to carry one whitespace‐separated line per
# updated ref, with the ref name last (presumably the format Git gives to
# a receive hook; confirm against how this script is installed).
#
# Every line is checked, so that a push which updates several refs still
# regenerates the site when the live branch is not the last one listed,
# and empty input is simply ignored instead of raising an IndexError.
pushed_refs = [fields[-1] for fields in map(str.split, stdin) if fields]
if f"refs/heads/{LIVE_BRANCH}" in pushed_refs:

	print(f"This is an update to the '{LIVE_BRANCH}' branch; regenerating site…")

	# Set up the build directory.
	#
	# Any leftover build directory is discarded so that the clone starts
	# from a clean slate.
	if exists(BUILD_DIRECTORY):
		rmtree(BUILD_DIRECTORY)
	cloneresult = run(["git", "clone", "--local", "--branch", LIVE_BRANCH, GIT_DIRECTORY, BUILD_DIRECTORY], capture_output=True, encoding="utf-8")
	if cloneresult.returncode != 0 and cloneresult.stderr:
		# The output of Git is captured, so it has to be printed explicitly
		# or the reason for a failed clone would be lost. It is only shown
		# on failure to keep successful runs quiet.
		print(cloneresult.stderr)
	# Raises CalledProcessError if the clone did not succeed.
	cloneresult.check_returncode()

	# Set up various containers.
	irimap = {}  # status identifier → public I·R·I of the status
	months = {}  # `YYYY-MM` → listing of the non‐topic statuses of that month
	topics = {}  # topic name → listing of the statuses of that topic

	# Create an XML representation of the provided status text.
	#
	# The result is a serialized X·H·T·M·L `article` element. Blank lines
	# separate paragraphs (`p`), single newlines become `br` elements, and
	# tokens of the form `<scheme:…>` become links, optionally followed by
	# `="label"` to supply the link text. Without a label, the link text is
	# the token itself, angle brackets included.
	#
	# `version` is accepted but not currently used.
	def statusxml (text, version="1.0"):
		document = getDOMImplementation().createDocument(None, "article", None)
		root = document.documentElement
		root.setAttribute("xmlns", XHTML_NAMESPACE)
		root.setAttribute("lang", LANG)
		tokenize = re.compile(r'<[a-z]+:[^\s]*>(?:="[^\n"]+")?|\n|[^<\n]+|<(?![a-z]+:[^\s]*>)').findall
		is_link = re.compile(r'<[a-z]+:[^\s]*>(?:="[^\n"]+")?').fullmatch
		for block in text.split("\n\n"):
			paragraph = root.appendChild(document.createElement("p"))
			for token in tokenize(block):
				if token == "\n":
					paragraph.appendChild(document.createElement("br"))
					continue
				if not is_link(token):
					paragraph.appendChild(document.createTextNode(token))
					continue
				# The link target is whatever sits between the opening `<` and
				# the first `>`.
				target = token.split(">", maxsplit=1)[0][1:]
				if len(target) == len(token) - 2:
					# Nothing follows the closing `>`, so there is no label.
					label = token
				else:
					# Skip `<`, the target, and `>="`; drop the closing quote.
					label = token[len(target)+4:-1]
				anchor = paragraph.appendChild(document.createElement("a"))
				anchor.setAttribute("href", target)
				anchor.setAttribute("rel", "noreferrer")
				anchor.appendChild(document.createTextNode(label))
		return root.toxml()

	# Map status paths to status objects, or None if there is an error.
	#
	# The provided path must be to a `text` object. Metadata is read from
	# its sibling files, which are found by name prefix:
	#
	# - `0=…`: version marker; if present it must be `0=x_status_git_1.0`.
	# - `1=…`: author; the name follows the prefix, the file holds the I·R·I.
	# - `2=…`: title; the file holds the title.
	# - `3=…`: date; the file holds the creation datetime. Required.
	# - `4=…`: identifier; the name (after the prefix) is used in the public
	#   I·R·I, the file holds the identifier itself. Required.
	#
	# On success the result is a `(datetime, identifier, status)` tuple and
	# the identifier is registered in `irimap`. Otherwise a warning is
	# emitted and None is returned.
	def statusmap (topic, path):
		folder = path.parent

		def first_match (pattern):
			return next(folder.glob(pattern), None)

		def stripped (file):
			with file.open("r", encoding="utf-8") as handle:
				return handle.read().strip()

		status = { "@type": "MicroblogPost" }

		version_file = first_match("0=*")
		if version_file is not None and version_file.name != "0=x_status_git_1.0":
			warn(f"Unrecognized version for {path}; skipping.")
			return None

		if topic:
			status["subject"] = topic

		author_file = first_match("1=*")
		if author_file is not None:
			status["author"] = { "name": author_file.name[2:] }
			status["author"]["@id"] = stripped(author_file)

		title_file = first_match("2=*")
		if title_file is not None:
			status["title"] = stripped(title_file)

		date_file = first_match("3=*")
		if date_file is None:
			warn(f"Missing date for {path}; skipping.")
			return None
		created = stripped(date_file)
		status["created"] = created

		identifier_file = first_match("4=*")
		if identifier_file is None:
			warn(f"Missing identifier for {path}; skipping.")
			return None
		slug = identifier_file.name[2:]
		if topic:
			status["@id"] = f"{PUBLIC_URL}/topics/{topic}/{slug}"
		else:
			# Non‐topic statuses are filed under the `YYYY-MM` of their date.
			status["@id"] = f"{PUBLIC_URL}/statuses/{created[0:7]}/{slug}"
		status["identifier"] = stripped(identifier_file)
		irimap[status["identifier"]] = status["@id"]

		status["content"] = statusxml(stripped(path))
		return (created, slug, status)

	# Build an Atom feed for a listing.
	#
	# `ld` is a listing object as built for `months` and `topics`: it must
	# have `@id`, `@type` and `items`, may have `subject`, and (when its
	# type includes `OrderedCollectionPage`) must have `current` and may
	# have `prev` and `next`.
	#
	# Returns a `(self link, serialized feed)` pair. The feed and each of
	# its entries are stamped with `CURRENT_DATETIME` as their `updated`
	# time.
	def atomForLD (ld):
		# Flatten a parsed status into text, putting a space after each
		# paragraph and in place of each line break so that words on either
		# side do not run together.
		def plaintext (node):
			if node.nodeType == node.TEXT_NODE:
				return node.data
			text = "".join(map(plaintext, node.childNodes))
			if node.nodeType == node.ELEMENT_NODE and node.tagName in ("p", "br"):
				return text + " "
			return text

		doc = getDOMImplementation().createDocument(None, "feed", None)
		atomElt = doc.documentElement
		atomElt.setAttribute("xmlns", ATOM_NAMESPACE)
		atomElt.setAttribute("xml:lang", LANG)
		subject = ld["subject"] if "subject" in ld else "Statuses"
		titleElt = atomElt.appendChild(doc.createElement("title"))
		titleElt.appendChild(doc.createTextNode(f"{subject} @ {PUBLIC_URL}"))
		updatedElt = atomElt.appendChild(doc.createElement("updated"))
		updatedElt.appendChild(doc.createTextNode(CURRENT_DATETIME))
		generatorElt = atomElt.appendChild(doc.createElement("generator"))
		generatorElt.appendChild(doc.createTextNode("x_status_git"))
		generatorElt.setAttribute("uri", "https://git.ladys.computer/x_status_git")
		atomLinks = {}
		if "OrderedCollectionPage" in ld["@type"]:
			# Monthly pages all belong to the one `statuses` feed; the page
			# for the current month is served as that feed itself, and the
			# other months are linked to it as archives.
			idElt = atomElt.appendChild(doc.createElement("id"))
			idElt.appendChild(doc.createTextNode(f"{PUBLIC_URL}/statuses"))
			atomLinks["alternate"] = f"{PUBLIC_URL}/statuses"
			atomLinks["current"] = f"{PUBLIC_URL}/statuses.atom"
			atomLinks["self"] = atomLinks["current"] if ld["@id"] == ld["current"] else f"{ld['@id']}.atom"
			if "prev" in ld:
				atomLinks["prev-archive"] = f"{ld['prev']}.atom"
			if "next" in ld and ld["next"] != ld["current"]:
				atomLinks["next-archive"] = f"{ld['next']}.atom"
		else:
			idElt = atomElt.appendChild(doc.createElement("id"))
			idElt.appendChild(doc.createTextNode(ld["@id"]))
			atomLinks["alternate"] = ld["@id"]
			atomLinks["self"] = f"{ld['@id']}.atom"
		for (rel, href) in atomLinks.items():
			linkElt = atomElt.appendChild(doc.createElement("link"))
			linkElt.setAttribute("rel", rel)
			linkElt.setAttribute("href", href)
		for item in ld["items"]:
			# `content` is serialized X·M·L; parse it once, for both the
			# fallback title and the entry content.
			contentRoot = parseString(item["content"]).documentElement
			entryElt = atomElt.appendChild(doc.createElement("entry"))
			if "title" in item:
				title = item["title"]
			else:
				# Excerpt the text of the status, not its markup: slicing the
				# serialized content would title every entry with the start
				# of its opening tag.
				title = " ".join(plaintext(contentRoot).split())[0:27] + "…"
			titleElt = entryElt.appendChild(doc.createElement("title"))
			titleElt.appendChild(doc.createTextNode(title))
			idElt = entryElt.appendChild(doc.createElement("id"))
			idElt.appendChild(doc.createTextNode(item["@id"]))
			updatedElt = entryElt.appendChild(doc.createElement("updated"))
			updatedElt.appendChild(doc.createTextNode(CURRENT_DATETIME))
			if "created" in item:
				publishedElt = entryElt.appendChild(doc.createElement("published"))
				publishedElt.appendChild(doc.createTextNode(item["created"]))
			authorElt = entryElt.appendChild(doc.createElement("author"))
			if "author" in item:
				nameElt = authorElt.appendChild(doc.createElement("name"))
				nameElt.appendChild(doc.createTextNode(item["author"]["name"]))
				uriElt = authorElt.appendChild(doc.createElement("uri"))
				uriElt.appendChild(doc.createTextNode(item["author"]["@id"]))
			else:
				nameElt = authorElt.appendChild(doc.createElement("name"))
				nameElt.appendChild(doc.createTextNode("Anonymous"))
			contentElt = entryElt.appendChild(doc.createElement("content"))
			contentElt.setAttribute("type", "xhtml")
			contentDiv = contentElt.appendChild(doc.createElement("div"))
			contentDiv.setAttribute("xmlns", XHTML_NAMESPACE)
			contentDiv.setAttribute("lang", LANG)
			# Appending a node removes it from its old parent, so iterate over
			# a copy of the child list.
			for child in list(contentRoot.childNodes):
				contentDiv.appendChild(child)
		return (atomLinks["self"], atomElt.toxml())

	# Get status paths.
	#
	# Each entry is a `(topic, path)` pair, where `topic` is None for dated
	# statuses. Dated statuses are looked for four directory levels down
	# (four digits, two digits, two digits, anything) and topic statuses
	# four single‐hex‐digit levels below `topic/<name>`, then one more.
	def descend (start, patterns):
		# Apply one glob pattern per directory level, keeping the matches in
		# the same order as equivalent nested loops would visit them.
		found = [start]
		for pattern in patterns:
			found = [child for parent in found for child in parent.glob(pattern)]
		return found

	build_root = Path(f"{BUILD_DIRECTORY}/")
	status_paths = [
		(None, textpath)
		for textpath in descend(build_root, ["[0-9][0-9][0-9][0-9]", "[0-9][0-9]", "[0-9][0-9]", "*/text"])
	]
	for topicpath in build_root.glob("topic/*"):
		status_paths.extend(
			(topicpath.name, textpath)
			for textpath in descend(topicpath, ["[0-9a-f]", "[0-9a-f]", "[0-9a-f]", "[0-9a-f]", "*/text"])
		)

	# Build status objects and listings.
	#
	# Paths which `statusmap` rejected (None) are filtered out. The rest
	# are ordered by datetime and then identifier, so each listing ends up
	# in chronological order and `months` is filled oldest month first.
	#
	# The sort key deliberately leaves out the status object: comparing
	# whole tuples would fall through to comparing dictionaries whenever
	# two statuses share a datetime and identifier, which raises TypeError.
	for (datetime, identifier, status) in sorted(filter(None, starmap(statusmap, status_paths)), key=lambda entry: entry[0:2]):
		if "subject" in status:
			topic = status["subject"]
			if topic not in topics:
				topics[topic] = { "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "foaf": "http://xmlns.com/foaf/0.1/", "sioc": "http://rdfs.org/sioc/ns#", "sioct": "http://rdfs.org/sioc/types#", "OrderedCollection": "activity:OrderedCollection", "Thread": "sioc:Thread", "MicroblogPost": "sioct:MicroblogPost", "items": { "@id": "activity:items", "@type": "@id", "@container": "@list" }, "created": { "@id": "dct:created", "@type": "http://www.w3.org/2001/XMLSchema#dateTime" }, "creator": { "@id": "dct:creator", "@type": "@id" }, "identifier": { "@id":  "dct:identifier", "@type": "http://www.w3.org/2001/XMLSchema#anyURI" }, "subject": "dct:subject", "name": "foaf:name", "title": "dct:title", "content": { "@id": "sioc:content", "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" } }, "@id": f"{PUBLIC_URL}/topics/{topic}", "@type": ["OrderedCollection", "Thread"], "items": [], "subject": topic }
			topics[topic]["items"].append(status)
		else:
			# Statuses without a topic are grouped by the `YYYY-MM` prefix of
			# their datetime.
			yyyy_mm = datetime[0:7]
			if yyyy_mm not in months:
				months[yyyy_mm] = { "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "foaf": "http://xmlns.com/foaf/0.1/", "sioc": "http://rdfs.org/sioc/ns#", "sioct": "http://rdfs.org/sioc/types#", "OrderedCollectionPage": "activity:OrderedCollectionPage", "Thread": "sioc:Thread", "MicroblogPost": "sioct:MicroblogPost", "current": { "@id": "activity:current", "@type": "@id" }, "first": { "@id": "activity:first", "@type": "@id" }, "items": { "@id": "activity:items", "@type": "@id", "@container": "@list" }, "partOf": { "@id": "activity:partOf", "@type": "@id" }, "prev": { "@id": "activity:prev", "@type": "@id" }, "next": { "@id": "activity:next", "@type": "@id" }, "created": { "@id": "dct:created", "@type": "http://www.w3.org/2001/XMLSchema#dateTime" }, "creator": { "@id": "dct:creator", "@type": "@id" }, "identifier": { "@id":  "dct:identifier", "@type": "http://www.w3.org/2001/XMLSchema#anyURI" }, "name": "foaf:name", "title": "dct:title", "content": { "@id": "sioc:content", "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" } }, "@id": f"{PUBLIC_URL}/statuses/{yyyy_mm}", "@type": ["OrderedCollectionPage", "Thread"], "items": [], "partOf": f"{PUBLIC_URL}/statuses" }
			months[yyyy_mm]["items"].append(status)

	# Set up the public directory.
	#
	# Whatever was published before is thrown away; the site is always
	# rebuilt from scratch.
	if exists(PUBLIC_DIRECTORY):
		rmtree(PUBLIC_DIRECTORY)
	mkdir(PUBLIC_DIRECTORY)

	# Copy H·T·M·L files to their expected locations.
	#
	# Only the index keeps its name; every other page is published as a
	# dotfile.
	for (source_name, public_name) in (
		("index.html", "index.html"),
		("about.html", ".about.html"),
		("status.html", ".status.html"),
		("statuses.html", ".statuses.html"),
		("topic.html", ".topic.html"),
		("topics.html", ".topics.html"),
	):
		copy2(f"{BUILD_DIRECTORY}/{source_name}", f"{PUBLIC_DIRECTORY}/{public_name}")

	# Output “about” metadata
	#
	# This describes the site as a whole: a microblog with two streams,
	# the dated statuses and the topics.
	about_directory = f"{PUBLIC_DIRECTORY}/about"
	if not exists(about_directory):
		mkdir(about_directory)
	about_ld = {
		"@context": {
			"@language": LANG,
			"activity": "https://www.w3.org/ns/activitystreams#",
			"sioc": "http://rdfs.org/sioc/ns#",
			"sioct": "http://rdfs.org/sioc/types#",
			"Forum": "sioc:Forum",
			"Thread": "sioc:Thread",
			"Microblog": "sioct:Microblog",
			"streams": { "@id": "activity:streams", "@type": "@id" },
		},
		"@id": f"{PUBLIC_URL}",
		"@type": "Microblog",
		"streams": [
			{ "@id": f"{PUBLIC_URL}/statuses", "@type": "Thread" },
			{ "@id": f"{PUBLIC_URL}/topics", "@type": "Forum" },
		],
	}
	with open(f"{about_directory}/index.jsonld", "w", encoding="utf-8") as f:
		json.dump(about_ld, f, ensure_ascii=False, allow_nan=False)

	# Output month‐based listings and the non‐topic index
	#
	# `months` was filled in chronological order, so the first and last
	# entries are the oldest and the current month, and neighbouring
	# entries are the previous and next pages.
	if not exists(f"{PUBLIC_DIRECTORY}/statuses"):
		mkdir(f"{PUBLIC_DIRECTORY}/statuses")
	statuspairs = list(enumerate(months.items()))
	for (index, (yyyy_mm, ld)) in statuspairs:
		if not exists(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}"):
			mkdir(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}")
		ld["first"] = f"{PUBLIC_URL}/statuses/{statuspairs[0][1][0]}"
		ld["current"] = f"{PUBLIC_URL}/statuses/{statuspairs[-1][1][0]}"
		if index > 0:
			ld["prev"] = f"{PUBLIC_URL}/statuses/{statuspairs[index - 1][1][0]}"
		if index < len(statuspairs) - 1:
			ld["next"] = f"{PUBLIC_URL}/statuses/{statuspairs[index + 1][1][0]}"
		with open(f"{PUBLIC_DIRECTORY}/statuses/{yyyy_mm}/index.jsonld", "w", encoding="utf-8") as f:
			json.dump(ld, f, ensure_ascii=False, allow_nan=False)
		atomlink, atomxml = atomForLD(ld)
		# The feed link is `{PUBLIC_URL}/….atom`; strip the base and the
		# extension to get the directory the feed is written into. For the
		# current month that is `statuses` itself rather than the month.
		with open(f"{PUBLIC_DIRECTORY}/{atomlink[len(PUBLIC_URL):-5]}/index.atom", "w", encoding="utf-8") as f:
			f.write(atomxml)
	# `has_parent` is coerced with the `@id` keyword, like every other
	# I·R·I‐valued term here; a bare `id` is not a valid type mapping.
	statusesindex = { "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "sioc": "http://rdfs.org/sioc/ns#", "OrderedCollection": "activity:OrderedCollection", "Thread": "sioc:Thread", "current": { "@id": "activity:current", "@type": "@id" }, "first": { "@id": "activity:first", "@type": "@id" }, "has_parent": { "@id": "sioc:has_parent", "@type": "@id" } }, "@id": f"{PUBLIC_URL}/statuses", "@type": ["OrderedCollection", "Thread"] }
	if statuspairs:
		# With no dated statuses at all there is no page to point at, so
		# these links are left out instead of failing on an empty list.
		statusesindex["first"] = f"{PUBLIC_URL}/statuses/{statuspairs[0][1][0]}"
		statusesindex["current"] = f"{PUBLIC_URL}/statuses/{statuspairs[-1][1][0]}"
	statusesindex["has_parent"] = f"{PUBLIC_URL}"
	with open(f"{PUBLIC_DIRECTORY}/statuses/index.jsonld", "w", encoding="utf-8") as f:
		json.dump(statusesindex, f, ensure_ascii=False, allow_nan=False)

	# Output topic‐based listings and the topic index
	#
	# Each topic gets its own directory holding its listing and its feed;
	# the index then lists every topic by I·R·I and subject.
	if not exists(f"{PUBLIC_DIRECTORY}/topics"):
		mkdir(f"{PUBLIC_DIRECTORY}/topics")
	for (topic, ld) in topics.items():
		if not exists(f"{PUBLIC_DIRECTORY}/topics/{topic}"):
			mkdir(f"{PUBLIC_DIRECTORY}/topics/{topic}")
		with open(f"{PUBLIC_DIRECTORY}/topics/{topic}/index.jsonld", "w", encoding="utf-8") as f:
			json.dump(ld, f, ensure_ascii=False, allow_nan=False)
		atomlink, atomxml = atomForLD(ld)
		# The feed link is `{PUBLIC_URL}/….atom`; strip the base and the
		# extension to get the directory the feed is written into.
		with open(f"{PUBLIC_DIRECTORY}/{atomlink[len(PUBLIC_URL):-5]}/index.atom", "w", encoding="utf-8") as f:
			f.write(atomxml)
	topicitems = [{ "@id": ld["@id"], "subject": ld["subject"] } for ld in topics.values()]
	with open(f"{PUBLIC_DIRECTORY}/topics/index.jsonld", "w", encoding="utf-8") as f:
		# `has_parent` is coerced with the `@id` keyword, like `items`; a
		# bare `id` is not a valid type mapping.
		json.dump({ "@context": { "@language": LANG, "activity": "https://www.w3.org/ns/activitystreams#", "dct": "http://purl.org/dc/terms/", "sioc": "http://rdfs.org/sioc/ns#", "Collection": "activity:Collection", "Forum": "sioc:Forum", "items": { "@id": "activity:items", "@type": "@id" }, "has_parent": { "@id": "sioc:has_parent", "@type": "@id" }, "subject": "dct:subject" }, "@id": f"{PUBLIC_URL}/topics", "@type": ["Collection", "Forum"], "items": topicitems, "has_parent": f"{PUBLIC_URL}" }, f, ensure_ascii=False, allow_nan=False)

	# Output the I·R·I redirection page
	#
	# The page carries the whole identifier → I·R·I map in a script, which
	# looks up the requested path (without its leading slash) and sends
	# the browser to the matching status, or to `/` if there is none.
	with open(f"{PUBLIC_DIRECTORY}/.lookup.xhtml", "w", encoding="utf-8") as f:
		# The root of an X·H·T·M·L document is `html`; the `head` and `body`
		# created below are its children.
		doc = getDOMImplementation().createDocument(None, "html", None)
		htmlElt = doc.documentElement
		htmlElt.setAttribute("xmlns", XHTML_NAMESPACE)
		htmlElt.setAttribute("lang", LANG)
		headElt = htmlElt.appendChild(doc.createElement("head"))
		titleElt = headElt.appendChild(doc.createElement("title"))
		titleElt.appendChild(doc.createTextNode("Redirecting…"))
		scriptElt = headElt.appendChild(doc.createElement("script"))
		scriptElt.setAttribute("type", "text/javascript")
		# The map is embedded as a J·S·O·N object literal; serializing the
		# document escapes any markup characters in this text node.
		scriptElt.appendChild(doc.createTextNode(f"location={json.dumps(irimap)}[location.pathname.substring(1)]??`/`"))
		bodyElt = htmlElt.appendChild(doc.createElement("body"))
		bodyElt.appendChild(doc.createTextNode("Attempting to redirect to the proper page… (Requires Javascript.)"))
		f.write(doc.toxml())

	# Remove the build directory.
	#
	# Everything needed has been written to the public directory by now, so
	# the clone made at the start of the run is no longer needed.
	rmtree(BUILD_DIRECTORY)