From: Lady Date: Sun, 10 Aug 2025 21:33:12 +0000 (-0400) Subject: Support parsed metadata X-Git-Url: https://git.ladys.computer/Shushe/commitdiff_plain/HEAD?ds=sidebyside Support parsed metadata It is very common to want to have some basic metadata for files with·out needing to include them in their entirety and then parse out the metadata during the transform phase. This can be done, to an extent, using a multi·stage build, but it would be a lot easier to simply allow basic metadata to be parsed out of documents at the same time as they are being parsed otherwise. This commit implements this functionality. The approach taken essentially changes the result of parsing to look like :⁠— ```xml <书社:parsed> <书社:result> <书社:metadata> ``` This requires adding an additional step to extract out the contents of `/书社:parsed/书社:result`, which is very fast conceptually but yet another file read∕write. Fortunately, as with parsed results in general, this file should be infrequently updated. The parsed metadata is applied to `$书社:about//*[@rdf:about=$IDENTIFIER]/nie:interpretedAs/*`; i·e it is metadata on the _interpretation_ of the file; this aligns with it being “parsed out” of the file by the parser. Many of the best use·cases for this feature depend on being able to declare “soft dependencies” (depending on a file with·out embedding it), so that files can read in metadata from else·where with·out costly reads and embeds, and files can depend on each other’s metadata in a circular manner. “Soft dependency” support is planned, but not yet implemented. Right now, assets do not have parsed metadata. How·ever, it would be nice if parsed metadata support could be added for assets which have X·M·P metadata, which matches this format. That would have to be an optional feature if `exiftool` (or similar) is available, and is left as future work. 
--- diff --git a/.metadata-format-changed-since b/.metadata-format-changed-since index 6cf4047..b6faef5 100644 --- a/.metadata-format-changed-since +++ b/.metadata-format-changed-since @@ -1,10 +1,10 @@ -SPDX-FileCopyrightText: 2024 Lady +SPDX-FileCopyrightText: 2024, 2025 Lady SPDX-License-Identifier: CC0-1.0 The following hash indicates a commit in which the metadata format generated by ⛩📰 书社 was different than it currently is :⁠— -c84c2b38caf34807fd1c52a8f19fcf0af7e9807e +338b26f8c92351bad03a180ad4b4f88e4cfeab76 The purpose of this file is to serve as a trackable dependency which will prompt a rebuild of metadata when the generation mechanism diff --git a/GNUmakefile b/GNUmakefile index 80e1c4c..8f62e46 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -83,7 +83,7 @@ override define makefileinfo ║╰────────────────────────────────────────────────────────────╯║ ╟┬ ¶ Copyright & License ─────────────────────────────────────┬╢ ║│ │║ -║│ Copyright © 2023–2024 Lady [@ Ladys Computer]. │║ +║│ Copyright © 2023–2025 Lady [@ Ladys Computer]. │║ ║│ │║ ║│ This Source Code Form is subject to the terms of the │║ ║│ Mozilla Public License, v 2.0. If a copy of the M·P·L was │║ @@ -488,7 +488,7 @@ override includepath = $(or $(firstword $(foreach directory,$(INCLUDEDIR),$(if $ # (callable) Get base64 data u·r·i’s for the given files. # # ☡ This variable creates a subshell every time it is computed. -override datauri = $(foreach file,$1,data:$(call typeoffile,$(file));base64,$(shell $(UUENCODE) -m $(call quote,$(file)) _ | $(SED) '2,$$!d;$$d' | $(TR) -d ' \n')) +override datauri = $(foreach file,$1,data:$(call typeoffile,$(file));base64,$(shell $(UUENCODE) -m $(call quote,$(abspath $(file))) _ | $(SED) '2,$$!d;$$d' | $(TR) -d ' \n')) # Pair each source file and include with its local u·r·i. 
override sourcelocalpair := $(foreach file,$(sourcefiles) $(sourceincludes),$(file)|about:shushe?$(if $(filter $(file),$(sourceincludes)),include=$(call pathenc,$(call includepath,$(file))),source=$(call pathenc,$(call sourcepath,$(file))))) @@ -508,23 +508,32 @@ override typeupdates := $(and $(wildcard $(BUILDDIR)/.update-types),FORCE) # Pair each source file and include with its metadata location. override sourcemetadatapair := $(foreach file,$(sourcefiles) $(sourceincludes),$(file)|$(BUILDDIR)/$(if $(filter $(file),$(sourceincludes)),includes.metadata/$(call includepath,$(file)),sources.metadata/$(call sourcepath,$(file)))) -# (callable) Get the location of the transformed X·M·L files for the given source files. +# (callable) Get the location of the metadata files for the given source files. override metadata = $(foreach file,$1,$(patsubst $(file)|%,%,$(filter $(file)|%,$(sourcemetadatapair)))) -# (callable) Get the source files for the given parsed file. +# (callable) Get the source files for the given metadata files. override datadata = $(foreach file,$1,$(patsubst %|$(file),%,$(filter %|$(file),$(sourcemetadatapair)))) # Pair each source file and include with its parsed location. -override sourceparsedpair := $(foreach file,$(sourcefiles) $(sourceincludes),$(file)|$(BUILDDIR)/$(if $(filter $(file),$(sourceincludes)),includes/$(call includepath,$(file)),sources/$(call sourcepath,$(file)))) +override sourceparsedpair := $(foreach file,$(sourcefiles) $(sourceincludes),$(file)|$(BUILDDIR)/$(if $(filter $(file),$(sourceincludes)),includes.parsed/$(call includepath,$(file)),sources.parsed/$(call sourcepath,$(file)))) # (callable) Get the location of the transformed X·M·L files for the given source files. override parsed = $(foreach file,$1,$(patsubst $(file)|%,%,$(filter $(file)|%,$(sourceparsedpair)))) -# (callable) Get the source files for the given parsed file. +# (callable) Get the source files for the given parsed files. 
override unparsed = $(foreach file,$1,$(patsubst %|$(file),%,$(filter %|$(file),$(sourceparsedpair)))) -# Pair each build directory, transform, source file, or parsed file with its file u·r·i. -override fileuripairs := $(join $(patsubst %,%|,$(BUILDDIR) $(TRANSFORMS) $(sourcefiles) $(sourceincludes) $(call parsed,$(sourcefiles) $(sourceincludes))),$(call pathenc,$(foreach uriable,$(BUILDDIR) $(TRANSFORMS) $(sourcefiles) $(sourceincludes) $(call parsed,$(sourcefiles) $(sourceincludes)),file://$(abspath $(uriable))))) +# Pair each source file and include with its parsed result location. +override parsepair := $(foreach file,$(sourcefiles) $(sourceincludes),$(file)|$(BUILDDIR)/$(if $(filter $(file),$(sourceincludes)),includes/$(call includepath,$(file)),sources/$(call sourcepath,$(file)))) + +# (callable) Get the location of the parsed results for the given source files. +override parseresult = $(foreach file,$1,$(patsubst $(file)|%,%,$(filter $(file)|%,$(parsepair)))) + +# (callable) Get the source files for the given parsed results. +override parsesource = $(foreach file,$1,$(patsubst %|$(file),%,$(filter %|$(file),$(parsepair)))) + +# Pair each build directory, transform, source file, or parsed file, parse result file with its file u·r·i. +override fileuripairs := $(join $(patsubst %,%|,$(BUILDDIR) $(TRANSFORMS) $(sourcefiles) $(sourceincludes) $(call parsed,$(sourcefiles) $(sourceincludes)) $(call parseresult,$(sourcefiles) $(sourceincludes))),$(call pathenc,$(foreach uriable,$(BUILDDIR) $(TRANSFORMS) $(sourcefiles) $(sourceincludes) $(call parsed,$(sourcefiles) $(sourceincludes)) $(call parseresult,$(sourcefiles) $(sourceincludes)),file://$(abspath $(uriable))))) # (callable) Get the file u·r·is for the given transforms, source file or parsed files. 
override fileuri = $(foreach file,$1,$(or $(patsubst $(file)|%,%,$(filter $(file)|%,$(fileuripairs))),$(error Unable to get file u·r·i for `$(file)´))) @@ -596,14 +605,14 @@ override installed = $(foreach file,$1,$(DESTDIR)/$(call destination,$(file))) # ─ ¶ Recipe Variable Definitions ───────────────────────────────────── # (callable) Sanitize and wrap the provided plaintext file in X·M·L, printing to `stdout´. -override wrapplaintext = { $(PRINTF) '%s\n%s' '' ''; } +override wrapplaintext = { $(PRINTF) '%s\n%s' '' ''; } # (callable) Check if the provided X·M·L file is X·M·L 1.1, and if so, coerce to X·M·L 1.0 as best as possible, printing the result (or the original file contents) to `stdout´. # # The X·M·L declaration will be dropped and character escapes for C0 control codes will be replaced with a literal `U+0091 PRIVATE USE ONE´, which is invalid in X·M·L 1.1, but valid X·M·L 1.0 (making the replacement obvious). # # This isn’t a perfect substitution (it makes some assumptions about the format of the underlying X·M·L), but it should be workable for most sensible, welformed files. 
-override serializexml = $(SED) "$$($(PRINTF) '%b' '/]*?>//\n s/&\0043x0*[12345678BCEFbcef];/\0302\0221/g\n s/&\0043x0*1[0123456789ABCDEFabcdef];/\0302\0221/g\n s/&\00430*[12345678];/\0302\0221/g\n s/&\00430*1[12456789];/\0302\0221/g\n s/&\00430*2[0123456789];/\0302\0221/g\n s/&\00430*3[01];/\0302\0221/g\n}')" <$(call quote,$1) | $(SED) "$$($(PRINTF) '%b' ':a\n/^\\n*$$/{ $$d\n N\n ba\n}')" +override serializexml = $(SED) "$$($(PRINTF) '%b' '/]*?>//\n s/&\0043x0*[12345678BCEFbcef];/\0302\0221/g\n s/&\0043x0*1[0123456789ABCDEFabcdef];/\0302\0221/g\n s/&\00430*[12345678];/\0302\0221/g\n s/&\00430*1[12456789];/\0302\0221/g\n s/&\00430*2[0123456789];/\0302\0221/g\n s/&\00430*3[01];/\0302\0221/g\n}')" <$(call quote,$(abspath $1)) | $(SED) "$$($(PRINTF) '%b' ':a\n/^\\n*$$/{ $$d\n N\n ba\n}')" # ─ ¶ Phony Targets ─────────────────────────────────────────────────── @@ -656,12 +665,6 @@ $(THISDIR)/GNUmakefile : $(BUILDDIR)/transform.xslt # ─ ¶ Build Targets ─────────────────────────────────────────────────── -# Generate R·D·F metadata for files. -$(call metadata,$(sourcefiles) $(sourceincludes)) : % : $$(call datadata,$$@) $(THISDIR)/.metadata-format-changed-since $(typeupdates) - $(call inform,$(PRINTF) '%s\n' $(call quote,Generating metadata for `$<´…) >&2) - $(silent)$(call ensuredirectory,$(dir $@)) - $(silent){ if $(TEST) ! 
-f $(call quote,$(BUILDDIR)/.mtime); then $(PRINTF) '%b' '\n' >|$(call quote,$(BUILDDIR)/.mtime); fi; $(TOUCH) -r $(call quote,$<) $(call quote,$(BUILDDIR)/.mtime); $(DIFF) -u $(call quote,$(BUILDDIR)/.mtime) /dev/null | $(SED) '1!d;s/.* \([^ ]*\) \([^ ]*\).*$$/\1T\2Z/'; $(CKSUM) $(call quote,$<) | $(SED) 's/[ ].*//'; } | $(xargsmultiquote) | $(XARGS) -E '' $(PRINTF) '<书社vocab:$(if $(filter $<,$(sourceincludes)),IncludeFile,SourceFile) xmlns:nie="http://www.semanticdesktop.org/ontologies/2007/01/19/nie#" xmlns:nfo="http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:书社vocab="urn:fdc:ladys.computer:20231231:Shu1She4:vocab:" rdf:about="%s" 书社vocab:path="%s" nfo:fileUrl="%s">$(if $(filter $<,$(assetfiles)),,)<书社vocab:hasParsedFile nfo:fileUrl="%s"/>%s' $(call quote,$(call attresc,$(call localuri,$<))) $(call quote,$(call attresc,$(if $(filter $<,$(sourceincludes)),$(call includepath,$<),$(call sourcepath,$<)))) $(call quote,$(call attresc,$(call fileuri,$<))) $(call quote,$(call attresc,$(call typeoffile,$<))) $(call quote,$(call attresc,$(call fileuri,$(call parsed,$<)))) >|$(call quote,$@) - # Parse the files. # # Even plain X·M·L files are parsed, because they may contain X·H·T·M·L `