From: Lady Date: Sun, 27 Oct 2024 19:28:09 +0000 (-0400) Subject: Move parse/magic building into different stage X-Git-Tag: 0.13.2~1 X-Git-Url: https://git.ladys.computer/Shushe/commitdiff_plain/af35f96d77cd8ae1b6fb5332509ba115f07fefb5?ds=inline;hp=91f3a166f07e5f260310799a1e1687f45f3c9870 Move parse/magic building into different stage There is a lot of calculation which goes into every “default” run (every source file must be found and characterized every time), which results in a lot of file·system reads. This is unnecessary, and needlessly slow for large numbers of files, on the first run (building the magic and parsers). This commit moves parser‐building into a new “initial” mode which then calls into a submake for the actual build, negating the need for the first restart, simplifying various aspects of the code, and generally making things quite a bit more efficient. The existing “_2stage” mode is combined with the new “initial” mode, as their functionalities largely overlap. --- diff --git a/GNUmakefile b/GNUmakefile index c601097..3161ecd 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -259,9 +259,9 @@ endif # • ‹ urn:fdc:ladys.computer:20231231:Shu1She4:mode:archive ›: # Generates archive files from parse results. # -# • ‹ urn:fdc:ladys.computer:20231231:Shu1She4:mode:_2stage ›: -# Two‐stage build; runs ⛩📰 书社 twice. -MODE := urn:fdc:ladys.computer:20231231:Shu1She4:mode:$(if $(DATADIR),$(shell if $(TEST) -d $(call quote,$(DATADIR)); then $(PRINTF) '%s\n' '_2stage'; else $(PRINTF) '%s\n' 'default'; fi),default) +# • ‹ urn:fdc:ladys.computer:20231231:Shu1She4:mode:initial ›: +# The initial mode that this make·file starts in; this builds the magic file and parsers and then recursively calls into the default mode to build the files. +MODE := urn:fdc:ladys.computer:20231231:Shu1She4:mode:initial # Set to a non·empty value to silence informative messages. 
QUIET := @@ -311,6 +311,51 @@ override not = $(if $1,,1) # (callable) Quote the given string for use within shell calls. override quote = '$(subst ','"'"',$1)' +# The command to use for percent‐decoding. +override perdeccmd := $(SED) 's/|/%7C/g;s/[\]/%5C/g;s/%[0123456789ABCDEFabcdef]\{2\}/|&|/g' | $(TR) '|' '\n' | $(SED) '/^%[0123456789ABCDEFabcdef]\{2\}$$/!s/%/|%25|/' | $(TR) '|' '\n' | $(AWK) '$$0!~/%/{printf "%s",$$0}/%/{d="0123456789ABCDEF";v=substr(toupper($$0),2,2);printf "\\%04o",(index(d,substr(v,1,1))-1)*16+index(d,substr(v,2,1))-1}' | $(SED) $(call quote,s/'/'"'"'/g;s/^/'/;s/$$/'/;$$!s/$$/\\/) | $(XARGS) -E '' $(PRINTF) '%b' + +# (callable) Percent‐decode the given strings. +# +# ☡ This variable creates a subshell every time it is computed. +override perdec = $(shell $(PRINTF) '%s\0450A' $(foreach encoded,$1,$(call quote,$(encoded))) | $(perdeccmd)) + +# (callable) Percent‐encode the given strings. +# +# This singly‐encodes u·r·i characters and doubly‐encodes other characters, then calls `perdec´ to decode back to a single encoding. +# The encoding assumes the input is a “u·r·i component”; e·g that the resulting string should only contain `pchar´ (but can contain any `sub-delim´). +# +# It is assumed that the given strings do not contain newlines. +# +# ☡ This variable creates a subshell every time it is computed. +override perenc = $(shell $(PRINTF) '%s\n' $(foreach unencoded,$1,$(call quote,$(unencoded))) | $(OD) -t x1 | $(SED) 's/^[0123456789]*//;s/0[Aa]/%&/g;s/2[146789ABCDEabcde]/%&/g;s/3[0123456789AaBbDd]/%&/g;s/40/%&/g;s/[46][123456789ABCDEFabcdef]/%&/g;s/[57][0123456789Aa]/%&/g;s/5[Ff]/%&/g;s/7[Ee]/%&/g;s/[ ][0123456789ABCDEFabcdef]\{2\}/%25&/g' | $(TR) -d ' \n' | $(TR) 'abcdef' 'ABCDEF' | $(perdeccmd)) + +# (callable) Percent‐encode each component in the given paths. +# +# ☡ This variable creates a subshell every time it is computed. 
+override pathenc = $(subst %2F,/,$(call perenc,$1)) + +# (callable) Create a unique name for the given file, based on its absolute path. +# +# ☡ This variable creates a subshell every time it is computed. +namehash = $(shell $(PRINTF) '%s' $(call quote,$(abspath $1)) | $(CKSUM) | $(SED) 's/ .*//' | $(XARGS) -E '' $(PRINTF) '%X')-$(notdir $1) + +# The location of this make·file. +override makefile := $(abspath $(THISDIR)/GNUmakefile) + +# Non·empty if `help´ or `clean´, and no targets other than `help´ or `clean´, were specified as goals on the commandline. +override notbuilding := $(and $(filter help clean,$(MAKECMDGOALS)),$(call not,$(filter-out help clean,$(MAKECMDGOALS)))) + +# (callable) Tests to see if the prerequisites provided by the second argument matches the value in the file corresponding to the first argument in `$(BUILDDIR)/lastprereqs´. +# If not, saves the new value. +# Returns the values plus the file in `$(BUILDDIR)/lastprereqs´, which will always be newer than the target if there was a change. +# +# Calling this variable is useful when a given target should be updated whenever its list of prerequisites changes in addition to whenever there is a change to one of its prerequisites. +# +# If `$(notbuilding)´ is non·empty, this variable produces no result to avoid unnecessary work. +# +# ☡ This variable creates at least one subshell every time it is computed. +override diffprereqs = $(if $(notbuilding),,$(and $(subst $(shell $(CAT) $(call quote,$(BUILDDIR)/lastprereqs/$1) 2>>/dev/null || :),,$2),$(shell $(call ensuredirectory,$(BUILDDIR)/lastprereqs) && $(PRINTF) '%s\n' $(call quote,$2) >|$(BUILDDIR)/lastprereqs/$1),)$2 $(BUILDDIR)/lastprereqs/$1) + # ─ ¶ Recipe Variable Definitions ───────────────────────────────────── # Outputs an `@´ to silence rules, unless `VERBOSE´ is non·empty. @@ -325,19 +370,22 @@ override ensuredirectory = if $(TEST) ! 
-d $(call quote,$1); then $(MKDIR) -p $( # Quote standard input in such a way that piping it to xargs will result in it being processed as a single argument. # # If standard input ends in a newline, it is stripped; all other newlines are preserved. -override xargsquote = $(SED) $(call quote,s/'/'"'"'/g;s/^/'/;s/$$/'/;$$!s/$$/\\/) +override xargsquote := $(SED) $(call quote,s/'/'"'"'/g;s/^/'/;s/$$/'/;$$!s/$$/\\/) # Quote standard input in such a way that piping it to xargs will result in each line being processed as a single argument. -override xargsmultiquote = $(SED) $(call quote,s/'/'"'"'/g;s/^/'/;s/$$/'/) +override xargsmultiquote := $(SED) $(call quote,s/'/'"'"'/g;s/^/'/;s/$$/'/) # (callable) Test if the provided xpath expression matches the provided document. override xpath = $(XMLLINT) --noent --nonet --xpath $(call quote,$1) $(call quote,$2) >>/dev/null 2>>/dev/null +# (callable) Get the identifier for the given parser or transform. +override id = $(XMLLINT) --noent --nonet --xpath '/*/*[local-name()="id" and namespace-uri()="urn:fdc:ladys.computer:20231231:Shu1She4"]/text()[1]' $(call quote,$1) 2>>/dev/null || $(PRINTF) '%s\n' $(call quote,about:shushe?$(or $2,unknown)=$(call pathenc,$(basename $(notdir $1)))) + # (callable) Extract the value of the text nodes in the provided X·M·L document and print them to `stdout´. override extracttext = $(PRINTF) '%s' '' | $(XSLTPROC) --nonet --novalid --nomkdir --nowrite - $(call quote,$1) # (callable) Process the provided transformation result and output the result to the provided location, given the provided relative path. 
-override processresultto = if $(call xpath,/*[local-name()="raw-text" and namespace-uri()="urn:fdc:ladys.computer:20231231:Shu1She4"],$1); then $(call extracttext,$1) >|$(call quote,$2); elif $(call xpath,/*[local-name()="base64-binary" and namespace-uri()="urn:fdc:ladys.computer:20231231:Shu1She4"],$1); then { $(PRINTF) '%s\n' 'begin-base64 644 -'; $(call extracttext,$1) | $(TR) -d '\t\n\f\r '; $(PRINTF) '\n%s\n' '===='; } | $(UUDECODE) -o /dev/stdout >|$(call quote,$2); elif $(call xpath,/*[local-name()="archive" and namespace-uri()="urn:fdc:ladys.computer:20231231:Shu1She4"],$1); then $(MAKE) -f $(call quote,$(abspath $(THISDIR)/GNUmakefile)) NAME=$(call quote,$3) SRC=$(call quote,$1) BUILDDIR=$(call quote,$(BUILDDIR)/archive/$3) DESTDIR=$(call quote,$(patsubst %/,%,$(dir $2))) MODE='urn:fdc:ladys.computer:20231231:Shu1She4:mode:archive' $(call quote,$2); else $(FINALIZE) $(call quote,$1) >|$(call quote,$2); fi +override processresultto = if $(call xpath,/*[local-name()="raw-text" and namespace-uri()="urn:fdc:ladys.computer:20231231:Shu1She4"],$1); then $(call extracttext,$1) >|$(call quote,$2); elif $(call xpath,/*[local-name()="base64-binary" and namespace-uri()="urn:fdc:ladys.computer:20231231:Shu1She4"],$1); then { $(PRINTF) '%s\n' 'begin-base64 644 -'; $(call extracttext,$1) | $(TR) -d '\t\n\f\r '; $(PRINTF) '\n%s\n' '===='; } | $(UUDECODE) -o /dev/stdout >|$(call quote,$2); elif $(call xpath,/*[local-name()="archive" and namespace-uri()="urn:fdc:ladys.computer:20231231:Shu1She4"],$1); then $(MAKE) -f $(call quote,$(makefile)) NAME=$(call quote,$3) SRC=$(call quote,$1) BUILDDIR=$(call quote,$(BUILDDIR)/archive/$3) DESTDIR=$(call quote,$(patsubst %/,%,$(dir $2))) MODE='urn:fdc:ladys.computer:20231231:Shu1She4:mode:archive' $(call quote,$2); else $(FINALIZE) $(call quote,$1) >|$(call quote,$2); fi # ━ § BEGIN DEFAULT MAKE·FILE ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ @@ -345,20 +393,6 @@ ifeq ($(MODE),urn:fdc:ladys.computer:20231231:Shu1She4:mode:default) # ─ ¶ 
Non‐Recipe Variable Definitions ───────────────────────────────── -# Non·empty if `help´ or `clean´, and no targets other than `help´ or `clean´, were specified as goals on the commandline. -override notbuilding := $(and $(filter help clean,$(MAKECMDGOALS)),$(call not,$(filter-out help clean,$(MAKECMDGOALS)))) - -# (callable) Tests to see if the prerequisites provided by the second argument matches the value in the file corresponding to the first argument in `$(BUILDDIR)/lastprereqs´. -# If not, saves the new value. -# Returns the values plus the file in `$(BUILDDIR)/lastprereqs´, which will always be newer than the target if there was a change. -# -# Calling this variable is useful when a given target should be updated whenever its list of prerequisites changes in addition to whenever there is a change to one of its prerequisites. -# -# If `$(notbuilding)´ is non·empty, this variable produces no result to avoid unnecessary work. -# -# ☡ This variable creates at least one subshell every time it is computed. -override diffprereqs = $(if $(notbuilding),,$(and $(subst $(shell $(CAT) $(call quote,$(BUILDDIR)/lastprereqs/$1) 2>>/dev/null || :),,$2),$(shell $(call ensuredirectory,$(BUILDDIR)/lastprereqs) && $(PRINTF) '%s\n' $(call quote,$2) >|$(BUILDDIR)/lastprereqs/$1),)$2 $(BUILDDIR)/lastprereqs/$1) - # (callable) Escape special characters for use in X·M·L. override xmlesc = $(subst >,>,$(subst <,<,$(subst &,&,$1))) @@ -368,34 +402,6 @@ override attresc = $(subst ",",$(call xmlesc,$1)) # (callable) Escape special characters for use in sed regular expressions. override sedesc = $(subst /,[/],$(subst $$,\$$,$(subst *,\*,$(subst .,\.,$(subst [,\[,$(subst ^,\^,$(subst \,\\,$1))))))) -# The command to use for percent‐decoding. 
-override perdeccmd := $(SED) 's/|/%7C/g;s/[\]/%5C/g;s/%[0123456789ABCDEFabcdef]\{2\}/|&|/g' | $(TR) '|' '\n' | $(SED) '/^%[0123456789ABCDEFabcdef]\{2\}$$/!s/%/|%25|/' | $(TR) '|' '\n' | $(AWK) '$$0!~/%/{printf "%s",$$0}/%/{d="0123456789ABCDEF";v=substr(toupper($$0),2,2);printf "\\%04o",(index(d,substr(v,1,1))-1)*16+index(d,substr(v,2,1))-1}' | $(xargsquote) | $(XARGS) -E '' $(PRINTF) '%b' - -# (callable) Percent‐decode the given strings. -# -# ☡ This variable creates a subshell every time it is computed. -override perdec = $(shell $(PRINTF) '%s\0450A' $(foreach encoded,$1,$(call quote,$(encoded))) | $(perdeccmd)) - -# (callable) Percent‐encode the given strings. -# -# This singly‐encodes u·r·i characters and doubly‐encodes other characters, then calls `perdec´ to decode back to a single encoding. -# The encoding assumes the input is a “u·r·i component”; e·g that the resulting string should only contain `pchar´ (but can contain any `sub-delim´). -# -# It is assumed that the given strings do not contain newlines. -# -# ☡ This variable creates a subshell every time it is computed. -override perenc = $(shell $(PRINTF) '%s\n' $(foreach unencoded,$1,$(call quote,$(unencoded))) | $(OD) -t x1 | $(SED) 's/^[0123456789]*//;s/0[Aa]/%&/g;s/2[146789ABCDEabcde]/%&/g;s/3[0123456789AaBbDd]/%&/g;s/40/%&/g;s/[46][123456789ABCDEFabcdef]/%&/g;s/[57][0123456789Aa]/%&/g;s/5[Ff]/%&/g;s/7[Ee]/%&/g;s/[ ][0123456789ABCDEFabcdef]\{2\}/%25&/g' | $(TR) -d ' \n' | $(TR) 'abcdef' 'ABCDEF' | $(perdeccmd)) - -# (callable) Percent‐encode each component in the given paths. -# -# ☡ This variable creates a subshell every time it is computed. -override pathenc = $(subst %2F,/,$(call perenc,$1)) - -# (callable) Create a unique name for the given file, based on its absolute path. -# -# ☡ This variable creates a subshell every time it is computed. 
-namehash = $(shell $(PRINTF) '%s' $(call quote,$(abspath $1)) | $(CKSUM) | $(SED) 's/ .*//' | $(XARGS) -E '' $(PRINTF) '%X')-$(notdir $1) - # (overridable) Collect all of the applicable includes from the includes directory. sourceincludes := $(if $(and $(INCLUDEDIR),$(wildcard $(INCLUDEDIR))),$(patsubst ./%,%,$(shell $(FIND) $(foreach dir,$(INCLUDEDIR),$(call quote,$(dir))) '(' $(FINDINCLUDERULES) ')' -a -type f -a -print)),) @@ -425,15 +431,6 @@ override assetfiles := $(filter-out $(xmlfiles) $(plaintextfiles),$(sourcefiles) # (callable) Get the types of the given files. override typeoffile = $(foreach file,$1,$(or $(patsubst $(file)|%,%,$(filter $(file)|%,$(types))),application/octet-stream)) -# Pair each source magic file with its location in the build directory. -override magicpair := $(foreach magicfile,$(MAGIC),$(magicfile)|$(BUILDDIR)/magic/$(call namehash,$(magicfile))) - -# (callable) Get the source file for the given magic files. -override magicsource = $(foreach magicpath,$1,$(patsubst %|$(magicpath),%,$(firstword $(filter %|$(magicpath),$(magicpair))))) - -# (callable) Get the build file for the given magic files. -override magicfile = $(foreach file,$1,$(patsubst $(file)|%,%,$(filter $(file)|%,$(magicpair)))) - # (callable) Get the local path for the given source file. override sourcepath = $(or $(firstword $(foreach directory,$(SRCDIR),$(if $(filter .,$(directory)),$(wildcard $1),$(if $(filter $(directory)/%,$1),$(patsubst $(directory)/%,%,$1),)))),$(error Unable to get local path for source file `$1´)) @@ -478,10 +475,10 @@ override parsed = $(foreach file,$1,$(patsubst $(file)|%,%,$(filter $(file)|%,$( # (callable) Get the source files for the given parsed file. override unparsed = $(foreach file,$1,$(patsubst %|$(file),%,$(filter %|$(file),$(sourceparsedpair)))) -# Pair each build directory, parser, transform, source file, or parsed file with its file u·r·i. 
-override fileuripairs := $(join $(patsubst %,%|,$(BUILDDIR) $(PARSERS) $(TRANSFORMS) $(sourcefiles) $(sourceincludes) $(call parsed,$(sourcefiles) $(sourceincludes))),$(call pathenc,$(foreach uriable,$(BUILDDIR) $(PARSERS) $(TRANSFORMS) $(sourcefiles) $(sourceincludes) $(call parsed,$(sourcefiles) $(sourceincludes)),file://$(abspath $(uriable))))) +# Pair each build directory, transform, source file, or parsed file with its file u·r·i. +override fileuripairs := $(join $(patsubst %,%|,$(BUILDDIR) $(TRANSFORMS) $(sourcefiles) $(sourceincludes) $(call parsed,$(sourcefiles) $(sourceincludes))),$(call pathenc,$(foreach uriable,$(BUILDDIR) $(TRANSFORMS) $(sourcefiles) $(sourceincludes) $(call parsed,$(sourcefiles) $(sourceincludes)),file://$(abspath $(uriable))))) -# (callable) Get the file u·r·is for the given parsers, transforms, source file or parsed files. +# (callable) Get the file u·r·is for the given transforms, source file or parsed files. override fileuri = $(foreach file,$1,$(or $(patsubst $(file)|%,%,$(filter $(file)|%,$(fileuripairs))),$(error Unable to get file u·r·i for `$(file)´))) ifneq ($(wildcard $(BUILDDIR)/dependencies),) @@ -550,9 +547,6 @@ override installed = $(foreach file,$1,$(DESTDIR)/$(call destination,$(file))) # ─ ¶ Recipe Variable Definitions ───────────────────────────────────── -# (callable) Get the identifier for the given parser or transform. -override id = $(XMLLINT) --noent --nonet --xpath '/*/*[local-name()="id" and namespace-uri()="urn:fdc:ladys.computer:20231231:Shu1She4"]/text()[1]' $(call quote,$1) 2>>/dev/null || $(PRINTF) '%s\n' $(call quote,about:shushe?$(or $2,unknown)=$(call pathenc,$(basename $(notdir $1)))) - # (callable) Sanitize and wrap the provided plaintext file in X·M·L, printing to `stdout´. override wrapplaintext = { $(PRINTF) '%s\n%s' '' ''; } @@ -563,25 +557,11 @@ override wrapplaintext = { $(PRINTF) '%s\n%s' '' '