From: Lady Date: Tue, 16 Jan 2024 01:46:10 +0000 (-0500) Subject: Percent‐encode filenames when generating u·r·i’s X-Git-Tag: 0.3.0~4 X-Git-Url: https://git.ladys.computer/Shushe/commitdiff_plain/a9dfb3cd8b5a2a8a3c6b8ce90212638166a15fd3?ds=sidebyside;hp=6972774d7db8027876c937abf5a7a5b0420d7965 Percent‐encode filenames when generating u·r·i’s It’s not known or expected whether tools like `xmlcatalog` can handle full leiris. It’s better and safer to just only use u·r·i’s for identifying resources. Note that this _does_ have implications on includes (they must also be percent‐encoded). Ideally, it would be possible to run this conversion in the transforms, but this probably is not possible in X·S·L·T 1.0. --- diff --git a/GNUmakefile b/GNUmakefile index b306ff2..0b644c2 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -14,6 +14,7 @@ override define makefileinfo ║│ version incompatibilities. The full list of program │║ ║│ requirements is as follows :— │║ ║│ │║ +║│ • awk │║ ║│ • cat │║ ║│ • cp │║ ║│ • date │║ @@ -22,6 +23,7 @@ override define makefileinfo ║│ • find │║ ║│ • mkdir (requires support for `-p´) │║ ║│ • mv │║ +║│ • od (requires support for `-t x1´) │║ ║│ • printf │║ ║│ • rm │║ ║│ • sed │║ @@ -31,6 +33,7 @@ override define makefileinfo ║│ • touch │║ ║│ • tr (requires support for `-d´) │║ ║│ • uuencode (requires support for `-m´ and `-r´) │║ +║│ • xargs (requires support for `-0´) │║ ║│ • xmlcatalog (provided by libxml2) │║ ║│ • xmllint (provided by libxml2) │║ ║│ • xsltproc (provided by libxslt) │║ @@ -79,6 +82,7 @@ endef # # If these are not installed on your computer, or you need to use a # different implementation, you can override the appropriate variable. 
+AWK := awk
 CAT := cat
 CP := cp
 DATE := date
@@ -87,6 +91,7 @@ FILE := file
 FIND := find
 MKDIR := mkdir
 MV := mv
+OD := od
 PRINTF := printf
 RM := rm
 SED := sed
@@ -96,6 +101,7 @@ TEST := test
 TOUCH := touch
 TR := tr
 UUENCODE := uuencode
+XARGS := xargs
 XMLCATALOG := xmlcatalog
 XMLLINT := xmllint
 XSLTPROC := xsltproc
@@ -203,6 +209,38 @@ override silent := $(if $(VERBOSE),,@)
 # expressions.
 override sedesc = $(subst /,[/],$(subst $$,\$$,$(subst *,\*,$(subst .,\.,$(subst [,\[,$(subst ^,\^,$(subst \,\\,$1)))))))
 
+# (callable) Percent‐decode the given strings.
+#
+# Backslashes and backticks are first rewritten as `%5C´ and `%60´,
+# which frees the backtick for use as a temporary separator and
+# keeps literal backslashes away from `printf '%b'´. Each `%XX´
+# escape is then split onto its own line; any `%´ left over is not
+# part of an escape and is preserved literally (as `%25´). Finally,
+# every escape becomes a `\0ooo´ octal sequence for `printf '%b'´.
+#
+# Escape values are looked up in a table instead of being coerced
+# from `0x…´ strings, since not every `awk´ performs that coercion.
+override perdec = $(foreach encoded,$1,$(shell $(PRINTF) '%s\n' $(call quote,$(encoded)) | $(SED) 's/`/`%60`/g;s/[\]/`%5C`/g;s/%[0-9A-Fa-f]\{2\}/`&`/g' | $(TR) '`' '\n' | $(SED) '/^%[0-9A-Fa-f]\{2\}$$/!s/%/`%25`/g' | $(TR) '`' '\n' | $(AWK) 'BEGIN{for(i=0;i<256;i++)byte[sprintf("%%%02X",i)]=i}$$0!~/%/{printf("%s",$$0)}/%/{printf("\\%04o",byte[toupper($$0)])}' | $(XARGS) -0 $(PRINTF) '%b'))
+
+# (callable) Percent‐encode the given strings.
+#
+# This singly‐encodes u·r·i characters and doubly‐encodes other
+# characters, then calls `perdec´ to decode back to a single encoding.
+# The encoding assumes the input is a “u·r·i component”; e·g that the
+# resulting string should only contain `pchar´ (but can contain any
+# `sub-delim´).
+#
+# Underscore (byte `5F´) is `unreserved´ and so is left as‐is. `od´ is
+# run with `-v´ (a standard option) so that repeated output lines are
+# written in full rather than being collapsed to `*´.
+override perenc = $(foreach unencoded,$1,$(call perdec,$(shell $(PRINTF) '%s' $(call quote,$(unencoded)) | $(OD) -v -t x1 | $(SED) 's/^[0-9]*//;s/2[146-9A-Ea-e]/%&/g;s/3[0-9AaBbDd]/%&/g;s/40/%&/g;s/[46][1-9A-Fa-f]/%&/g;s/[57][0-9Aa]/%&/g;s/5[Ff]/%&/g;s/7[Ee]/%&/g;s/[ ][0-9A-Fa-f]\{2\}/%25&/g' | $(TR) -d ' \n' | $(TR) 'abcdef' 'ABCDEF')))
+
+# (callable) Percent‐encode each component in the given paths.
+#
+# Paths are split on `/´ and rejoined afterwards, so empty components
+# (leading, trailing, or doubled slashes) are not preserved.
+override pathenc = $(foreach path,$1,$(subst $(space),/,$(call perenc,$(subst /, ,$(path)))))
+
 # (overridable) Collect all of the applicable includes from the
 # includes directory.
sourceincludes := $(shell $(FIND) $(FINDOPTS) $(INCLUDEDIR) -type f '(' $(FINDRULES) ')')
@@ -234,7 +255,12 @@ override assetfiles := $(filter-out $(xmlfiles) $(plaintextfiles),$(sourcefiles)
 override typeoffile = $(patsubst $(foreach file,$1,$(file):%),%,$(filter $(foreach file,$1,$(file):%),$(types)))
 
 # (callable) Get the identifier for the given transform.
-override id = $(or $(shell $(XMLLINT) --xpath '/*/*[local-name()="id" and namespace-uri()="urn:fdc:ladys.computer:20231231:Shu1She4"]/text()[1]' $1),$(basename $(notdir $1)))
+#
+# The identifier is the first text node of an `id´ child of the root
+# element; when that lookup yields nothing, an `example:´ u·r·i built
+# from the percent‐encoded file name (minus its extension) is used.
+# The path is quoted before it is handed to the shell.
+override id = $(or $(shell $(XMLLINT) --xpath '/*/*[local-name()="id" and namespace-uri()="urn:fdc:ladys.computer:20231231:Shu1She4"]/text()[1]' $(call quote,$1)),example:$(call pathenc,$(basename $(notdir $1))))
 
 # (callable) Get the local path for the given source file.
 override sourcepath = $(firstword $(foreach directory,$(SRCDIR),$(if $(filter $(directory)/%,$1),$(patsubst $(directory)/%,%,$1),)))
@@ -246,7 +267,7 @@ override includepath = $(firstword $(foreach directory,$(INCLUDEDIR),$(if $(filt
-override datauri = $(foreach file,$1,data:$(call typeoffile,$(file));base64,$(shell $(UUENCODE) -m -r $(call quote,$(file)) _ | tr -d ' \n'))
+override datauri = $(foreach file,$1,data:$(call typeoffile,$(file));base64,$(shell $(UUENCODE) -m -r $(call quote,$(file)) _ | $(TR) -d ' \n'))
 
 # Pair each source file and include with its local u·r·i.
-override sourcelocalpair = $(foreach file,$(sourcefiles) $(sourceincludes),$(if $(filter $(file),$(sourceincludes)),$(file):about:shushe?include=$(call includepath,$(file)),$(file):about:shushe?source=$(call sourcepath,$(file))))
+override sourcelocalpair = $(foreach file,$(sourcefiles) $(sourceincludes),$(if $(filter $(file),$(sourceincludes)),$(file):about:shushe?include=$(call pathenc,$(call includepath,$(file))),$(file):about:shushe?source=$(call pathenc,$(call sourcepath,$(file)))))
 
 # (callable) Get local uris for the given files.
override localuri = $(foreach file,$1,$(patsubst $(file):%,%,$(filter $(file):%,$(sourcelocalpair)))) @@ -410,7 +431,7 @@ $(BUILDDIR)/magic.mgc: $(wildcard $(MAGICDIR)/*) $(BUILDDIR)/parser.catalog: $(PARSERS) @$(ECHO) "Generating catalog of parsers…" $(silent)$(XMLCATALOG) --create --noout $(call quote,$@) - $(foreach parser,$(PARSERS),$(silent)$(XMLCATALOG) --add uri $(call quote,$(call id,$(parser))) $(call quote,../$(parser)) --noout $(call quote,$@)$(newline)) + $(foreach parser,$(PARSERS),$(silent)$(XMLCATALOG) --add uri $(call quote,$(call id,$(parser))) $(call quote,$(call pathenc,../$(parser))) --noout $(call quote,$@)$(newline)) $(BUILDDIR)/parser.xslt: $(BUILDDIR)/parser.catalog $(THISDIR)/lib/catalog2parser.xslt @$(ECHO) "Generating main parser…" $(silent)$(XSLTPROC) -o $(call quote,$@) $(call quote,$(THISDIR)/lib/catalog2parser.xslt) $(call quote,$<) @@ -425,14 +446,14 @@ $(call parsed,$(sourcefiles) $(sourceincludes)): %: $$(call unparsed,$$@) $(type $(silent)$(call ensuredirectory,$(dir $@)) $(silent)$(if $(filter $<,$(assetfiles)),$(PRINTF) '%s\n' $(call quote,) > $(call quote,$@),$(if $(filter $<,$(plaintextfiles)),$(call wrapplaintext,$<),$(CAT) $(call quote,$<)) | $(XSLTPROC) -o $(call quote,$@) $(call quote,$(BUILDDIR)/parser.xslt) -) -# Generate a catalog of all transformed files, for use when processing +# Generate a catalog of all parsed files, for use when processing # includes. This does not depend on actually transforming the files. 
$(BUILDDIR)/catalog: $(sourcefiles) $(sourceincludes) $(typeupdates) @$(ECHO) "Generating catalog of parsed files…" $(silent)$(XMLCATALOG) --create --noout $(call quote,$@) - $(foreach source,$(sourcefiles) $(sourceincludes),$(silent)$(XMLCATALOG) --add uri $(call quote,$(call localuri,$(source))) $(call quote,$(patsubst $(BUILDDIR)/%,%,$(call parsed,$(source)))#$(if $(filter $(source),$(assetfiles)),asset,xml)) --noout $(call quote,$@)$(newline)) + $(foreach source,$(sourcefiles) $(sourceincludes),$(silent)$(XMLCATALOG) --add uri $(call quote,$(call localuri,$(source))) $(call quote,$(call pathenc,$(patsubst $(BUILDDIR)/%,%,$(call parsed,$(source))))#$(if $(filter $(source),$(assetfiles)),asset,xml)) --noout $(call quote,$@)$(newline)) -# Build a list of dependencies for each transformed file. +# Build a list of dependencies for each parsed file. $(BUILDDIR)/dependencies: $(BUILDDIR)/catalog $(call parsed,$(plaintextfiles) $(xmlfiles)) $(THISDIR)/lib/catalog2dependencies.xslt @$(ECHO) "Identifying dependencies…" $(silent)$(XSLTPROC) -o $(call quote,$@) $(call quote,$(THISDIR)/lib/catalog2dependencies.xslt) $(call quote,$<) @@ -441,7 +462,7 @@ $(BUILDDIR)/dependencies: $(BUILDDIR)/catalog $(call parsed,$(plaintextfiles) $( $(BUILDDIR)/transform.catalog: $(TRANSFORMS) @$(ECHO) "Generating catalog of transforms…" $(silent)$(XMLCATALOG) --create --noout $(call quote,$@) - $(foreach transform,$(TRANSFORMS),$(silent)$(XMLCATALOG) --add uri $(call quote,$(call id,$(transform))) $(call quote,../$(transform)) --noout $(call quote,$@)$(newline)) + $(foreach transform,$(TRANSFORMS),$(silent)$(XMLCATALOG) --add uri $(call quote,$(call id,$(transform))) $(call quote,$(call pathenc,../$(transform))) --noout $(call quote,$@)$(newline)) $(BUILDDIR)/transform.xslt: $(BUILDDIR)/transform.catalog $(THISDIR)/lib/catalog2transform.xslt @$(ECHO) "Generating main transform…" $(silent)$(XSLTPROC) -o $(call quote,$@) $(call quote,$(THISDIR)/lib/catalog2transform.xslt) $(call quote,$<) 
diff --git a/README.markdown b/README.markdown index 8e56f29..dec092a 100644 --- a/README.markdown +++ b/README.markdown @@ -91,6 +91,7 @@ In every case, you may supply your own implementation by overriding the corresponding (allcaps) variable (e·g, set `MKDIR` to supply your own `mkdir` implementation). +- `awk` - `cat` - `cp` - `date` @@ -99,6 +100,7 @@ In every case, you may supply your own implementation by overriding the - `find` - `mkdir` (requires support for `-p`) - `mv` +- `od` (requires support for `-t x1`) - `printf` - `rm` - `sed` @@ -108,6 +110,7 @@ In every case, you may supply your own implementation by overriding the - `touch` - `tr` (requires support for `-d`) - `uuencode` (requires support for `-m` and `-r`) +- `xargs` (requires support for `-0`) - `xmlcatalog` (provided by `libxml2`) - `xmllint` (provided by `libxml2`) - `xsltproc` (provided by `libxslt`)