From: Lady <redacted> Date: Thu, 18 Jan 2024 01:55:19 +0000 (-0500) Subject: Add @书社:parsed-by to parse results X-Git-Tag: 0.4.0~3 X-Git-Url: https://git.ladys.computer/Shushe/commitdiff_plain/90810b4c234960e725e770f331b13c4687f407c8?hp=1c02c25ab49b46225f48d51ccc52648c20c82eaf Add @书社:parsed-by to parse results This switches the parser to use a two‐stage parse, in which each node is by default first processed in the `书社:parse` mode, which then applies templates to the node. This provides a hook for selecting certain kinds of elements, for example `<html:script>` elements, and doing something to the result. The “something” in this case is “making note of the parser which is registered to that type on each result element by setting the `@书社:parsed-by` attribute to its `@id`”. This setup also allows a reparsing of the parse result (in case new `<html:script>` elements were produced by it); it is worth noting that this could result in an endless loop if the `<html:script>` element is not actually transformed by any parser. --- diff --git a/README.markdown b/README.markdown index 4c1f1d6..4427c7f 100644 --- a/README.markdown +++ b/README.markdown @@ -255,8 +255,10 @@ For example, the trivial `text/plain` parser is defined as follows :— <transform xmlns="http://www.w3.org/1999/XSL/Transform" xmlns:html="http://www.w3.org/1999/xhtml" + xmlns:书社="urn:fdc:ladys.computer:20231231:Shu1She4" version="1.0" > + <书社:id>example:text/plain</书社:id> <template match="html:script[@type='text/plain']"> <html:pre><value-of select="."/></html:pre> </template> @@ -271,8 +273,21 @@ Alternatively, you can set the `@书社:supported-media-types` attribute on the root element of the parser to override media type support detection. -Parsers can also target specific dialects of X·M·L, in which case they - operate on the same basic principles as transforms (described below). 
+Even when `@书社:supported-media-types` is set, it is a requirement + that each parser transform any `<html:script>` elements with a + `@type` which matches their registered types into something else. +Otherwise the parser will be stuck in an endless loop. +The result tree of applying the transform to the `<html:script>` + element will be reparsed (in case any new `<html:script>` elements + were added in its subtree), and a `@书社:parsed-by` attribute will be + added to each toplevel element in the result. +The value of this attribute will be the value of the `<书社:id>` + toplevel element in the parser. + +It is possible for parsers to support zero plaintext types. +This is useful when targeting specific dialects of X·M·L; parsers in + this sense operate on the same basic principles as transforms + (described below). The major distinction between X·M·L parsers and transforms is where in the process the transformation happens: Parsers are applied *prior* to embedding (and can be used to generate diff --git a/lib/catalog2parser.xslt b/lib/catalog2parser.xslt index ead56da..d1d2bab 100644 --- a/lib/catalog2parser.xslt +++ b/lib/catalog2parser.xslt @@ -10,6 +10,7 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one <transform xmlns="http://www.w3.org/1999/XSL/Transform" xmlns:catalog="urn:oasis:names:tc:entity:xmlns:xml:catalog" + xmlns:exsl="http://exslt.org/common" xmlns:exslstr="http://exslt.org/strings" xmlns:html="http://www.w3.org/1999/xhtml" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" @@ -76,11 +77,53 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one </if> </xslt:include> </for-each> + <xslt:template name="书社:apply-parsed-by"> + <xslt:param name="id"/> + <xslt:param name="result" select="/.."/> + <xslt:for-each select="$result/node()"> + <xslt:copy> + <xslt:attribute name="书社:parsed-by"> + <xslt:value-of select="$id"/> + <xslt:if test="@书社:parsed-by"> + <xslt:text> </xslt:text> + <xslt:value-of 
select="@书社:parsed-by"/> + </xslt:if> + </xslt:attribute> + <xslt:copy-of select="@*[not(namespace-uri()='urn:fdc:ladys.computer:20231231:Shu1She4' and local-name()='parsed-by')]|node()"/> + </xslt:copy> + </xslt:for-each> + </xslt:template> + <xslt:template match="/"> + <xslt:apply-templates select="node()" mode="书社:parse"/> + </xslt:template> <xslt:template match="@*|node()" priority="-1"> <xslt:copy> - <xslt:apply-templates select="@*|node()"/> + <xslt:apply-templates select="@*|node()" mode="书社:parse"/> </xslt:copy> </xslt:template> + <xslt:template match="html:script[@type]" mode="书社:parse" priority="1"> + <xslt:variable name="parserdiv" select="exsl:node-set($书社:parsers)//html:div[html:dd=current()/@type]"/> + <xslt:choose> + <xslt:when test="$parserdiv"> + <xslt:variable name="result"> + <xslt:apply-templates select="."/> + </xslt:variable> + <xslt:variable name="reparsed-result"> + <xslt:apply-templates select="exsl:node-set($result)/node()" mode="书社:parse"/> + </xslt:variable> + <xslt:call-template name="书社:apply-parsed-by"> + <xslt:with-param name="id" select="$parserdiv/html:dt"/> + <xslt:with-param name="result" select="exsl:node-set($reparsed-result)"/> + </xslt:call-template> + </xslt:when> + <xslt:otherwise> + <xslt:apply-templates select="."/> + </xslt:otherwise> + </xslt:choose> + </xslt:template> + <xslt:template match="@*|node()" mode="书社:parse"> + <xslt:apply-templates select="."/> + </xslt:template> </xslt:transform> </template> <output method="xml" encoding="UTF-8"/>