Lady’s Gitweb - Shushe/commitdiff
Add @书社:parsed-by to parse results
author Lady <redacted>
Thu, 18 Jan 2024 01:55:19 +0000 (20:55 -0500)
committer Lady <redacted>
Thu, 18 Jan 2024 02:37:30 +0000 (21:37 -0500)
This switches the parser to use a two‐stage parse, in which each node
is by default first processed in the `书社:parse` mode, which then
applies templates to the node. This provides a hook for selecting
certain kinds of elements, for example `<html:script>` elements, and
doing something to the result.

The “something” in this case is “making note of the parser which is
registered to that type on each result element by setting the
`@书社:parsed-by` attribute to its `@id`”.

This setup also allows a reparsing of the parse result (in case new
`<html:script>` elements were produced by it); it is worth noting that
this could result in an endless loop if the `<html:script>` element is
not actually transformed by any parser.

README.markdown
lib/catalog2parser.xslt

index 4c1f1d63f57a91b42c40230ce5419aa5aa1b5c6d..4427c7fed710eb40a5051e147688b77e1f843478 100644 (file)
@@ -255,8 +255,10 @@ For example, the trivial `text/plain` parser is defined as follows :⁠—
 <transform
   xmlns="http://www.w3.org/1999/XSL/Transform"
   xmlns:html="http://www.w3.org/1999/xhtml"
+  xmlns:书社="urn:fdc:ladys.computer:20231231:Shu1She4"
   version="1.0"
 >
+  <书社:id>example:text/plain</书社:id>
   <template match="html:script[@type='text/plain']">
     <html:pre><value-of select="."/></html:pre>
   </template>
@@ -271,8 +273,21 @@ Alternatively, you can set the `@书社:supported-media-types` attribute
   on the root element of the parser to override media type support
   detection.
 
-Parsers can also target specific dialects of X·M·L, in which case they
-  operate on the same basic principles as transforms (described below).
+Even when `@书社:supported-media-types` is set, it is a requirement
+  that each parser transform any `<html:script>` elements with a
+  `@type` which matches their registered types into something else.
+Otherwise the parser will be stuck in an endless loop.
+The result tree of applying the transform to the `<html:script>`
+  element will be reparsed (in case any new `<html:script>` elements
+  were added in its subtree), and a `@书社:parsed-by` attribute will be
+  added to each toplevel element in the result.
+The value of this attribute will be the value of the `<书社:id>`
+  toplevel element in the parser.
+
+It is possible for parsers to support zero plaintext types.
+This is useful when targeting specific dialects of X·M·L; parsers in
+  this sense operate on the same basic principles as transforms
+  (described below).
 The major distinction between X·M·L parsers and transforms is where in
   the process the transformation happens:
 Parsers are applied *prior* to embedding (and can be used to generate
index ead56da91dab9bd2916beda213937a7100703e2c..d1d2babac7142bf4de72671abfd45ca47594751c 100644 (file)
@@ -10,6 +10,7 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 <transform
        xmlns="http://www.w3.org/1999/XSL/Transform"
        xmlns:catalog="urn:oasis:names:tc:entity:xmlns:xml:catalog"
+       xmlns:exsl="http://exslt.org/common"
        xmlns:exslstr="http://exslt.org/strings"
        xmlns:html="http://www.w3.org/1999/xhtml"
        xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
@@ -76,11 +77,53 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
                                        </if>
                                </xslt:include>
                        </for-each>
+                       <xslt:template name="书社:apply-parsed-by">
+                               <xslt:param name="id"/>
+                               <xslt:param name="result" select="/.."/>
+                               <xslt:for-each select="$result/node()">
+                                       <xslt:copy>
+                                               <xslt:attribute name="书社:parsed-by">
+                                                       <xslt:value-of select="$id"/>
+                                                       <xslt:if test="@书社:parsed-by">
+                                                               <xslt:text> </xslt:text>
+                                                               <xslt:value-of select="@书社:parsed-by"/>
+                                                       </xslt:if>
+                                               </xslt:attribute>
+                                               <xslt:copy-of select="@*[not(namespace-uri()='urn:fdc:ladys.computer:20231231:Shu1She4' and local-name()='parsed-by')]|node()"/>
+                                       </xslt:copy>
+                               </xslt:for-each>
+                       </xslt:template>
+                       <xslt:template match="/">
+                               <xslt:apply-templates select="node()" mode="书社:parse"/>
+                       </xslt:template>
                        <xslt:template match="@*|node()" priority="-1">
                                <xslt:copy>
-                                       <xslt:apply-templates select="@*|node()"/>
+                                       <xslt:apply-templates select="@*|node()" mode="书社:parse"/>
                                </xslt:copy>
                        </xslt:template>
+                       <xslt:template match="html:script[@type]" mode="书社:parse" priority="1">
+                               <xslt:variable name="parserdiv" select="exsl:node-set($书社:parsers)//html:div[html:dd=current()/@type]"/>
+                               <xslt:choose>
+                                       <xslt:when test="$parserdiv">
+                                               <xslt:variable name="result">
+                                                       <xslt:apply-templates select="."/>
+                                               </xslt:variable>
+                                               <xslt:variable name="reparsed-result">
+                                                       <xslt:apply-templates select="exsl:node-set($result)/node()" mode="书社:parse"/>
+                                               </xslt:variable>
+                                               <xslt:call-template name="书社:apply-parsed-by">
+                                                       <xslt:with-param name="id" select="$parserdiv/html:dt"/>
+                                                       <xslt:with-param name="result" select="exsl:node-set($reparsed-result)"/>
+                                               </xslt:call-template>
+                                       </xslt:when>
+                                       <xslt:otherwise>
+                                               <xslt:apply-templates select="."/>
+                                       </xslt:otherwise>
+                               </xslt:choose>
+                       </xslt:template>
+                       <xslt:template match="@*|node()" mode="书社:parse">
+                               <xslt:apply-templates select="."/>
+                       </xslt:template>
                </xslt:transform>
        </template>
        <output method="xml" encoding="UTF-8"/>
This page took 0.027688 seconds and 4 git commands to generate.