Add @书社:parsed-by to parse results

author Lady <redacted>

Thu, 18 Jan 2024 01:55:19 +0000 (20:55 -0500)

committer Lady <redacted>

Thu, 18 Jan 2024 02:37:30 +0000 (21:37 -0500)
author Lady <redacted>
Thu, 18 Jan 2024 01:55:19 +0000 (20:55 -0500)
committer Lady <redacted>
Thu, 18 Jan 2024 02:37:30 +0000 (21:37 -0500)
diff --git a/README.markdown b/README.markdown

index 4c1f1d63f57a91b42c40230ce5419aa5aa1b5c6d..4427c7fed710eb40a5051e147688b77e1f843478 100644 (file)
--- a/README.markdown
+++ b/README.markdown
@@ -255,8 +255,10 @@ For example, the trivial `text/plain` parser is defined as follows :⁠—
  <transform
    xmlns="http://www.w3.org/1999/XSL/Transform"
    xmlns:html="http://www.w3.org/1999/xhtml"
+  xmlns:书社="urn:fdc:ladys.computer:20231231:Shu1She4"
    version="1.0"
  >
+  <书社:id>example:text/plain</书社:id>
    <template match="html:script[@type='text/plain']">
      <html:pre><value-of select="."/></html:pre>
    </template>
@@ -271,8 +273,21 @@ Alternatively, you can set the `@书社:supported-media-types` attribute
    on the root element of the parser to override media type support
    detection.
  
-Parsers can also target specific dialects of X·M·L, in which case they
-  operate on the same basic principles as transforms (described below).
+Even when `@书社:supported-media-types` is set, it is a requirement
+  that each parser transform any `<html:script>` elements with a
+  `@type` which matches their registered types into something else.
+Otherwise the parser will be stuck in an endless loop.
+The result tree of applying the transform to the `<html:script>`
+  element will be reparsed (in case any new `<html:script>` elements
+  were added in its subtree), and a `@书社:parsed-by` attribute will be
+  added to each toplevel element in the result.
+The value of this attribute will be the value of the `<书社:id>`
+  toplevel element in the parser.
+
+It is possible for parsers to support zero plaintext types.
+This is useful when targeting specific dialects of X·M·L; parsers in
+  this sense operate on the same basic principles as transforms
+  (described below).
  The major distinction between X·M·L parsers and transforms is where in
    the process the transformation happens:
  Parsers are applied *prior* to embedding (and can be used to generate
diff --git a/lib/catalog2parser.xslt b/lib/catalog2parser.xslt

index ead56da91dab9bd2916beda213937a7100703e2c..d1d2babac7142bf4de72671abfd45ca47594751c 100644 (file)
--- a/lib/catalog2parser.xslt
+++ b/lib/catalog2parser.xslt
@@ -10,6 +10,7 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
  <transform
         xmlns="http://www.w3.org/1999/XSL/Transform"
         xmlns:catalog="urn:oasis:names:tc:entity:xmlns:xml:catalog"
+       xmlns:exsl="http://exslt.org/common"
         xmlns:exslstr="http://exslt.org/strings"
         xmlns:html="http://www.w3.org/1999/xhtml"
         xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
@@ -76,11 +77,53 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
                                         </if>
                                 </xslt:include>
                         </for-each>
+                       <xslt:template name="书社:apply-parsed-by">
+                               <xslt:param name="id"/>
+                               <xslt:param name="result" select="/.."/>
+                               <xslt:for-each select="$result/node()">
+                                       <xslt:copy>
+                                               <xslt:attribute name="书社:parsed-by">
+                                                       <xslt:value-of select="$id"/>
+                                                       <xslt:if test="@书社:parsed-by">
+                                                               <xslt:text> </xslt:text>
+                                                               <xslt:value-of select="@书社:parsed-by"/>
+                                                       </xslt:if>
+                                               </xslt:attribute>
+                                               <xslt:copy-of select="@*[not(namespace-uri()='urn:fdc:ladys.computer:20231231:Shu1She4' and local-name()='parsed-by')]|node()"/>
+                                       </xslt:copy>
+                               </xslt:for-each>
+                       </xslt:template>
+                       <xslt:template match="/">
+                               <xslt:apply-templates select="node()" mode="书社:parse"/>
+                       </xslt:template>
                         <xslt:template match="@*|node()" priority="-1">
                                 <xslt:copy>
-                                       <xslt:apply-templates select="@*|node()"/>
+                                       <xslt:apply-templates select="@*|node()" mode="书社:parse"/>
                                 </xslt:copy>
                         </xslt:template>
+                       <xslt:template match="html:script[@type]" mode="书社:parse" priority="1">
+                               <xslt:variable name="parserdiv" select="exsl:node-set($书社:parsers)//html:div[html:dd=current()/@type]"/>
+                               <xslt:choose>
+                                       <xslt:when test="$parserdiv">
+                                               <xslt:variable name="result">
+                                                       <xslt:apply-templates select="."/>
+                                               </xslt:variable>
+                                               <xslt:variable name="reparsed-result">
+                                                       <xslt:apply-templates select="exsl:node-set($result)/node()" mode="书社:parse"/>
+                                               </xslt:variable>
+                                               <xslt:call-template name="书社:apply-parsed-by">
+                                                       <xslt:with-param name="id" select="$parserdiv/html:dt"/>
+                                                       <xslt:with-param name="result" select="exsl:node-set($reparsed-result)"/>
+                                               </xslt:call-template>
+                                       </xslt:when>
+                                       <xslt:otherwise>
+                                               <xslt:apply-templates select="."/>
+                                       </xslt:otherwise>
+                               </xslt:choose>
+                       </xslt:template>
+                       <xslt:template match="@*|node()" mode="书社:parse">
+                               <xslt:apply-templates select="."/>
+                       </xslt:template>
                 </xslt:transform>
         </template>
         <output method="xml" encoding="UTF-8"/>
author	Lady <redacted>
	Thu, 18 Jan 2024 01:55:19 +0000 (20:55 -0500)
committer	Lady <redacted>
	Thu, 18 Jan 2024 02:37:30 +0000 (21:37 -0500)
README.markdown		patch | blob | history
lib/catalog2parser.xslt		patch | blob | history