Lady’s Gitweb - Shushe/commitdiff
Improve (fix) the T·S·V parser
author Lady <redacted>
Mon, 1 Apr 2024 20:45:42 +0000 (16:45 -0400)
committer Lady <redacted>
Mon, 1 Apr 2024 20:45:42 +0000 (16:45 -0400)
Although `exslstr:tokenize()` is fast, it should not be used when
splitting the columns of a T·S·V file, as it will collapse empty
columns. Introduce a new transform in `lib/` for splitting, and import
it into the T·S·V parser.

This transform was largely copied from Caudex
<https://git.ladys.computer/Caudex/blob/0.1.1:/lib/split.xslt> and is
likely to be useful downstream as well.

Continue using `exslstr:tokenize()` for splitting the _rows_ of the
T·S·V, as empty rows _should_ be collapsed.

lib/split.xslt [new file with mode: 0644]
parsers/tsv.xslt

diff --git a/lib/split.xslt b/lib/split.xslt
new file mode 100644 (file)
index 0000000..13c79ed
--- /dev/null
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<!--
+⁌ ⛩️📰 书社 ∷ lib/split.xslt
+
+© 2024 Lady [@ Lady’s Computer]
+
+This Source Code Form is subject to the terms of the Mozilla Public License, v 2.0.
+If a copy of the M·P·L was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
+-->
+<transform
+       xmlns="http://www.w3.org/1999/XSL/Transform"
+       xmlns:html="http://www.w3.org/1999/xhtml"
+       xmlns:书社="urn:fdc:ladys.computer:20231231:Shu1She4"
+       version="1.0"
+>
+       <!--
+       Named template `书社:split`.
+
+       Splits the string `source` at every occurrence of `separator` and emits one `html:span` element per piece, in order.
+       Unlike a tokenizer, empty pieces are preserved: two adjacent separators produce an empty `html:span`, and a leading or trailing separator produces an empty first or last `html:span`.
+
+       Parameters:
+       · `source` ∷ the string to split.
+       · `separator` ∷ the string to split on; defaults to a line feed (U+000A).
+
+       If `separator` is the empty string, `source` is emitted unsplit as a single `html:span`.
+       -->
+       <template name="书社:split">
+               <param name="source"/>
+               <param name="separator" select="'&#xA;'"/>
+               <choose>
+                       <!--
+                       The length check is required for termination: `contains()` is true for an empty second argument, and `substring-after()` would then return `source` unchanged, so the recursion below would never finish.
+                       -->
+                       <when test="string-length($separator) != 0 and contains($source, $separator)">
+                               <!-- Emit everything before the first separator (possibly nothing). -->
+                               <html:span>
+                                       <value-of select="substring-before($source, $separator)"/>
+                               </html:span>
+                               <!-- Recurse on the remainder after that separator. -->
+                               <call-template name="书社:split">
+                                       <with-param name="source" select="substring-after($source, $separator)"/>
+                                       <with-param name="separator" select="$separator"/>
+                               </call-template>
+                       </when>
+                       <otherwise>
+                               <!-- No (further) separator: what is left is the final piece. -->
+                               <html:span>
+                                       <value-of select="$source"/>
+                               </html:span>
+                       </otherwise>
+               </choose>
+       </template>
+</transform>
index a3dd9275b8cc42da52fa7a641e2c1b6ab3196f1a..ae721e4e8565424d81df15b6303752d492e620c8 100644 (file)
@@ -16,15 +16,22 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
        exclude-result-prefixes="exsl exslstr"
        version="1.0"
 >
+       <import href="../lib/split.xslt"/>
        <书社:id>urn:fdc:ladys.computer:20231231:Shu1She4:tsv.xslt</书社:id>
        <template match="html:script[@type='text/tab-separated-values']">
-               <variable name="rows" select="exslstr:tokenize(., '&#xA;')[normalize-space(.) and not(starts-with(., '#'))]"/>
+               <variable name="rows" select="exslstr:tokenize(., '&#xA;')[normalize-space(.)!='' and not(starts-with(., '#'))]"/>
                <variable name="head" select="$rows[1]"/>
+               <variable name="headcols">
+                       <call-template name="书社:split">
+                               <with-param name="source" select="string($head)"/>
+                               <with-param name="separator" select="'&#x9;'"/>
+                       </call-template>
+               </variable>
                <variable name="body" select="$rows[not(position()=1)]"/>
                <html:table>
                        <html:thead>
                                <html:tr>
-                                       <for-each select="exslstr:tokenize($head, '&#x9;')">
+                                       <for-each select="exsl:node-set($headcols)/*">
                                                <html:th scope="col">
                                                        <value-of select="."/>
                                                </html:th>
@@ -33,12 +40,27 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
                        </html:thead>
                        <html:tbody>
                                <for-each select="$body">
+                                       <variable name="cols">
+                                               <call-template name="书社:split">
+                                                       <with-param name="source" select="string(.)"/>
+                                                       <with-param name="separator" select="'&#x9;'"/>
+                                               </call-template>
+                                       </variable>
                                        <html:tr>
-                                               <for-each select="exslstr:tokenize(., '&#x9;')">
+                                               <for-each select="exsl:node-set($cols)/*[count(exsl:node-set($headcols)/*)>position()]">
                                                        <html:td>
                                                                <value-of select="."/>
+                                                               <if test="position()=count(exsl:node-set($headcols)/*)">
+                                                                       <for-each select="following-sibling:*">
+                                                                               <text>&#x9;</text>
+                                                                               <value-of select="."/>
+                                                                       </for-each>
+                                                               </if>
                                                        </html:td>
                                                </for-each>
+                                               <for-each select="exsl:node-set($headcols)/*[position()>count(exsl:node-set($cols)/*)]">
+                                                       <html:td/>
+                                               </for-each>
                                        </html:tr>
                                </for-each>
                        </html:tbody>
This page took 0.028481 seconds and 4 git commands to generate.