From: Lady <redacted>
Date: Mon, 1 Apr 2024 20:45:42 +0000 (-0400)
Subject: Improve (fix) the T·S·V parser
X-Git-Tag: 0.7.0~5
X-Git-Url: https://git.ladys.computer/Shushe/commitdiff_plain/872c70923f2c19e4c9abad7ea1c2f5be1ffd3e65

Improve (fix) the T·S·V parser

Although `exslstr:tokenize()` is fast, it should not be used when
splitting the columns of a T·S·V file, as it will collapse empty
columns. Introduce a new transform in `lib/` for splitting, and import
it into the T·S·V parser.

This transform was largely copied from Caudex
<https://git.ladys.computer/Caudex/blob/0.1.1:/lib/split.xslt> and is
likely to be useful downstream as well.

Continue using `exslstr:tokenize()` for splitting the _rows_ of the
T·S·V, as empty rows _should_ be collapsed.
---

diff --git a/lib/split.xslt b/lib/split.xslt
new file mode 100644
index 0000000..13c79ed
--- /dev/null
+++ b/lib/split.xslt
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<!--
+⁌ ⛩️📰 书社 ∷ lib/split.xslt
+
+© 2024 Lady [@ Lady’s Computer]
+
+This Source Code Form is subject to the terms of the Mozilla Public License, v 2.0.
+If a copy of the M·P·L was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
+-->
+<transform
+	xmlns="http://www.w3.org/1999/XSL/Transform"
+	xmlns:html="http://www.w3.org/1999/xhtml"
+	xmlns:书社="urn:fdc:ladys.computer:20231231:Shu1She4"
+	version="1.0"
+>
+	<template name="书社:split"><!-- Splits $source on every occurrence of $separator, emitting one html:span per piece in order; empty pieces are kept. -->
+		<param name="source"/><!-- The string to split. -->
+		<param name="separator" select="'&#xA;'"/><!-- The delimiter; defaults to a line feed. -->
+		<choose>
+			<when test="string($separator)!='' and contains($source, $separator)"><!-- An empty separator is always “contained” but consumes nothing, so recursing on it would never terminate; treat it as “no split”. -->
+				<html:span>
+					<value-of select="substring-before($source, $separator)"/>
+				</html:span>
+				<call-template name="书社:split">
+					<with-param name="source" select="substring-after($source, $separator)"/>
+					<with-param name="separator" select="$separator"/>
+				</call-template>
+			</when>
+			<otherwise>
+				<html:span>
+					<value-of select="$source"/>
+				</html:span>
+			</otherwise>
+		</choose>
+	</template>
+</transform>
diff --git a/parsers/tsv.xslt b/parsers/tsv.xslt
index a3dd927..ae721e4 100644
--- a/parsers/tsv.xslt
+++ b/parsers/tsv.xslt
@@ -16,15 +16,22 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 	exclude-result-prefixes="exsl exslstr"
 	version="1.0"
 >
+	<import href="../lib/split.xslt"/>
 	<书社:id>urn:fdc:ladys.computer:20231231:Shu1She4:tsv.xslt</书社:id>
 	<template match="html:script[@type='text/tab-separated-values']">
-		<variable name="rows" select="exslstr:tokenize(., '&#xA;')[normalize-space(.) and not(starts-with(., '#'))]"/>
+		<variable name="rows" select="exslstr:tokenize(., '&#xA;')[normalize-space(.)!='' and not(starts-with(., '#'))]"/>
 		<variable name="head" select="$rows[1]"/>
+		<variable name="headcols">
+			<call-template name="书社:split">
+				<with-param name="source" select="string($head)"/>
+				<with-param name="separator" select="'&#x9;'"/>
+			</call-template>
+		</variable>
 		<variable name="body" select="$rows[not(position()=1)]"/>
 		<html:table>
 			<html:thead>
 				<html:tr>
-					<for-each select="exslstr:tokenize($head, '&#x9;')">
+					<for-each select="exsl:node-set($headcols)/*">
 						<html:th scope="col">
 							<value-of select="."/>
 						</html:th>
@@ -33,12 +40,27 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 			</html:thead>
 			<html:tbody>
 				<for-each select="$body">
+					<variable name="cols">
+						<call-template name="书社:split">
+							<with-param name="source" select="string(.)"/>
+							<with-param name="separator" select="'&#x9;'"/>
+						</call-template>
+					</variable>
 					<html:tr>
-						<for-each select="exslstr:tokenize(., '&#x9;')">
+						<for-each select="exsl:node-set($cols)/*[count(exsl:node-set($headcols)/*)>=position()]"><!-- Emit at most one cell per header column (inclusive of the last one). -->
 							<html:td>
 								<value-of select="."/>
+								<if test="position()=count(exsl:node-set($headcols)/*)"><!-- Last header column: fold any surplus columns into this cell, each preceded by a tab. -->
+									<for-each select="following-sibling::*">
+										<text>&#x9;</text>
+										<value-of select="."/>
+									</for-each>
+								</if>
 							</html:td>
 						</for-each>
+						<for-each select="exsl:node-set($headcols)/*[position()>count(exsl:node-set($cols)/*)]">
+							<html:td/>
+						</for-each>
 					</html:tr>
 				</for-each>
 			</html:tbody>