From: Lady <redacted>
Date: Mon, 1 Apr 2024 20:45:42 +0000 (-0400)
Subject: Improve (fix) the T·S·V parser
X-Git-Tag: 0.7.0~5
X-Git-Url: https://git.ladys.computer/Shushe/commitdiff_plain/872c70923f2c19e4c9abad7ea1c2f5be1ffd3e65

Improve (fix) the T·S·V parser

Although `exslstr:tokenize()` is fast, it should not be used when
splitting the columns of a T·S·V file, as it will collapse empty
columns. Introduce a new transform in `lib/` for splitting, and import
it into the T·S·V parser.

This transform was largely copied from Caudex
<https://git.ladys.computer/Caudex/blob/0.1.1:/lib/split.xslt> and is
likely to be useful downstream as well.

Continue using `exslstr:tokenize()` for splitting the _rows_ of the
T·S·V, as empty rows _should_ be collapsed.
---

diff --git a/lib/split.xslt b/lib/split.xslt
new file mode 100644
index 0000000..13c79ed
--- /dev/null
+++ b/lib/split.xslt
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+<!--
+⁌ ⛩️📰 书社 ∷ lib/split.xslt
+
+© 2024 Lady [@ Lady’s Computer]
+
+This Source Code Form is subject to the terms of the Mozilla Public License, v 2.0.
+If a copy of the M·P·L was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
+-->
+<transform
+	xmlns="http://www.w3.org/1999/XSL/Transform"
+	xmlns:html="http://www.w3.org/1999/xhtml"
+	xmlns:书社="urn:fdc:ladys.computer:20231231:Shu1She4"
+	version="1.0"
+>
+	<!--
+	Splits the string `source` at every occurrence of `separator`, emitting each piece (in order) wrapped in an `<html:span>`.
+	Empty pieces are kept: adjacent separators, or a separator at either end of `source`, produce an empty span.
+	Parameters:
+	• `source`: The string to split.
+	• `separator`: The string to split at; defaults to a line feed.
+	-->
+	<template name="书社:split">
+		<param name="source"/>
+		<!-- Written as a character reference because a literal line feed in an attribute value is normalized to a space by the XML parser. -->
+		<param name="separator" select="'&#x0A;'"/>
+		<choose>
+			<!-- An empty separator is “contained” in every string without ever shortening it; treat it as absent so that the recursion terminates. -->
+			<when test="$separator!='' and contains($source, $separator)">
+				<html:span>
+					<value-of select="substring-before($source, $separator)"/>
+				</html:span>
+				<!-- Recurse on whatever follows the first separator. -->
+				<call-template name="书社:split">
+					<with-param name="source" select="substring-after($source, $separator)"/>
+					<with-param name="separator" select="$separator"/>
+				</call-template>
+			</when>
+			<otherwise>
+				<!-- No separator remains (or `source` is empty): the remainder is the final piece. -->
+				<html:span>
+					<value-of select="$source"/>
+				</html:span>
+			</otherwise>
+		</choose>
+	</template>
+</transform>
diff --git a/parsers/tsv.xslt b/parsers/tsv.xslt
index a3dd927..ae721e4 100644
--- a/parsers/tsv.xslt
+++ b/parsers/tsv.xslt
@@ -16,15 +16,24 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 	exclude-result-prefixes="exsl exslstr"
 	version="1.0"
 >
+	<import href="../lib/split.xslt"/>
 	<书社:id>urn:fdc:ladys.computer:20231231:Shu1She4:tsv.xslt</书社:id>
 	<template match="html:script[@type='text/tab-separated-values']">
-		<variable name="rows" select="exslstr:tokenize(., '&#x0A;')[normalize-space(.) and not(starts-with(., '#'))]"/>
+		<!-- Rows are split with exslstr:tokenize(), which collapses empty rows; blank rows and rows beginning with "#" are then discarded. -->
+		<variable name="rows" select="exslstr:tokenize(., '&#x0A;')[normalize-space(.)!='' and not(starts-with(., '#'))]"/>
 		<variable name="head" select="$rows[1]"/>
+		<!-- Columns are split with 书社:split instead, because it keeps empty columns. -->
+		<variable name="headcols">
+			<call-template name="书社:split">
+				<with-param name="source" select="string($head)"/>
+				<with-param name="separator" select="'&#x09;'"/>
+			</call-template>
+		</variable>
 		<variable name="body" select="$rows[not(position()=1)]"/>
 		<html:table>
 			<html:thead>
 				<html:tr>
-					<for-each select="exslstr:tokenize($head, '&#x09;')">
+					<for-each select="exsl:node-set($headcols)/*">
 						<html:th scope="col">
 							<value-of select="."/>
 						</html:th>
@@ -33,12 +42,30 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 			</html:thead>
 			<html:tbody>
 				<for-each select="$body">
+					<variable name="cols">
+						<call-template name="书社:split">
+							<with-param name="source" select="string(.)"/>
+							<with-param name="separator" select="'&#x09;'"/>
+						</call-template>
+					</variable>
 					<html:tr>
-						<for-each select="exslstr:tokenize(., '&#x09;')">
+						<!-- Emit at most as many cells as there are header columns. -->
+						<for-each select="exsl:node-set($cols)/*[count(exsl:node-set($headcols)/*)>=position()]">
 							<html:td>
 								<value-of select="."/>
+								<!-- Any surplus columns are folded into the last cell, rejoined with tabs. -->
+								<if test="position()=count(exsl:node-set($headcols)/*)">
+									<for-each select="following-sibling::*">
+										<text>&#x09;</text>
+										<value-of select="."/>
+									</for-each>
+								</if>
 							</html:td>
 						</for-each>
+						<!-- Pad short rows with empty cells up to the header width. -->
+						<for-each select="exsl:node-set($headcols)/*[position()>count(exsl:node-set($cols)/*)]">
+							<html:td/>
+						</for-each>
 					</html:tr>
 				</for-each>
 			</html:tbody>