From: Lady <redacted>
Date: Sat, 22 Mar 2025 01:45:20 +0000 (-0400)
Subject: Enable nested tags of the same kind
X-Git-Tag: 0.4.0~2
X-Git-Url: https://git.ladys.computer/LesML/commitdiff_plain/440eeff8f1b198dc3028fd4573a0ce871a86fa52?ds=sidebyside

Enable nested tags of the same kind

Previously, the processing rules did not allow nesting an element inside
of itself: ‹ ☞︎nested ☞︎tags☜︎ like ☞︎this☜︎☜︎ › did not work as expected.
This design choice was to (at least) appropriately handle ‹ ☞︎⟨this☜︎
weird ⟨case☜︎⟩ ›, where each inline should stop at the first delimiter.
Correct processing starts by looking for an end sigil first, not a start
sigil, backtracking to the last start sigil which precedes it,
wrapping that text, and then reprocessing the entire set of nodes until
no more end sigils with matching start sigils are found.

This commit implements that behaviour, which is of course a fair bit
more complicated but should improve the results. It also changes
specifying characters by Unicode code·point to use curly braces rather
than angle brackets, as the latter conflicted with the angle brackets
used in links. Specifying by Unicode code·point still isn’t supported
in links, but the behaviour should be less surprising.

The old Unicode code·point behaviour was probably broken also, in the
case where the code·point was not the first character in a paragraph,
but it is fixed now.
---

diff --git a/README.markdown b/README.markdown
index 2bfa515..6b43717 100644
--- a/README.markdown
+++ b/README.markdown
@@ -1,5 +1,5 @@
 <!--
-SPDX-FileCopyrightText: 2024 Lady <https://www.ladys.computer/about/#lady>
+SPDX-FileCopyrightText: 2024, 2025 Lady <https://www.ladys.computer/about/#lady>
 SPDX-License-Identifier: CC0-1.0
 -->
 # 💄📝 Les·M·L
@@ -241,10 +241,10 @@ Once the tree is built as above, it is remediated into its final form
 - Linebreaks in preformatted paragraphs are replaced with `<html:br>`.
 
 Finally, any character can be escaped by instead providing its Unicode
-  codepoint in the form `<U+NNNN>`, where `NNNN` is one or more
+  codepoint in the form `{U+NNNN}`, where `NNNN` is one or more
   hexadecimal digits.
 Multiple codepoints may be provided separated by periods, as in
-  `<U+WWWW.ZZZZ>`
+  `{U+WWWW.ZZZZ}`.
 
 ## Usage
 
diff --git a/parser.xslt b/parser.xslt
index bd510b0..a47b5bb 100644
--- a/parser.xslt
+++ b/parser.xslt
@@ -1,12 +1,12 @@
 <?xml version="1.0"?>
 <!--
-SPDX-FileCopyrightText: 2024 Lady <https://www.ladys.computer/about/#lady>
+SPDX-FileCopyrightText: 2024, 2025 Lady <https://www.ladys.computer/about/#lady>
 SPDX-License-Identifier: MPL-2.0
 -->
 <!--
 ⁌ 💄📝 Les·M·L ∷ parser.xslt
 
-© 2024 Lady [@ Lady’s Computer]
+© 2024–2025 Lady [@ Ladys Computer]
 
 This Source Code Form is subject to the terms of the Mozilla Public License, v 2.0.
 If a copy of the M·P·L was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
@@ -69,11 +69,11 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 	<template name="LesML:unescape">
 		<param name="source"/>
 		<choose>
-			<when test="contains($source, '&lt;U+')">
-				<variable name="after" select="substring-after($source, '&lt;U+')"/>
+			<when test="contains($source, '{U+')">
+				<variable name="after" select="substring-after($source, '{U+')"/>
 				<choose>
-					<when test="contains($after, '>')">
-						<variable name="inner" select="substring-before($after, '>')"/>
+					<when test="contains($after, '}')">
+						<variable name="inner" select="substring-before($after, '}')"/>
 						<variable name="components">
 							<call-template name="LesML:split">
 								<with-param name="source" select="$inner"/>
@@ -81,14 +81,14 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 							</call-template>
 						</variable>
 						<variable name="component-nodes" select="exsl:node-set($components)/node()"/>
+						<value-of select="substring-before($source, '{U+')"/>
 						<choose>
 							<when test="$component-nodes[string(.)='' or translate(., '0123456789ABCDEF', '')!='']">
-								<value-of select="substring-before($source, '&lt;U+')"/>
-								<text>&lt;U+</text>
+								<text>{U+</text>
 								<value-of select="$inner"/>
-								<text>></text>
+								<text>}</text>
 								<call-template name="LesML:unescape">
-									<with-param name="source" select="substring-after($after, '>')"/>
+									<with-param name="source" select="substring-after($after, '}')"/>
 								</call-template>
 							</when>
 							<otherwise>
@@ -98,14 +98,14 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 									<text>;</text>
 								</for-each>
 								<call-template name="LesML:unescape">
-									<with-param name="source" select="substring-after($after, '>')"/>
+									<with-param name="source" select="substring-after($after, '}')"/>
 								</call-template>
 							</otherwise>
 						</choose>
 					</when>
 					<otherwise>
-						<value-of select="substring-before($source, '&lt;U+')"/>
-						<text>&lt;U+</text>
+						<value-of select="substring-before($source, '{U+')"/>
+						<text>{U+</text>
 						<call-template name="LesML:unescape">
 							<with-param name="source" select="$after"/>
 						</call-template>
@@ -641,16 +641,6 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 			<with-param name="lines" select="exsl:node-set($lines-fragment)/*"/>
 		</call-template>
 	</template>
-	<template match="@*|node()" mode="LesML:finalize-tree" priority="-1">
-		<copy>
-			<apply-templates select="@*|node()" mode="LesML:finalize-tree"/>
-		</copy>
-	</template>
-	<template match="text()" mode="LesML:finalize-tree">
-		<call-template name="LesML:break-and-unescape">
-			<with-param name="source" select="string(.)"/>
-		</call-template>
-	</template>
 	<template match="html:blockquote" mode="LesML:finalize-tree">
 		<if test="not(preceding-sibling::node()) or preceding-sibling::node()[position()=1 and not(self::html:blockquote)]">
 			<variable name="notquote" select="following-sibling::node()[not(self::html:blockquote)][1]"/>
@@ -743,6 +733,19 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 			</apply-templates>
 		</if>
 	</template>
+	<template match="processing-instruction()[local-name()='LesML-Link-Escape']" mode="LesML:finalize-tree">
+		<text>🔗</text>
+	</template>
+	<template match="text()" mode="LesML:finalize-tree">
+		<call-template name="LesML:break-and-unescape">
+			<with-param name="source" select="string(.)"/>
+		</call-template>
+	</template>
+	<template match="@*|node()" mode="LesML:finalize-tree" priority="-1">
+		<copy>
+			<apply-templates select="@*|node()" mode="LesML:finalize-tree"/>
+		</copy>
+	</template>
 	<template match="node()" mode="LesML:inline">
 		<param name="element-name"/>
 		<param name="element-namespace" select="'http://www.w3.org/1999/xhtml'"/>
@@ -752,88 +755,85 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 		<param name="langtag-supported" select="false()"/>
 		<choose>
 			<when test="self::*">
-				<copy>
-					<copy-of select="@*"/>
-					<variable name="start-node" select="text()[contains(., $start-sigil)][1]"/>
-					<choose>
-						<when test="$start-node">
-							<variable name="remaining">
-								<value-of select="substring-after($start-node, $start-sigil)"/>
-								<copy-of select="$start-node/following-sibling::node()"/>
-							</variable>
-							<variable name="end-node" select="exsl:node-set($remaining)/node()[self::text() and contains(., $end-sigil)][1]"/>
-							<choose>
-								<when test="$end-node">
-									<variable name="restoftext" select="substring-after($end-node, $end-sigil)"/>
-									<variable name="maybe-langtag">
-										<if test="$langtag-supported and starts-with($restoftext, '@') and contains($restoftext, '$')">
-											<value-of select="substring-before(substring-after($restoftext, '@'), '$')"/>
-										</if>
-									</variable>
-									<variable name="langtag">
-										<if test="translate($maybe-langtag, '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-', '')=''">
-											<value-of select="$maybe-langtag"/>
-										</if>
-									</variable>
-									<variable name="rest">
-										<html:div>
-											<choose>
-												<when test="string($langtag)!=''">
-													<value-of select="substring-after($restoftext, '$')"/>
-												</when>
-												<otherwise>
-													<value-of select="$restoftext"/>
-												</otherwise>
-											</choose>
-											<copy-of select="$end-node/following-sibling::node()"/>
-										</html:div>
-									</variable>
-									<variable name="processed-rest">
-										<apply-templates select="exsl:node-set($rest)/*" mode="LesML:inline">
-											<with-param name="element-name" select="$element-name"/>
-											<with-param name="element-namespace" select="$element-namespace"/>
-											<with-param name="start-sigil" select="$start-sigil"/>
-											<with-param name="end-sigil" select="$end-sigil"/>
-											<with-param name="role" select="$role"/>
-											<with-param name="langtag-supported" select="$langtag-supported"/>
-										</apply-templates>
-									</variable>
-									<copy-of select="$start-node/preceding-sibling::node()"/>
-									<value-of select="substring-before($start-node, $start-sigil)"/>
-									<element name="{$element-name}" namespace="{$element-namespace}">
-										<if test="string($role)!=''">
-											<attribute name="role">
-												<value-of select="$role"/>
-											</attribute>
-										</if>
-										<if test="string($langtag)!=''">
-											<if test="$element-namespace='http://www.w3.org/1999/xhtml'">
-												<attribute name="lang">
-													<value-of select="$langtag"/>
-												</attribute>
-											</if>
-											<attribute name="xml:lang">
+				<variable name="end-node" select="text()[contains(., $end-sigil)][1]"/>
+				<variable name="has-start-node" select="$end-node/preceding-sibling::text()[contains(., $start-sigil)] or string-length(substring-after($end-node, $start-sigil))>string-length(substring-after($end-node, $end-sigil))"/>
+				<choose>
+					<when test="$end-node and $has-start-node">
+						<variable name="preceding">
+							<copy-of select="$end-node/preceding-sibling::node()"/>
+							<value-of select="substring-before($end-node, $end-sigil)"/>
+						</variable>
+						<variable name="start-node" select="exsl:node-set($preceding)/text()[contains(., $start-sigil)][last()]"/>
+						<variable name="restoftext" select="substring-after($end-node, $end-sigil)"/>
+						<variable name="maybe-langtag">
+							<if test="$langtag-supported and starts-with($restoftext, '@') and contains($restoftext, '$')">
+								<value-of select="substring-before(substring-after($restoftext, '@'), '$')"/>
+							</if>
+						</variable>
+						<variable name="langtag">
+							<if test="translate($maybe-langtag, '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-', '')=''">
+								<value-of select="$maybe-langtag"/>
+							</if>
+						</variable>
+						<variable name="start-tokens-fragment">
+							<call-template name="LesML:split">
+								<with-param name="source" select="string($start-node)"/>
+								<with-param name="separator" select="$start-sigil"/>
+							</call-template>
+						</variable>
+						<variable name="start-tokens" select="exsl:node-set($start-tokens-fragment)/*"/>
+						<variable name="wrapped">
+							<copy>
+								<copy-of select="@*"/>
+								<copy-of select="$start-node/preceding-sibling::node()"/>
+								<for-each select="$start-tokens[position()!=last()]">
+									<value-of select="."/>
+									<if test="position()!=last()">
+										<value-of select="$start-sigil"/>
+									</if>
+								</for-each>
+								<element name="{$element-name}" namespace="{$element-namespace}">
+									<if test="string($role)!=''">
+										<attribute name="role">
+											<value-of select="$role"/>
+										</attribute>
+									</if>
+									<if test="string($langtag)!=''">
+										<if test="$element-namespace='http://www.w3.org/1999/xhtml'">
+											<attribute name="lang">
 												<value-of select="$langtag"/>
 											</attribute>
 										</if>
-										<copy-of select="$end-node/preceding-sibling::node()"/>
-										<value-of select="substring-before($end-node, $end-sigil)"/>
-									</element>
-									<copy-of select="exsl:node-set($processed-rest)/*/node()"/>
-								</when>
-								<otherwise>
-									<apply-templates select="node()" mode="LesML:inline">
-										<with-param name="element-name" select="$element-name"/>
-										<with-param name="element-namespace" select="$element-namespace"/>
-										<with-param name="start-sigil" select="$start-sigil"/>
-										<with-param name="end-sigil" select="$end-sigil"/>
-										<with-param name="role" select="$role"/>
-										<with-param name="langtag-supported" select="$langtag-supported"/>
-									</apply-templates>
-								</otherwise>
-							</choose>
-						</when>
-						<otherwise>
+										<attribute name="xml:lang">
+											<value-of select="$langtag"/>
+										</attribute>
+									</if>
+									<value-of select="$start-tokens[last()]"/>
+									<copy-of select="$start-node/following-sibling::node()"/>
+								</element>
+								<choose>
+									<when test="string($langtag)!=''">
+										<value-of select="substring-after($restoftext, '$')"/>
+									</when>
+									<otherwise>
+										<value-of select="$restoftext"/>
+									</otherwise>
+								</choose>
+								<copy-of select="$end-node/following-sibling::node()"/>
+							</copy>
+						</variable>
+						<apply-templates select="exsl:node-set($wrapped)/*" mode="LesML:inline">
+							<with-param name="element-name" select="$element-name"/>
+							<with-param name="element-namespace" select="$element-namespace"/>
+							<with-param name="start-sigil" select="$start-sigil"/>
+							<with-param name="end-sigil" select="$end-sigil"/>
+							<with-param name="role" select="$role"/>
+							<with-param name="langtag-supported" select="$langtag-supported"/>
+						</apply-templates>
+					</when>
+					<otherwise>
+						<copy>
+							<copy-of select="@*"/>
 							<apply-templates select="node()" mode="LesML:inline">
 								<with-param name="element-name" select="$element-name"/>
 								<with-param name="element-namespace" select="$element-namespace"/>
@@ -842,9 +842,9 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 								<with-param name="role" select="$role"/>
 								<with-param name="langtag-supported" select="$langtag-supported"/>
 							</apply-templates>
-						</otherwise>
-					</choose>
-				</copy>
+						</copy>
+					</otherwise>
+				</choose>
 			</when>
 			<otherwise>
 				<copy-of select="."/>
@@ -855,82 +855,108 @@ If a copy of the M·P·L was not distributed with this file, You can obtain one
 		<variable name="result">
 			<choose>
 				<when test="self::*">
-					<copy>
-						<copy-of select="@*"/>
-						<variable name="start-node" select="text()[contains(., '{🔗')][1]"/>
-						<choose>
-							<when test="$start-node">
-								<variable name="remaining">
-									<value-of select="substring-after($start-node, '{🔗')"/>
-									<copy-of select="$start-node/following-sibling::node()"/>
-								</variable>
-								<variable name="end-node" select="exsl:node-set($remaining)/node()[self::text() and contains(., '>}') and not(preceding-sibling::*)][1]"/>
-								<variable name="hyperlink">
-									<for-each select="$end-node/preceding-sibling::node()">
-										<value-of select="."/>
-									</for-each>
-									<value-of select="substring-before($end-node, '>}')"/>
-								</variable>
-								<choose>
-									<when test="contains($hyperlink, '&lt;')">
-										<variable name="ltcomponents">
-											<call-template name="LesML:split">
-												<with-param name="source" select="$hyperlink"/>
-												<with-param name="separator" select="'&lt;'"/>
-											</call-template>
-										</variable>
-										<variable name="ltcomponent-nodes" select="exsl:node-set($ltcomponents)/*"/>
-										<variable name="rest">
-											<html:div>
-												<value-of select="substring-after($end-node, '>}')"/>
-												<copy-of select="$end-node/following-sibling::node()"/>
-											</html:div>
-										</variable>
-										<variable name="processed-rest">
-											<apply-templates select="exsl:node-set($rest)/*" mode="LesML:linkify"/>
-										</variable>
-										<copy-of select="$start-node/preceding-sibling::node()"/>
-										<value-of select="substring-before($start-node, '{🔗')"/>
-										<element name="html:a">
-											<attribute name="href">
-												<value-of select="$ltcomponent-nodes[last()]"/>
-											</attribute>
-											<choose>
-												<when test="count($ltcomponent-nodes)>2 or normalize-space($ltcomponent-nodes[1])!=''">
-													<value-of select="$ltcomponent-nodes[1]"/>
-													<for-each select="$ltcomponent-nodes[position()>1 and position()!=last()]">
-														<text>&lt;</text>
-														<value-of select="."/>
-													</for-each>
-												</when>
-												<otherwise>
-													<value-of select="$ltcomponent-nodes[last()]"/>
-												</otherwise>
-											</choose>
-										</element>
-										<copy-of select="exsl:node-set($processed-rest)/*/node()"/>
-									</when>
-									<otherwise>
-										<variable name="rest">
-											<html:div>
-												<copy-of select="$remaining"/>
-											</html:div>
-										</variable>
-										<variable name="processed-rest">
-											<apply-templates select="exsl:node-set($rest)/*" mode="LesML:linkify"/>
-										</variable>
-										<copy-of select="$start-node/preceding-sibling::node()"/>
-										<value-of select="substring-before($start-node, '{🔗')"/>
-										<text>{🔗</text>
-										<copy-of select="exsl:node-set($processed-rest)/*/node()"/>
-									</otherwise>
-								</choose>
-							</when>
-							<otherwise>
+					<variable name="end-node" select="text()[contains(., '>}')][1]"/>
+					<variable name="has-start-node" select="$end-node/preceding-sibling::text()[contains(., '{🔗') and not(following-sibling::*)] or string-length(substring-after($end-node, '{🔗'))>string-length(substring-after($end-node, '>}'))"/>
+					<choose>
+						<when test="$end-node and $has-start-node">
+							<variable name="preceding">
+								<copy-of select="$end-node/preceding-sibling::node()"/>
+								<value-of select="substring-before($end-node, '>}')"/>
+							</variable>
+							<variable name="start-node" select="exsl:node-set($preceding)/text()[contains(., '{🔗') and not(following-sibling::*)][last()]"/>
+							<variable name="start-tokens-fragment">
+								<call-template name="LesML:split">
+									<with-param name="source" select="string($start-node)"/>
+									<with-param name="separator" select="'{🔗'"/>
+								</call-template>
+							</variable>
+							<variable name="start-tokens" select="exsl:node-set($start-tokens-fragment)/*"/>
+							<variable name="hyperlink">
+								<value-of select="$start-tokens[last()]"/>
+								<for-each select="$start-node/following-sibling::node()">
+									<choose>
+										<when test="self::text()">
+											<value-of select="."/>
+										</when>
+										<when test="self::processing-instruction()[local-name()='LesML-Link-Escape']">
+											<text>🔗</text>
+										</when>
+									</choose>
+								</for-each>
+							</variable>
+							<choose>
+								<when test="contains($hyperlink, '&lt;')">
+									<variable name="ltcomponents-fragment">
+										<call-template name="LesML:split">
+											<with-param name="source" select="$hyperlink"/>
+											<with-param name="separator" select="'&lt;'"/>
+										</call-template>
+									</variable>
+									<variable name="ltcomponents" select="exsl:node-set($ltcomponents-fragment)/*"/>
+									<variable name="wrapped">
+										<copy>
+											<copy-of select="@*"/>
+											<copy-of select="$start-node/preceding-sibling::node()"/>
+											<for-each select="$start-tokens[position()!=last()]">
+												<value-of select="."/>
+												<if test="position()!=last()">
+													<text>{🔗</text>
+												</if>
+											</for-each>
+											<element name="html:a">
+												<attribute name="href">
+													<value-of select="$ltcomponents[last()]"/>
+												</attribute>
+												<choose>
+													<when test="count($ltcomponents)>2 or normalize-space($ltcomponents[1])!=''">
+														<for-each select="$ltcomponents[position()!=last()]">
+															<value-of select="."/>
+															<if test="position()!=last()">
+																<text>&lt;</text>
+															</if>
+														</for-each>
+													</when>
+													<otherwise>
+														<value-of select="$ltcomponents[last()]"/>
+													</otherwise>
+												</choose>
+											</element>
+											<value-of select="substring-after($end-node, '>}')"/>
+											<copy-of select="$end-node/following-sibling::node()"/>
+										</copy>
+									</variable>
+									<apply-templates select="exsl:node-set($wrapped)/*" mode="LesML:linkify"/>
+								</when>
+								<otherwise>
+									<variable name="escaped">
+										<copy>
+											<copy-of select="@*"/>
+											<copy-of select="$start-node/preceding-sibling::node()"/>
+											<for-each select="$start-tokens[position()!=last()]">
+												<value-of select="."/>
+												<if test="position()!=last()">
+													<text>{🔗</text>
+												</if>
+											</for-each>
+											<text>{</text>
+											<processing-instruction name="LesML-Link-Escape"/>
+											<copy-of select="$hyperlink"/>
+											<text>>}</text>
+											<value-of select="substring-after($end-node, '>}')"/>
+											<copy-of select="$end-node/following-sibling::node()"/>
+										</copy>
+									</variable>
+									<apply-templates select="exsl:node-set($escaped)/*" mode="LesML:linkify"/>
+								</otherwise>
+							</choose>
+						</when>
+						<otherwise>
+							<copy>
+								<copy-of select="@*"/>
 								<apply-templates select="node()" mode="LesML:linkify"/>
-							</otherwise>
-						</choose>
-					</copy>
+							</copy>
+						</otherwise>
+					</choose>
 				</when>
 				<otherwise>
 					<copy-of select="."/>