]> Lady’s Gitweb - Codemark/blob - sh/codemark.sh
Initial commit
[Codemark] / sh / codemark.sh
1 #!/usr/bin/env sh
2 # SPDX-FileCopyrightText: 2026 Lady <https://www.ladys.computer/about/#lady>
3 # SPDX-License-Identifier: MPL-2.0
4
5 ## ⋯ 🧮🖍 Codemark ∷ sh ∷ codemark.sh
6 ##
7 ## ⁌ The 🧮🖍 Codemark shell script
8 ##
9 ## ] Copyright © 2026 Lady [@ Ladys Computer].
10 ## ]
11 ## ] This Source Code Form is subject to the terms of the Mozilla
12 ## ] Public License, version 2.0.
13 ## ] If a copy of the M·P·L was not distributed with this file, You can
14 ## ] obtain one at {🔗<https://mozilla.org/MPL/2.0/>}.
15
16 ## Usage :⁠—
17 ##
18 ## »|sh$ sh ./sh/codemark.sh [--] [filename] [syntax]
19
## The script aborts as soon as any unguarded command fails.

set -o 'errexit'

## `set -o 'pipefail'´ is disabled because the (now quite old) version
## of Dash bundled with Mac·O·S does not support it.
## If it is ever upgraded to Posix 2024 compliance, then the option can
## (and should) be enabled.
## See the `sh´ directory at
## {🔗<https://github.com/apple-oss-distributions/shell_cmds/>} for
## the current source code for Mac·O·S Dash.

# set -o 'pipefail'

## `LANG´ and `LC_ALL´ are set to `C´ because these scripts assume
## working with U·T·F‐8 strings as opaque series of bytes.
##
## Both variables are explicitly exported.
## A plain assignment only reaches child processes when the variable
## was already present in the environment at startup; exporting makes
## sure that the commands run by this script also see the `C´ locale.

LANG='C'
LC_ALL='C'
export LANG LC_ALL
37
## § Variable defaults
##
## All of the commands used by this script are overridable with your
## own implementations by setting the corresponding `cmd_COMMAND´
## variable.
## The hope is that this provides a somewhat easier overriding
## mechanism than those built into the shell (e·g defining a function
## of the same name or overriding the `PATH´).
##
## Each default is the plain name of the command it stands for.

: "${cmd_AWK:=awk}"
: "${cmd_BASENAME:=basename}"
: "${cmd_CAT:=cat}"
: "${cmd_CKSUM:=cksum}"
: "${cmd_CP:=cp}"
: "${cmd_DATE:=date}"
: "${cmd_DIRNAME:=dirname}"
: "${cmd_MKDIR:=mkdir}"
: "${cmd_MV:=mv}"
: "${cmd_PRINTF:=printf}"
: "${cmd_REALPATH:=realpath}"
: "${cmd_RM:=rm}"
: "${cmd_SED:=sed}"
: "${cmd_SORT:=sort}"
: "${cmd_TEST:=test}"
: "${cmd_TR:=tr}"
: "${cmd_XARGS:=xargs}"

## Similarly, there are a few special paths that this program needs to
## run.
## Their default values are calculated below.
##
## `path_CODEMARK´ defaults to the parent of the directory containing
## this script.

thisfile="$("${cmd_REALPATH}" -- "${0}")"
thisdir="$("${cmd_DIRNAME}" -- "${thisfile}")"
defaultbuilddir='@build.tmp/🧮🖍'
defaultcodemark="$("${cmd_REALPATH}" -- "${thisdir}"'/..')"

: "${path_BUILDDIR:=${defaultbuilddir}}"
: "${path_CODEMARK:=${defaultcodemark}}"

## The default syntax directory is derived from `path_CODEMARK´, so it
## must only be computed once that variable has received its default.

defaultsyntaxdir="${path_CODEMARK}"'/sed/SYNTAXES'

: "${path_SYNTAXDIR:=${defaultsyntaxdir}}"
84
## § Parameter processing
##
## Up to two positional parameters are read: first the source filename,
## then the syntax id.
## A single leading `--´ is tolerated and discarded before either of
## them is looked at.

if "${cmd_TEST}" "${1}" = '--'
then :
	shift
fi

## The input file starts out as `-´ (standard input).
## It is only replaced, by its resolved real path, when a nonempty
## first parameter other than `-´ was supplied.

inputfile='-'
if "${cmd_TEST}" -n "${1}"
then :
	if "${cmd_TEST}" "${1}" != '-'
	then :
		inputfile="$("${cmd_REALPATH}" -- "${1}")"
	fi
fi
106
## The syntax id is the second parameter whenever one is given.
## Failing that, it is whatever follows the final `.´ in the last path
## component of the input file; an input file with no such extension
## (including standard input) yields `lesml´.

mainsyntax="${2}"
if "${cmd_TEST}" -z "${mainsyntax}"
then :
	mainsyntax="$(
		"${cmd_PRINTF}" '%s\n' "${inputfile}" |
		"${cmd_SED}" '/[.][^/][^/]*$/!s/$/.lesml/;s/.*[.]\([^/]*\)$/\1/'
	)"
fi

## `sedprog´ names the Sed program describing the main syntax.
## It stays empty for `lesml´, which needs no conversion.
## Any other syntax must have a definition file in the syntax
## directory; when it does not, the script reports the problem on
## standard error and exits with status 1.

sedprog=
if "${cmd_TEST}" "${mainsyntax}" != 'lesml'
then :
	sedprog="${path_SYNTAXDIR}"'/'"${mainsyntax}"'.sed'
	if "${cmd_TEST}" ! -f "${sedprog}"
	then :
		"${cmd_PRINTF}" '%s\n' 'Unrecognized syntax: '"${mainsyntax}"'.' >&2
		exit 1
	fi
fi
139
## § Processing the file
##
## The input file is processed and saved to a temporary file whose
## filename is generated from the current time and a hash of the
## input filename.
## The hope is that this is enough to prevent collisions (assuming two
## versions of this script do not attempt to process the same file at
## the same second).
##
## `temphash´ is the complete output of the checksum command for the
## input filename, and `temptime´ is the current U·T·C time with a
## resolution of one second.

temphash="$("${cmd_PRINTF}" '%s\n' "${inputfile}" | "${cmd_CKSUM}")"
temptime="$("${cmd_DATE}" -u '+%Y%m%d%H%M%S')"
tempfile="${path_BUILDDIR}"'/'"${temphash}"'@'"${temptime}"'.lesml'

## The build directory is created if it does not exist yet, as the
## temporary file cannot be written otherwise.

"${cmd_MKDIR}" -p -- "${path_BUILDDIR}"
156
## The temporary file is now filled with Les·M·L.
## For any syntax other than `lesml´, the input (standard input when
## the input file is `-´, the named file otherwise) is run through the
## Sed program for that syntax, then through `syntax-comment.sed´, and
## finally through a substitution which inserts the syntax id.
## If the syntax ⹐is⹑ `lesml´, the input is stored unchanged: copied
## from standard input, or copied from the named file.
##
## NOTE(review): the pattern half of the final `s//…/g´ substitution
## appears empty in this copy of the file; presumably a placeholder
## character belongs there and was lost in extraction. Confirm against
## the canonical source.

if "${cmd_TEST}" "${mainsyntax}" != 'lesml'
then :
	if "${cmd_TEST}" "${inputfile}" = '-'
	then :
		"${cmd_SED}" -f "${sedprog}"
	else :
		"${cmd_SED}" -f "${sedprog}" <"${inputfile}"
	fi |
	"${cmd_SED}" -f "${path_CODEMARK}"'/sed/syntax-comment.sed' |
	"${cmd_SED}" 's//'"${mainsyntax}"'/g' >|"${tempfile}"
elif "${cmd_TEST}" "${inputfile}" = '-'
then :
	"${cmd_CAT}" >|"${tempfile}"
else :
	"${cmd_CP}" -- "${inputfile}" "${tempfile}"
fi
182
## Next, tagged code blocks in the Les·M·L need to be processed.
## First, a list of syntaxes must be collected.
## Every line beginning with `»|SYNTAX$ ´ contributes its syntax id;
## the ids are deduplicated and joined with spaces.

syntaxes="$(
	"${cmd_SED}" '/^»|[^ $]*$ /!d;s/»|\([^ $]*\).*/\1/' <"${tempfile}" |
	"${cmd_SORT}" -u |
	"${cmd_TR}" -s '\n' ' '
)"

## Each syntax can then be processed.
## Processing blanks out all lines not in the syntax, removes the
## syntax prefix, processes the resulting file, and then restores the
## blanked out lines.
## Unlike with the main syntax, missing syntaxes at this stage are
## simply ignored.
##
## For every syntax the current temporary file is moved aside (with a
## `~´ suffix), rewritten from that copy, and the copy is then removed.
## The final Awk step reads the processed lines from standard input,
## then walks the moved‐aside copy: lines tagged with the syntax are
## replaced by the processed line of the same number, and all other
## lines are printed as they were.
##
## `syntaxes´ is deliberately expanded unquoted in the `for´ list so
## that it splits into one word per syntax.
## The syntax id itself is always expanded inside double quotes when
## spliced into the Sed and Awk programs, so that it is never subject
## to field splitting or pathname expansion.
##
## NOTE(review): `\7f´ below looks like a rendering of a literal DEL
## (0x7F) byte, and the empty pattern in `s//…/g´ looks like a dropped
## placeholder character. Both are reproduced exactly as they appear
## in this copy; confirm against the canonical source.

for s in ${syntaxes}
do :
	syntaxprog="${path_SYNTAXDIR}/${s}.sed"
	if "${cmd_TEST}" -f "${syntaxprog}"
	then :
		"${cmd_MV}" -- "${tempfile}" "${tempfile}"'~'
		"${cmd_SED}" \
			-e '/^»|'"${s}"'$ /!s/.*/\7f/;s/^»|'"${s}"'$ //' \
			-f "${syntaxprog}" \
			-e '/\7f/s/.*//' \
			<"${tempfile}"'~' |
		"${cmd_SED}" -f "${path_CODEMARK}"'/sed/syntax-comment.sed' |
		"${cmd_SED}" 's//'"${s}"'/g;/./s/^/»/' |
		"${cmd_AWK}" \
			-F '\037' \
			'FNR==NR{a[NR]=$0;next};{r=/^»\|'"${s}"'\$ /};r{print a[FNR]};!r' \
			- "${tempfile}"'~' \
			>|"${tempfile}"
		"${cmd_RM}" -- "${tempfile}"'~'
	fi
done
220
## Finally, the resulting file is written to standard output and
## deleted.
##
## When the first line of the result does not contain `#!lesml´, a
## header line is printed before it.
## The header is taken from `lesml_HEADER´ if that is set; otherwise
## it is built from `lesml_LANG´ (default `en´) and `lesml_PROFILE´.
## The defaults for those two variables are applied ⹐before⹑ the
## default header is built from them, so that the header never ends up
## with an empty language or profile.

if "${cmd_TEST}" -z "$("${cmd_SED}" -n '/#!lesml/p;q' <"${tempfile}")"
then :
	: "${lesml_LANG:=en}"
	: "${lesml_PROFILE:=urn:fdc:ladys.computer:20260226:codemark:pf:out}"
	defaultheader='#!lesml@'"${lesml_LANG}"'$ profile='"${lesml_PROFILE}"
	: "${lesml_HEADER:=${defaultheader}}"
	"${cmd_PRINTF}" '%s\n' "${lesml_HEADER}"
fi
"${cmd_CAT}" -- "${tempfile}"
"${cmd_RM}" -- "${tempfile}"
This page took 0.087624 seconds and 5 git commands to generate.