From: Lady Date: Fri, 28 Mar 2025 17:44:52 +0000 (-0400) Subject: Improve documentation generation X-Git-Url: https://git.ladys.computer/CGirls/commitdiff_plain/1d51bcca18b99d2b82d388ca8832ac85c539fdd6 Improve documentation generation • Update Les·M·L to 0.4.0. • Switch to using a local transform, `xslt/documentation.xslt´, which includes the Les·M·L parser instead of using the Les·M·L parser directly. This offers a great deal more configurability and the potential for customization. Styling information is moved into this transform and expanded. `´ inside of `´ is replaced with `´, unless it has a language tag. • Automatically provide syntax hiliting to code blocks generated from header files. It¦s impossible to do this perfectly, but assuming files follow a consistent style, this should handle most cases transparently. It would be good to document what exactly “consistent style” means. ⋯ More generally, it might be worth extracting this documentation generation code (and maybe some stuff in `cgirls.mak´ as well?) out into a separate repository of C tools, but this work will probably be deferred until there is a second C repository which actually needs to make use of it. • Update the markup, and some text, in those documentation comments in `request.h´. --- diff --git a/LesML b/LesML index 27fb10d..16020b1 160000 --- a/LesML +++ b/LesML @@ -1 +1 @@ -Subproject commit 27fb10d1b6e7ce74d4af82888c0ef3fdc6870f8d +Subproject commit 16020b1dc994098540d8189a39151785ece0e793 diff --git a/make/documentation.mak b/make/documentation.mak index cf80196..9946ed8 100644 --- a/make/documentation.mak +++ b/make/documentation.mak @@ -32,7 +32,7 @@ documentation : $(DOCDIR)/README.xhtml $(DOCUMENTATION_FILES) $(DOCDIR)/index.xh # (callable) Strip portions between `(*´ and `*)´ and collapse newlines in the argument. # Backslash sequences in the argument are handled ⅌ `printf´. # Single quotes must not appear in the argument or this will break. -makeunreadable = $(shell LANG=C LC_ALL=C printf '%s\n' '$(subst $(newline),$(space),$1)' | $(SED) 's/^ *//;s/ *$$//;s/ *([*][^*]*[*]) *//g' | $(XARGS) -0 $(PRINTF) '%b\n') +makeunreadable = $(shell LANG=C LC_ALL=C printf '%s\n' '$(subst $(newline),$(space),$1)' | $(SED) 's/^[ ]*//;s/[ ]*$$//;s/[ ]*([*][^*]*[*])[ ]*//g' | $(XARGS) -0 $(PRINTF) '%b\n') define readablesedcmd \\@^/[*]$$@,\\@^[\t]*[*]/$$@d; (*delete multiline comments*) @@ -41,36 +41,56 @@ define readablesedcmd \\@^/[*][*]$$@s@^.*$$@@; (*delete start of documentation comments (leaving newline)*) \\@^ [*][*]//*$$@s@^.*$$@@; (*delete end of documentation comments (leaving newline)*) \\@^ [*][*]$$@s@$$@ @; (*add trailing space to empty documentation comment lines*) -\\@^ [*][*] @!{\\@.@s@^@\t|`@;\\@.@s@$$@´@;}; (*format non·empty lines of code*) -\\@^ [*][*] @{s@^ [*][*] [ \t]*@@;s@ :—@ :⁠—@g;s@« @« @g;s@‹ @‹ @g;s@—: @—⁠: @g;s@ »@ »@g;s@ ›@ ›@g;s@{<\\([^>]*\\)[.]h>}@{🔗`\\1`<./\\1>}@;}; (*drop prefix from documentation lines and format*) +\\@^ [*][*] @!{ (*format non·empty lines of code*) + s@/[*]\\([^* ]*\\)[*]/@\\1@g; (*explicit formatting*) + \\@/[*][^ ]*[*]/@!{ (*autoformat lines without explicit formatting*) + s@^\0043ifndef \\([^ ]*\\)@\0043ifndef ⟨\\1⟩@; (*macro ifndef*) + s@^\0043define \\([^ ]*\\)@\0043define ⟨\\1⟩@; (*macro define*) + s@^\0043endif /[*] \\([^ ]*\\) [*]/@\0043endif /* ⟨\\1⟩ */@; (*macro define*) + \\@\t*[?:]@!s@^\\(\t\t*\\)\\([^ ]*\\);@\\1⟨\\2⟩;@; (*handle (presumed) enum members without assignment*) + \\@\t*[?:]@!s@^\\(\t\t*\\)\\([^ ]*\\) =@\\1⟨\\2⟩ =@; (*handle (presumed) enum members with assignment*) + \\@\t*[?:]@!s@^\\(\t\t*\\)\\([^= ]\\( *[^= ]\\)*\\) \\([^ ][^ ]*\\);@\\1⸤\\2⸥ ⟨\\4⟩;@; (*handle (presumed) struct members*) + s@^\\(constexpr \\)*\\([^\t= ]\\( *[^= ]\\)*\\) \\([^= ][^= ]*\\) =@\\1⸤\\2⸥ ⟨\\4⟩ =@; (*handle file‐scope assignments*) + s@^enum \\([^ ][^ ]*\\)@enum ⸤\\1⸥@; (*enum declarations*) + \\@^enum@s@: \\([^{]*[^{ ]\\)@: ⸤\\1⸥@; (*enum underlying values*) + s@^struct \\([^ ][^ ]*\\)@struct ⸤\\1⸥@; (*struct declarations*) + s@^typedef \\([^ ]\\( *[^ ]\\)*\\) \\([^ ][^ ]*\\);@typedef ⸤\\1⸥ ⸤\\3⸥;@; (*typedef definitions*) + \\@^[^\t ][^\t ]* [^ ][^ ]*(@s@, *@⸥, ⸤@g; (*format function arguments*) + s@^\\([^\t ][^ ]*\\) \\([^ ][^ ]*\\)(\\([^)][^)]*\\))@⸤\\1⸥ ⟨\\2⟩(⸤\\3⸥)@; (*format functions*) + s@\\(//.*\\)$$@⟦\\1⟧@; (*format comments*) + }; + \\@.@s@^@\t|`@; (*format beginning of line*) + \\@.@s@$$@´@; (*format end of line*) +}; +\\@^ [*][*] @{ (*format documentation*) + s@^ [*][*] [ \t]*@@; (*drop documentation leader*) + s@ :—@ :⁠—@g; (*fixup spacing for :—*) + s@« @« @g; (*fixup spacing for «*) + s@‹ @‹ @g; (*fixup spacing for ‹*) + s@—: @—⁠: @g; (*fixup spacing for —:*) + s@ »@ »@g; (*fixup spacing for »*) + s@ ›@ ›@g; (*fixup spacing for ›*) + s@{<\\([^>]*\\)[.]h>}@{🔗`\\1`<./\\1>}@g; (*header file linking*) +}; endef sedcmd := $(call makeunreadable,$(readablesedcmd)) -define readablestylesheet -@charset "UTF-8"; -article{ Margin: Auto; Width: Min-Content; Max-Width: 100% } -article::after{ Display: Block; Width: 31REM; Max-Width: 100%; Min-Width: 100%; Content: "" } -blockquote>pre{ Margin: Auto; Width: 71CH; Max-Width: 100%; White-Space: Pre-Wrap } -blockquote>pre+pre{ Margin-Top: 1LH } -endef -stylesheet := $(call makeunreadable,$(readablestylesheet)) - # The first argument should be shell commands producing Les·M·L; the second should be the title of the page. -processlesml = { $(PRINTF) '%s\n' '' '$2'; { $(PRINTF) '%s\n%s' '' ''; } | xsltproc $(LESML)/parser.xslt - | $(SED) '1{/^' ; } +processlesml = { $(PRINTF) '%s\n%s' '' ''; } | xsltproc --stringparam PROJECT_NAME '$(PROJECT_NAME)' --stringparam TITLE '$2' xslt/documentation.xslt - -$(DOCUMENTATION_FILES) : $(DOCDIR)/%.xhtml : %.h $(LESML)/parser.xslt +$(DOCUMENTATION_FILES) : $(DOCDIR)/%.xhtml : %.h $(LESML)/parser.xslt xslt/documentation.xslt @if $(TEST) '!' -d $(DOCDIR); then $(MKDIR) -p $(DOCDIR); fi @$(PRINTF) '%s\n' 'Building documentation for <$<>…' >&2 - @$(call processlesml,$(PRINTF) '%s\n\n' '#!lesml@en$$' '⁌ Documentation for `<$<>´'; $(SED) '$(sedcmd)' <'$<'; $(PRINTF) '\n\n%s' '⁂' 'A part of {🔗$(PROJECT_NAME) Documentation<./index.xhtml>}.',Documentation for <$<> | $(PROJECT_NAME) Documentation) >|'$@' + @$(call processlesml,$(PRINTF) '%s\n\n' '#!lesml@en$$' '⁌ Documentation for `<$<>´'; $(SED) '$(sedcmd)' <'$<'; $(PRINTF) '\n\n%s' '⁂' 'A part of {🔗$(PROJECT_NAME) Documentation<./index.xhtml>}.',Documentation for <$<>) >|'$@' -$(DOCDIR)/README.xhtml : README $(LESML)/parser.xslt +$(DOCDIR)/README.xhtml : README $(LESML)/parser.xslt xslt/documentation.xslt @$(PRINTF) '%s\n' 'Building documentation README…' >&2 - @$(call processlesml,$(CAT) README; $(PRINTF) '\n\n%s' '⁂' 'A part of {🔗$(PROJECT_NAME) Documentation<./index.xhtml>}.',$(PROJECT_NAME) Readme | $(PROJECT_NAME) Documentation) >|'$@' + @$(call processlesml,$(CAT) README; $(PRINTF) '\n\n%s' '⁂' 'A part of {🔗$(PROJECT_NAME) Documentation<./index.xhtml>}.',$(PROJECT_NAME) Readme) >|'$@' -$(DOCDIR)/index.xhtml : FORCE $(LESML)/parser.xslt +$(DOCDIR)/index.xhtml : FORCE $(LESML)/parser.xslt xslt/documentation.xslt @$(PRINTF) '%s\n' 'Building documentation index…' >&2 - @$(call processlesml,$(PRINTF) '%s\n\n' '#!lesml@en$$' '⁌ $(PROJECT_NAME) Documentation' '• {🔗$(PROJECT_NAME) Readme<./README.xhtml>}'$(foreach header,$(HEADER_FILES), '• {🔗Documentation for `$(header)´<./$(patsubst %.h,./%.xhtml,$(header))>}'),$(PROJECT_NAME) Documentation) >|'$@' + @$(call processlesml,$(PRINTF) '%s\n\n' '#!lesml@en$$' '⁌ $(PROJECT_NAME) Documentation' '• {🔗$(PROJECT_NAME) Readme<./README.xhtml>}'$(foreach header,$(HEADER_FILES), '• {🔗Documentation for `$(header)´<./$(patsubst %.h,./%.xhtml,$(header))>}'),) >|'$@' $(LESML)/parser.xslt : %/parser.xslt : FORCE $(GIT) submodule update --init '$*' diff --git a/request.h b/request.h index 3ff77e2..7e95fbe 100644 --- a/request.h +++ b/request.h @@ -14,38 +14,38 @@ ** § Types **//////////////////////////////////////////////////////////////////// /** - ** ❦ `enum cgirls_mtype´ + ** ❦ `enum ⸤cgirls_mtype⸥´ ** - ** The `cgirls_mtype´ enumeration is used to indicate recognized + ** The `⸤cgirls_mtype⸥´ enumeration is used to indicate recognized ** mediatype extensions. **/ enum cgirls_mtype : unsigned char { /** - ** The value `cgirls_mtype_any´ indicates no mediatype preference. + ** The value `⟨cgirls_mtype_any⟩´ indicates no mediatype preference. **/ cgirls_mtype_any = 0, /** - ** The value `cgirls_mtype_txt´ indicates a preference for + ** The value `⟨cgirls_mtype_txt⟩´ indicates a preference for ** `text/plain´ content. **/ cgirls_mtype_txt = 1, /** - ** The value `cgirls_mtype_htm´ indicates a preference for + ** The value `⟨cgirls_mtype_htm⟩´ indicates a preference for ** `text/html´ content. **/ cgirls_mtype_htm = 2, /** - ** The value `cgirls_mtype_xml´ indicates a preference for + ** The value `⟨cgirls_mtype_xml⟩´ indicates a preference for ** `application/xml´ content, ideally with an `´ ** processing instruction. **/ cgirls_mtype_xml = 3, /** - ** The value `cgirls_mtype_rdf´ indicates a preference for + ** The value `⟨cgirls_mtype_rdf⟩´ indicates a preference for ** `application/rdf+xml´ content. **/ cgirls_mtype_rdf = 4, @@ -54,53 +54,53 @@ enum cgirls_mtype : unsigned char { typedef enum cgirls_mtype cgirls_mtype; /** - ** ❦ `enum cgirls_vb´ + ** ❦ `enum ⸤cgirls_vb⸥´ ** - ** The `cgirls_vb´ enumeration is used to indicate recognized verbs + ** The `⸤cgirls_vb⸥´ enumeration is used to indicate recognized verbs ** for requests. **/ enum cgirls_vb : unsigned char { /** - ** The value `cgirls_vb_unknown´ indicates an unknown or unspecified + ** The value `⟨cgirls_vb_unknown⟩´ indicates an unknown or unspecified ** verb. **/ cgirls_vb_unknown = 0, /** - ** The value `cgirls_vb_index´ indicates a request for an index of + ** The value `⟨cgirls_vb_index⟩´ indicates a request for an index of ** projects. **/ cgirls_vb_index = 1, /** - ** The value `cgirls_vb_branches´ indicates a request for an index of - ** branches in a given project. The value `cgirls_vb_tags´ indicates a - ** request for an index of tags. + ** The value `⟨cgirls_vb_branches⟩´ indicates a request for an index + ** of branches in a given project. The value `⟨cgirls_vb_tags⟩´ + ** indicates a request for an index of tags. **/ // cgirls_vb_branches = ??, // cgirls_vb_tags = ??, /** - ** The value `cgirls_vb_show´ indicates a request for an object in a - ** human‐readable manner. The value `cgirls_vb_raw´ indicates a + ** The value `⟨cgirls_vb_show⟩´ indicates a request for an object in a + ** human‐readable manner. The value `⟨cgirls_vb_raw⟩´ indicates a ** request for the raw contents of an object. The value - ** `cgirls_vb_blame´ indicates a request for a blame of a commit. + ** `⟨cgirls_vb_blame⟩´ indicates a request for a blame of a commit. **/ cgirls_vb_show = 2, // cgirls_vb_raw = ??, // cgirls_vb_blame = ??, /** - ** The value `cgirls_vb_index´ indicates a request for a diff between - ** two commits. + ** The value `⟨cgirls_vb_index⟩´ indicates a request for a diff + ** between two commits. **/ // cgirls_vb_diff = ??, /** - ** The values `cgirls_vb_log´, `cgirls_vb_shortlog´, `cgirls_vb_atom´, - ** and `cgirls_vb_patch´ indicate requests for logs of a number of - ** commits in various formats. + ** The values `⟨cgirls_vb_log⟩´, `⟨cgirls_vb_shortlog⟩´, + ** `⟨cgirls_vb_atom⟩´, and `⟨cgirls_vb_patch⟩´ indicate requests for + ** logs of a number of commits in various formats. **/ // cgirls_vb_log = ??, // cgirls_vb_shortlog = ??, @@ -114,30 +114,30 @@ enum cgirls_vb : unsigned char { ** ** Verbs can be categorized into a few distinct classes :— ** - ** • Verbs which do not require a project :— `cgirls_vb_index´. + ** • Verbs which do not require a project :— `⟨cgirls_vb_index⟩´. ** ** • Verbs which require a project, but not a revspec :— - ** `cgirls_vb_branches´, `cgirls_vb_tags´. + ** `⟨cgirls_vb_branches⟩´, `⟨cgirls_vb_tags⟩´. ** ** • Verbs which request information about a single object :— - ** `cgirls_vb_show´, `cgirls_vb_raw´, `cgirls_vb_blame´. + ** `⟨cgirls_vb_show⟩´, `⟨cgirls_vb_raw⟩´, `⟨cgirls_vb_blame⟩´. ** - ** • Verbs which compare two commits :— `cgirls_vb_diff´. + ** • Verbs which compare two commits :— `⟨cgirls_vb_diff⟩´. ** ** • Verbs which produce information about an open‐ended number of - ** commits :— `cgirls_vb_log´, `cgirls_vb_shortlog´, - ** `cgirls_vb_atom´, `cgirls_vb_patch´. + ** commits :— `⟨cgirls_vb_log⟩´, `⟨cgirls_vb_shortlog⟩´, + ** `⟨cgirls_vb_atom⟩´, `⟨cgirls_vb_patch⟩´. **/ }; typedef enum cgirls_vb cgirls_vb; /** - ** ❦ `struct cgirls_req_status´ + ** ❦ `struct ⸤cgirls_req_status⸥´ ** - ** The struct `cgirls_req_status´ wraps a status code and message for - ** a response. + ** The struct `⸤cgirls_req_status⸥´ wraps a status code and message + ** for a response. ** - ** The `.message´ is only significant if `.code´ is not `200´. + ** The `⟨.message⟩´ is only significant if `⟨.code⟩´ is not `200´. **/ typedef struct cgirls_req_status cgirls_req_status; struct cgirls_req_status { @@ -146,16 +146,19 @@ struct cgirls_req_status { }; /** - ** ❦ `struct cgirls_req´ + ** ❦ `struct ⸤cgirls_req⸥´ ** - ** The struct `cgirls_req´ represents a request. + ** The struct `⸤cgirls_req⸥´ represents a request. ** ** Requests must have a verb, may specify a mediatype extension, and - ** might also reference a project, revspec, and subpath. + ** might also reference a project, revspec, and subpath. The value of + ** `⟨.subpath⟩´, if not `⟨nullptr⟩´, must be a `⟨nullptr⟩´‐terminated + ** array of strings. ** ** All requests have a status, which is used to express request - ** validity. If `.status.code´ is not `200´, the request is invalid - ** and a response with the associated code and message is recommended. + ** validity. If `⟨.status⟩⟨.code⟩´ is not `200´, the request is + ** invalid and a response with the associated code and message is + ** recommended. **/ typedef struct cgirls_req cgirls_req; struct cgirls_req { @@ -172,14 +175,14 @@ struct cgirls_req { ** § Functions **//////////////////////////////////////////////////////////////////// /** - ** ❦ `cgirls_req cgirls_path·to·req(char const*const)´ + ** ❦ `⸤cgirls_req⸥ ⟨cgirls_path·to·req⟩(⸤char const*const⸥)´ ** - ** The `cgirls_path·to·req´ function takes a path string (such as one - ** provided by the C·G·I `PATH_INFO´ environment variable) and returns - ** a `cgirls_req´ which represents its semantics. + ** The `⟨cgirls_path·to·req⟩´ function takes a path string (such as + ** one provided by the C·G·I `⟨PATH_INFO⟩´ environment variable) and + ** returns a `⸤cgirls_req⸥´ which represents its semantics. ** ** This resulting struct contains a lot of dynamically‐allocated data, - ** so it ☞︎must☜︎ be freed with `cgirls_req·free´ after use. + ** so it ☞︎must☜︎ be freed with `⟨cgirls_req·free⟩´ after use. ** ** Maximally, the path string is processed according to the following ** form :— @@ -191,32 +194,35 @@ struct cgirls_req { ** extension). Not all components necessarily need to be specified, ** and not all possible values are valid or meaningful. ** - ** In case of an error, `.status.code´ on the returned `cgirls_req´ - ** will be some·thing other than `200´. + ** In case of an error, `⟨.status⟩⟨.code⟩´ on the returned + ** `⸤cgirls_req⸥´ will be some·thing other than `200´. **/ cgirls_req cgirls_path·to·req(char const*const); /** - ** ❦ `void cgirls_req·free(cgirls_req)´ + ** ❦ `⸤void⸥ ⟨cgirls_req·free⟩(⸤cgirls_req⸥)´ ** - ** The `cgirls_req·free´ function frees up any dynamically‐allocated - ** memory in the provided `cgirls_req´, assuming that it was created - ** with `cgirls_path·to·req´ or similar. + ** The `⟨cgirls_req·free⟩´ function frees up any dynamically‐allocated + ** memory in the provided `⸤cgirls_req⸥´, assuming that it was created + ** with `⟨cgirls_path·to·req⟩´ or similar. **/ void cgirls_req·free(cgirls_req); /** - ** ❦ `char* cgirls_req·to·path(cgirls_req)´ + ** ❦ `⸤char*⸥ ⟨cgirls_req·to·path⟩(⸤cgirls_req⸥)´ ** - ** The `cgirls_req·to·path´ function does the reverse of - ** `cgirls_path·to·req´: It takes in a `cgirls_req´ structure and + ** The `⟨cgirls_req·to·path⟩´ function does the reverse of + ** `⟨cgirls_path·to·req⟩´: It takes in a `⸤cgirls_req⸥´ struct and ** returns the canonical path string which represents it. ** - ** All possible `cgirls_req´s have a canonical string representation; - ** `cgirls_req·to·path´ will only return `nullptr´ if it + ** All possible `⸤cgirls_req⸥´s have a canonical string + ** representation; `⟨cgirls_req·to·path⟩´ will only return `⟨nullptr⟩´ + ** if it fails to allocate memory while constructing the resulting + ** string. Return values are dynamically allocated and must be + ** manually freed with `⟨free⟩´. ** - ** It is worth noting that, if the `.project´ is the null pointer, the - ** canonical path string will always be the empty string. + ** It is worth noting that, if the `⟨.project⟩´ is the null pointer, + ** the canonical path string will always be the empty string. **/ char* cgirls_req·to·path(cgirls_req); diff --git a/xslt/documentation.xslt b/xslt/documentation.xslt new file mode 100644 index 0000000..139cdd5 --- /dev/null +++ b/xslt/documentation.xslt @@ -0,0 +1,83 @@ + + + + +]> + + + + + + + +