From: Lady Date: Thu, 20 Mar 2025 03:59:56 +0000 (-0400) Subject: Document and improve `request.h´ X-Git-Url: https://git.ladys.computer/CGirls/commitdiff_plain/259adc84f29a70aa2ba33f1eaa7f6a461bb51759 Document and improve `request.h´ Mostly, this commit just adds documentation comments to `request.h´ to fully explain its behaviour, including renaming some things for stylistic reasons. How·ever, it does make one significant change: It reverts the definition of `cgirls_mtype´ and `cgirls_vb´ back to enums. Previously, I thought that it might be possible to be clever and define these as `constexpr´ strings. That would enable them to be serialized by string value and compared by pointer value, perhaps offering the best of both worlds. And this worked in initial tests! Unfortunately, `constexpr´ declarations have internal linkage, which means that a ⹐different⹑ object is created for each translation unit (file). This renders the existence of these `constexpr´s essentially use·less in a header file, since nobody outside of `request.c´ will have the same pointers that `request.c´ has. (This C “feature” is presumably to help guarantee constancy, since anything with `extern´ linkage cannot be truly guaranteed to be constant.) The new approach goes back to enums but provides `static const*const´ arrays which map those enums to string values. This of course means the enums need to be roughly sequential, and one needs to check that a given index is actually in bounds and does not point to `nullptr´ before using its associated value. Defining the enums as `unsigned char´ at least means they can never be negative. 
--- diff --git a/cgirls-test-pathinfo.c b/cgirls-test-pathinfo.c index 160797c..27f3327 100644 --- a/cgirls-test-pathinfo.c +++ b/cgirls-test-pathinfo.c @@ -26,20 +26,20 @@ int cmd_main(int argc, [[maybe_unused]] const char* argv[argc+1]) { return EXIT_FAILURE; } } - cgirls_req req = cgirls_path2req(line); + cgirls_req req = cgirls_path·to·req(line); if (!empty) { free(line); } - line = cgirls_req2path(req); - cgirls_freereq(req); + line = cgirls_req·to·path(req); + cgirls_req·free(req); if (!line) { free(lineptr[0]); fprintf(stderr, "Error: Failed to allocate string for path.\n"); return EXIT_FAILURE; } - req = cgirls_path2req(line); - reline = cgirls_req2path(req); - cgirls_freereq(req); + req = cgirls_path·to·req(line); + reline = cgirls_req·to·path(req); + cgirls_req·free(req); if (!reline) { free(lineptr[0]); free(line); diff --git a/request.c b/request.c index 25f9625..b868812 100644 --- a/request.c +++ b/request.c @@ -4,23 +4,46 @@ #include "aa.h" #include "request.h" -void cgirls_freereq (cgirls_req req) { - free(req.cgirls_project); - free(req.cgirls_id); - if (req.cgirls_subpath) { +static char const*const cgirls_mtypes[] = { + [cgirls_mtype_txt] = ".txt", + [cgirls_mtype_htm] = ".htm", + [cgirls_mtype_xml] = ".xml", + [cgirls_mtype_rdf] = ".rdf", +}; +constexpr size_t cgirls_n·mtypes = + sizeof(cgirls_mtypes) / sizeof(char*); + +static char const*const cgirls_vbs[] = { + [cgirls_vb_unknown] = "unknown", + [cgirls_vb_index] = "index", + [cgirls_vb_show] = "show", +}; +constexpr size_t cgirls_n·vbs = + sizeof(cgirls_vbs) / sizeof(char*); +static cgirls_vb const cgirls_parsable·vbs[] = { + cgirls_vb_index, + cgirls_vb_show, +}; +constexpr size_t cgirls_n·parsable·vbs = + sizeof(cgirls_parsable·vbs) / sizeof(cgirls_vb); + +void cgirls_req·free (cgirls_req req) { + free(req.project); + free(req.id_THIS_WILL_CHANGE); + if (req.subpath) { size_t i = 0; - char* c = req.cgirls_subpath[i]; + char* c = req.subpath[i]; while (c) { free(c); - c = 
req.cgirls_subpath[++i]; + c = req.subpath[++i]; } - free(req.cgirls_subpath); + free(req.subpath); } - free(req.cgirls_baseid); - free(req.cgirls_status.cgirls_message); + free(req.baseid_THIS_WILL_CHANGE); + free(req.status.message); } -char* cgirls_gobblepath(char const* ndx[1], char const*const end[1]) { +static char* cgirls_gobble·path(char const* ndx[1], char const*const end[1]) { char const* eor = strchr(ndx[0], '/'); char* result = nullptr; if (!eor) { @@ -37,20 +60,20 @@ char* cgirls_gobblepath(char const* ndx[1], char const*const end[1]) { return result; } -cgirls_req cgirls_path2req(char const*const pathinfo) { +cgirls_req cgirls_path·to·req(char const*const pathinfo) { assert(pathinfo != nullptr); // Initialize the result. cgirls_req req = { - .cgirls_action = cgirls_vb_unknown, - .cgirls_type = cgirls_mtype_any, - .cgirls_project = nullptr, - .cgirls_id = nullptr, - .cgirls_subpath = nullptr, - .cgirls_baseid = nullptr, - .cgirls_status = { - .cgirls_code = 200, - .cgirls_message = nullptr, + .verb = cgirls_vb_unknown, + .mtype = cgirls_mtype_any, + .project = nullptr, + .id_THIS_WILL_CHANGE = nullptr, + .subpath = nullptr, + .baseid_THIS_WILL_CHANGE = nullptr, + .status = { + .code = 200, + .message = nullptr, }, }; @@ -64,7 +87,7 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { // the project of the request. If there is no first slash, the // project extends to the end of the string. An empty string is // equivalent to having no project. - req.cgirls_project = cgirls_gobblepath(ndx, end); + req.project = cgirls_gobble·path(ndx, end); // The portion of the pathinfo which follows the first slash but // precedes the second gives the action of the request. If there is @@ -80,7 +103,7 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { // constants). If a verb is present, but unrecognized, it is assigned // the special value `cgirls_vb_unknown´, which should generally be // interpreted as an error. 
- char* soa = cgirls_gobblepath(ndx, end); + char* soa = cgirls_gobble·path(ndx, end); if (soa) { char*const eoa = strchr(soa, 0); if (eoa - soa > 4) { @@ -88,10 +111,10 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { // present (it will be the last 4). Then set the first character // of the extension to null, effectively trimming the verb. char* ext = eoa - 4; - for (size_t i = 0; i < cgirls_n·mtypes; ++i) { - cgirls_mtype ixt = cgirls_mtypes[i]; - if (strncmp(ext, ixt, 4) == 0) { - req.cgirls_type = ixt; + for (cgirls_mtype i = 0; i < cgirls_n·mtypes; ++i) { + char const*const ixt = cgirls_mtypes[i]; + if (ixt && strncmp(ext, ixt, 4) == 0) { + req.mtype = i; ext[0] = 0; break; } @@ -99,14 +122,15 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { } for (size_t i = 0; i < cgirls_n·parsable·vbs; ++i) { cgirls_vb ivb = cgirls_parsable·vbs[i]; - if (strcmp(soa, ivb) == 0) { - req.cgirls_action = ivb; + char const*const svb = cgirls_vbs[ivb]; + if (svb && strcmp(soa, svb) == 0) { + req.verb = ivb; break; } } free(soa); } else if (ndx[0] == end[0]) { - req.cgirls_action = cgirls_vb_index; + req.verb = cgirls_vb_index; } // The portion of the pathinfo which follows the second slash but @@ -115,7 +139,7 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { // string. A single identifier may be given, or two identifiers may // be given separated by two periods. An empty string is equivalent // to no identifier. 
- char* idid = cgirls_gobblepath(ndx, end); + char* idid = cgirls_gobble·path(ndx, end); if (idid) { // If the identifier string contains two successive dots, the base // and target identifiers must be extracted and the original @@ -126,14 +150,14 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { char const*const eods = dots + 2; char const*const eoii = strchr(idid, 0); if (dots > idid) { - req.cgirls_baseid = strndup(idid, dots - idid); + req.baseid_THIS_WILL_CHANGE = strndup(idid, dots - idid); } if (eods < eoii) { - req.cgirls_id = strndup(eods, eoii - eods); + req.id_THIS_WILL_CHANGE = strndup(eods, eoii - eods); } free(idid); } else { - req.cgirls_id = idid; + req.id_THIS_WILL_CHANGE = idid; } } @@ -159,8 +183,8 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { sos = end[0]; } } - req.cgirls_subpath = calloc(n·s + 1, sizeof(char*)); - if (!req.cgirls_subpath) { + req.subpath = calloc(n·s + 1, sizeof(char*)); + if (!req.subpath) { return req; } size_t i·s = 0; @@ -171,7 +195,7 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { sep = end[0]; } if (sep > ndx[0]) { - req.cgirls_subpath[i·s++] = strndup(ndx[0], sep - ndx[0]); + req.subpath[i·s++] = strndup(ndx[0], sep - ndx[0]); } if (end[0] > sep) { ndx[0] = sep + 1; @@ -180,43 +204,53 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { } } assert(i·s == n·s); - req.cgirls_subpath[i·s] = nullptr; + req.subpath[i·s] = nullptr; // Return the result. return req; } -char* cgirls_req2path(cgirls_req req) { - cgirls_vb vb = cgirls_vb_index; - bool has·ids = req.cgirls_baseid || req.cgirls_id; - bool has·type = req.cgirls_type; - bool has·subpath = req.cgirls_subpath && req.cgirls_subpath[0]; +char* cgirls_req·to·path(cgirls_req req) { + char const* vb = nullptr; + char const* mtype = nullptr; + bool has·ids = req.baseid_THIS_WILL_CHANGE || req.id_THIS_WILL_CHANGE; + bool has·subpath = req.subpath && req.subpath[0]; size_t length = 0; + // Get the string corresponding to the verb. 
Do not assume that the + // verb is well‐behaved (actually corresponding to an enumeration + // constant). + if (req.verb < cgirls_n·vbs) { + vb = cgirls_vbs[req.verb]; + } + if (!vb) { + vb = cgirls_vbs[cgirls_vb_unknown]; + } + + // Get the string corresponding to the mediatype, or `nullptr´. Do + // not assume that the mediatype is well‐behaved (actually corresponding to + // an enumeration constant). + if (req.mtype < cgirls_n·mtypes) { + mtype = cgirls_mtypes[req.mtype]; + } + // Get the length of the various parts. This length includes a // trailing slash, but in practice this will be replaced by the final // null byte. - if (req.cgirls_project) { - length += strlen(req.cgirls_project) + 1; - for (size_t i = 0; i < cgirls_n·vbs; ++i) { - cgirls_vb ivb = cgirls_vbs[i]; - if (req.cgirls_action == ivb) { - vb = ivb; - break; - } - } - if (vb != cgirls_vb_index || has·type || has·ids || has·subpath) { + if (req.project) { + length += strlen(req.project) + 1; + if (req.verb != cgirls_vb_index || mtype || has·ids || has·subpath) { length += strlen(vb) + 1; } - if (has·type) { - length += strlen(req.cgirls_type); + if (mtype) { + length += strlen(mtype); } if (has·ids) { - if (req.cgirls_baseid) { - length += strlen(req.cgirls_baseid) + 2; + if (req.baseid_THIS_WILL_CHANGE) { + length += strlen(req.baseid_THIS_WILL_CHANGE) + 2; } - if (req.cgirls_id) { - length += strlen(req.cgirls_id); + if (req.id_THIS_WILL_CHANGE) { + length += strlen(req.id_THIS_WILL_CHANGE); } length += 1; } else if (has·subpath) { @@ -224,10 +258,10 @@ char* cgirls_req2path(cgirls_req req) { } if (has·subpath) { size_t i = 0; - char* c = req.cgirls_subpath[i]; + char* c = req.subpath[i]; while (c) { length += strlen(c) + 1; - c = req.cgirls_subpath[++i]; + c = req.subpath[++i]; } } } else { @@ -241,23 +275,23 @@ char* cgirls_req2path(cgirls_req req) { return nullptr; } char* cursor = result; - if (req.cgirls_project) { - cursor = stpcpy(cursor, req.cgirls_project); + if (req.project) { + cursor = 
stpcpy(cursor, req.project); (cursor++)[0] = '/'; - if (vb != cgirls_vb_index || has·type || has·ids || has·subpath) { + if (req.verb != cgirls_vb_index || mtype || has·ids || has·subpath) { cursor = stpcpy(cursor, vb); - if (has·type) { - cursor = stpcpy(cursor, req.cgirls_type); + if (mtype) { + cursor = stpcpy(cursor, mtype); } (cursor++)[0] = '/'; } if (has·ids) { - if (req.cgirls_baseid) { - cursor = stpcpy(cursor, req.cgirls_baseid); + if (req.baseid_THIS_WILL_CHANGE) { + cursor = stpcpy(cursor, req.baseid_THIS_WILL_CHANGE); cursor = stpcpy(cursor, ".."); } - if (req.cgirls_id) { - cursor = stpcpy(cursor, req.cgirls_id); + if (req.id_THIS_WILL_CHANGE) { + cursor = stpcpy(cursor, req.id_THIS_WILL_CHANGE); } (cursor++)[0] = '/'; } else if (has·subpath) { @@ -265,10 +299,10 @@ char* cgirls_req2path(cgirls_req req) { } if (has·subpath) { size_t i = 0; - char* c = req.cgirls_subpath[i]; + char* c = req.subpath[i]; while (c) { cursor = stpcpy(cursor, c); - c = req.cgirls_subpath[++i]; + c = req.subpath[++i]; (cursor++)[0] = '/'; } } diff --git a/request.h b/request.h index e1606dc..3ff77e2 100644 --- a/request.h +++ b/request.h @@ -1,109 +1,223 @@ // SPDX-FileCopyrightText: 2025 Lady // SPDX-License-Identifier: GPL-2.0-only +/** + ** This file defines types, constants, and function signatures + ** necessary for dealing with the ⹐semantics⹑ of C·Girls requests. + ** (Implementations of these functions are provided in `request.c´.) + **/ + #ifndef CGIRLS_REQUEST_H #define CGIRLS_REQUEST_H -/* -The following constant expressions provide recognized media type -extensions. 
-*/ -constexpr char* cgirls_mtype_any = nullptr; -constexpr char cgirls_mtype_txt[] = ".txt"; -constexpr char cgirls_mtype_htm[] = ".htm"; -constexpr char cgirls_mtype_xml[] = ".xml"; -constexpr char cgirls_mtype_rdf[] = ".rdf"; -typedef char const* cgirls_mtype; -constexpr size_t cgirls_n·mtypes = 4; -static cgirls_mtype const cgirls_mtypes[cgirls_n·mtypes] = { - cgirls_mtype_txt, - cgirls_mtype_htm, - cgirls_mtype_xml, - cgirls_mtype_rdf, -}; +/** + ** § Types + **//////////////////////////////////////////////////////////////////// +/** + ** ❦ `enum cgirls_mtype´ + ** + ** The `cgirls_mtype´ enumeration is used to indicate recognized + ** mediatype extensions. + **/ +enum cgirls_mtype : unsigned char { +/** + ** The value `cgirls_mtype_any´ indicates no mediatype preference. + **/ + cgirls_mtype_any = 0, + +/** + ** The value `cgirls_mtype_txt´ indicates a preference for + ** `text/plain´ content. + **/ + cgirls_mtype_txt = 1, + +/** + ** The value `cgirls_mtype_htm´ indicates a preference for + ** `text/html´ content. + **/ + cgirls_mtype_htm = 2, + +/** + ** The value `cgirls_mtype_xml´ indicates a preference for + ** `application/xml´ content, ideally with an `<?xml-stylesheet?>´ + ** processing instruction. + **/ + cgirls_mtype_xml = 3, + +/** + ** The value `cgirls_mtype_rdf´ indicates a preference for + ** `application/rdf+xml´ content. + **/ + cgirls_mtype_rdf = 4, -/* -The following constant expressions provide recognized action verbs. 
-*/ -// Actions in general: -constexpr char cgirls_vb_index[] = "index"; -constexpr char cgirls_vb_unknown[] = "unknown"; -// Actions on projects: -// constexpr char cgirls_vb_branches[] = "branches"; -// constexpr char cgirls_vb_tags[] = "tags"; -// Actions on single objects: -constexpr char cgirls_vb_show[] = "show"; -// constexpr char cgirls_vb_raw[] = "raw"; -// constexpr char cgirls_vb_blame[] = "blame"; -// Actions on ranges of commits: -// constexpr char cgirls_vb_diff[] = "diff"; -// Actions on lists of commits: -// constexpr char cgirls_vb_log[] = "log"; -// constexpr char cgirls_vb_shortlog[] = "shortlog"; -// constexpr char cgirls_vb_atom[] = "atom"; -// constexpr char cgirls_vb_patch[] = "patch"; -typedef char const* cgirls_vb; -constexpr size_t cgirls_n·vbs = 3; -static cgirls_vb const cgirls_vbs[cgirls_n·vbs] = { - cgirls_vb_index, - cgirls_vb_unknown, - cgirls_vb_show, }; -constexpr size_t cgirls_n·parsable·vbs = 2; -static cgirls_vb const cgirls_parsable·vbs[cgirls_n·parsable·vbs] = { - cgirls_vb_index, - cgirls_vb_show, +typedef enum cgirls_mtype cgirls_mtype; + +/** + ** ❦ `enum cgirls_vb´ + ** + ** The `cgirls_vb´ enumeration is used to indicate recognized verbs + ** for requests. + **/ +enum cgirls_vb : unsigned char { + +/** + ** The value `cgirls_vb_unknown´ indicates an unknown or unspecified + ** verb. + **/ + cgirls_vb_unknown = 0, + +/** + ** The value `cgirls_vb_index´ indicates a request for an index of + ** projects. + **/ + cgirls_vb_index = 1, + +/** + ** The value `cgirls_vb_branches´ indicates a request for an index of + ** branches in a given project. The value `cgirls_vb_tags´ indicates a + ** request for an index of tags. + **/ + // cgirls_vb_branches = ??, + // cgirls_vb_tags = ??, + +/** + ** The value `cgirls_vb_show´ indicates a request for an object in a + ** human‐readable manner. The value `cgirls_vb_raw´ indicates a + ** request for the raw contents of an object. 
The value + ** `cgirls_vb_blame´ indicates a request for a blame of a commit. + **/ + cgirls_vb_show = 2, + // cgirls_vb_raw = ??, + // cgirls_vb_blame = ??, + +/** + ** The value `cgirls_vb_diff´ indicates a request for a diff between + ** two commits. + **/ + // cgirls_vb_diff = ??, + +/** + ** The values `cgirls_vb_log´, `cgirls_vb_shortlog´, `cgirls_vb_atom´, + ** and `cgirls_vb_patch´ indicate requests for logs of a number of + ** commits in various formats. + **/ + // cgirls_vb_log = ??, + // cgirls_vb_shortlog = ??, + // cgirls_vb_atom = ??, + // cgirls_vb_patch = ??, + +/** + ** Note that the numbering for verbs does not follow their order in + ** the above list, but rather is fixed to when they were first + ** supported. New verbs may be added in the future. + ** + ** Verbs can be categorized into a few distinct classes :— + ** + ** • Verbs which do not require a project :— `cgirls_vb_index´. + ** + ** • Verbs which require a project, but not a revspec :— + ** `cgirls_vb_branches´, `cgirls_vb_tags´. + ** + ** • Verbs which request information about a single object :— + ** `cgirls_vb_show´, `cgirls_vb_raw´, `cgirls_vb_blame´. + ** + ** • Verbs which compare two commits :— `cgirls_vb_diff´. + ** + ** • Verbs which produce information about an open‐ended number of + ** commits :— `cgirls_vb_log´, `cgirls_vb_shortlog´, + ** `cgirls_vb_atom´, `cgirls_vb_patch´. + **/ }; +typedef enum cgirls_vb cgirls_vb; +/** + ** ❦ `struct cgirls_req_status´ + ** + ** The struct `cgirls_req_status´ wraps a status code and message for + ** a response. + ** + ** The `.message´ is only significant if `.code´ is not `200´. + **/ typedef struct cgirls_req_status cgirls_req_status; struct cgirls_req_status { - unsigned short cgirls_code; - char* cgirls_message; // if `cgirls_code´ is not ok + unsigned short code; + char* message; // if `code´ is not ok }; +/** + ** ❦ `struct cgirls_req´ + ** + ** The struct `cgirls_req´ represents a request. 
+ ** + ** Requests must have a verb, may specify a mediatype extension, and + ** might also reference a project, revspec, and subpath. + ** + ** All requests have a status, which is used to express request + ** validity. If `.status.code´ is not `200´, the request is invalid + ** and a response with the associated code and message is recommended. + **/ typedef struct cgirls_req cgirls_req; struct cgirls_req { - cgirls_vb cgirls_action; - cgirls_mtype cgirls_type; - char* cgirls_project; - char* cgirls_id; - char** cgirls_subpath; - char* cgirls_baseid; - cgirls_req_status cgirls_status; + cgirls_vb verb; + cgirls_mtype mtype; + char* project; + char* id_THIS_WILL_CHANGE; + char** subpath; + char* baseid_THIS_WILL_CHANGE; + cgirls_req_status status; }; -/* -Frees up any dynamically‐allocated memory which was allocated by -`cgirls_path2req´. -*/ -void cgirls_freereq (cgirls_req req); - -/* -Converts the provided “path info” string into a `cgirls_req´ struct -and returns the result. - -This struct contains dynamically‐allocated strings which must be freed -by calling `cgirls_freereq´. - -Maximally, a “path info” string has the following form :— - - {project}/{action}/{baseid}..{id}/{subpath} - -—: (where subpath can contain additional slashes, and action may -optionally include one of a small number of supported extensions). -`baseid´ is optional; if omitted, the dots preceding `id´ are also -dropped. For all other components, all preceding components must be -provided if a given component is provided. -*/ -cgirls_req cgirls_path2req(char const*const pathinfo); +/** + ** § Functions + **//////////////////////////////////////////////////////////////////// +/** + ** ❦ `cgirls_req cgirls_path·to·req(char const*const)´ + ** + ** The `cgirls_path·to·req´ function takes a path string (such as one + ** provided by the C·G·I `PATH_INFO´ environment variable) and returns + ** a `cgirls_req´ which represents its semantics. 
+ ** + ** This resulting struct contains a lot of dynamically‐allocated data, + ** so it ☞︎must☜︎ be freed with `cgirls_req·free´ after use. + ** + ** Maximally, the path string is processed according to the following + ** form :— + ** + ** |`{project}/{action}/{revspec}/{subpath}´ + ** + ** —: (where `{subpath}´ can contain additional slashes, and + ** `{action}´ consists of a verb and optionally a supported mediatype + ** extension). Not all components necessarily need to be specified, + ** and not all possible values are valid or meaningful. + ** + ** In case of an error, `.status.code´ on the returned `cgirls_req´ + ** will be some·thing other than `200´. + **/ +cgirls_req cgirls_path·to·req(char const*const); -/* -Returns the canonical “path info” string which represents the provided -`cgirls_req´. +/** + ** ❦ `void cgirls_req·free(cgirls_req)´ + ** + ** The `cgirls_req·free´ function frees up any dynamically‐allocated + ** memory in the provided `cgirls_req´, assuming that it was created + ** with `cgirls_path·to·req´ or similar. + **/ +void cgirls_req·free(cgirls_req); -Note that if `cgirls_req.cgirls_project´ is the null pointer, the -canonical “path info” string is always the empty string. -*/ -char* cgirls_req2path(cgirls_req); +/** + ** ❦ `char* cgirls_req·to·path(cgirls_req)´ + ** + ** The `cgirls_req·to·path´ function does the reverse of + ** `cgirls_path·to·req´: It takes in a `cgirls_req´ structure and + ** returns the canonical path string which represents it. + ** + ** All possible `cgirls_req´s have a canonical string representation; + ** `cgirls_req·to·path´ will only return `nullptr´ if allocation fails. + ** + ** It is worth noting that, if the `.project´ is the null pointer, the + ** canonical path string will always be the empty string. + **/ +char* cgirls_req·to·path(cgirls_req); #endif /* CGIRLS_REQUEST_H */