From: Lady Date: Wed, 19 Mar 2025 01:06:31 +0000 (-0400) Subject: Improve handling of strings X-Git-Url: https://git.ladys.computer/CGirls/commitdiff_plain/f5006884a1f4f37bc3c27ab1f3b882d2b36cd052 Improve handling of strings • String constants are now defined with `constexpr´. Because these are (associated at runtime with) `char const*const´ values, they can be compared more‐or‐less like the old enum values used to be; because those pointers point to actual strings, the code for processing them and serializing them is simplified quite a bit. A few arrays give the list of available strings; these are ⹐not⹑ (cannot be) `constexpr´s because while the strings themselves are known at compile time, the pointers which point to them cannot be. Instead, they are `static char const*const´ arrays; the `static´ keyword keeps their visibility internal. ⋯ Exceptionally, `cgirls_mtype_any´ is defined as `nullptr´ rather than a string of zero length; handling this should always be a special case. • Most of the verbs have been commented out to reduce the amount of code needed for an initial working implementation. • The path·info parsing code has been refactored a bit, making use of a new function, `cgirls_gobblepath´, to encapsulate the task of reading up thru the next slash. The serialization code has also been refactored here and there for tidiness. • Some comments in `request.c´ used spaces instead of tabs. Whoops! Note that Clang only supports `constexpr´ in version 19 and later. 
--- diff --git a/expect/pathinfo/09-canonical b/expect/pathinfo/09-canonical index 1872535..e996361 100644 --- a/expect/pathinfo/09-canonical +++ b/expect/pathinfo/09-canonical @@ -1,3 +1,3 @@ # SPDX-FileCopyrightText: 2025 Lady # SPDX-License-Identifier: CC0-1.0 -p/log.txt/b..i/s +p/show.txt/b..i/s diff --git a/expect/pathinfo/10-blushypath b/expect/pathinfo/10-blushypath index 38db5b2..1cd47a6 100644 --- a/expect/pathinfo/10-blushypath +++ b/expect/pathinfo/10-blushypath @@ -1,3 +1,3 @@ # SPDX-FileCopyrightText: 2025 Lady # SPDX-License-Identifier: CC0-1.0 -p/log/b..i/m/n/o +p/show/b..i/m/n/o diff --git a/request.c b/request.c index ca58f0b..25f9625 100644 --- a/request.c +++ b/request.c @@ -20,13 +20,30 @@ void cgirls_freereq (cgirls_req req) { free(req.cgirls_status.cgirls_message); } +char* cgirls_gobblepath(char const* ndx[1], char const*const end[1]) { + char const* eor = strchr(ndx[0], '/'); + char* result = nullptr; + if (!eor) { + eor = end[0]; + } + if (eor > ndx[0]) { + result = strndup(ndx[0], eor - ndx[0]); + } + if (eor < end[0]) { + ndx[0] = eor + 1; + } else { + ndx[0] = end[0]; + } + return result; +} + cgirls_req cgirls_path2req(char const*const pathinfo) { assert(pathinfo != nullptr); // Initialize the result. cgirls_req req = { - .cgirls_action = cgirls_vb_index, - .cgirls_type = cgirls_mediatype_any, + .cgirls_action = cgirls_vb_unknown, + .cgirls_type = cgirls_mtype_any, .cgirls_project = nullptr, .cgirls_id = nullptr, .cgirls_subpath = nullptr, @@ -37,126 +54,68 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { }, }; - // `sont´ stores the start of the next term; `eopi´ stores the end of - // the `pathinfo´ string, excluding any extension. - char const* sont = pathinfo; - char const*const eopi = strchr(pathinfo, 0); + // `ndx´ stores the start of the next term; `end´ stores the end of + // the `pathinfo´ string. 
+ char const* ndx[1] = { pathinfo }; + char const*const end[1] = { strchr(pathinfo, 0) }; + assert(end[0] != nullptr); // The portion of the pathinfo which precedes the first slash gives // the project of the request. If there is no first slash, the // project extends to the end of the string. An empty string is // equivalent to having no project. - char const* eopj = strchr(sont, '/'); - if (!eopj) { - eopj = eopi; - } - if (eopj > sont) { - req.cgirls_project = strndup(sont, eopj - sont); - } - if (eopj < eopi) { - sont = eopj + 1; - } else { - sont = eopi; - } + req.cgirls_project = cgirls_gobblepath(ndx, end); - // The portion of the pathinfo which follows the first slash but - // precedes the second gives the verb of the request. If there is no - // second slash, the verb extends to the end of the string. If the - // verb is not present, or is the empty string, it is treated as - // `"index"´, unless the second slash is present, in which case it is - // treated as `"unknown"´. - // - // Verbs may be suffixed with one of a small number of extensions to - // request a specific type of response. - // - // Only a few verbs are recognized (corresponding to the `cgirls_vb´ - // constants). If a verb is present, but unrecognized, it is assigned - // the special value `cgirls_vb_unknown´, which should generally be - // interpreted as an error. - char const* eovb = strchr(sont, '/'); - if (!eovb) { - eovb = eopi; - } - char const*const eove = eovb; - char* verb = nullptr; - if (eovb - sont > 4) { - // If the verb is at least 5 characters, extract the extension if - // present (it will be the last 4), and then set the end of the - // verb to the start of the extension. - char const* exts = eovb - 4; - do { - // This “loop” encapsulates extension checking for readability. - // If an extension matches, `eovb´ is re·assigned to point to the - // beginning of the extension. Otherwise, the loop exits early - // and `eovb´ keeps pointing at the end of the string. 
- if (strncmp(exts, ".txt", 4) == 0) { - req.cgirls_type = cgirls_mediatype_txt; - } else if (strncmp(exts, ".htm", 4) == 0) { - req.cgirls_type = cgirls_mediatype_htm; - } else if (strncmp(exts, ".xml", 4) == 0) { - req.cgirls_type = cgirls_mediatype_xml; - } else if (strncmp(exts, ".rdf", 4) == 0) { - req.cgirls_type = cgirls_mediatype_rdf; - } else { - break; // do not re·assign `eovb´ + // The portion of the pathinfo which follows the first slash but + // precedes the second gives the action of the request. If there is + // no second slash, the action extends to the end of the string. If + // the action is not present, or is the empty string, it is treated + // as `"index"´, unless the second slash is present, in which case it + // is treated as `"unknown"´. + // + // Actions consist of verbs optionally suffixed with one of a small + // number of extensions to request a specific type of response. + // + // Only a few verbs are recognized (corresponding to the `cgirls_vb´ + // constants). If a verb is present, but unrecognized, it is assigned + // the special value `cgirls_vb_unknown´, which should generally be + // interpreted as an error. + char* soa = cgirls_gobblepath(ndx, end); + if (soa) { + char*const eoa = strchr(soa, 0); + if (eoa - soa > 4) { + // If the verb is at least 5 characters, extract the extension if + // present (it will be the last 4). Then set the first character + // of the extension to null, effectively trimming the verb. 
+ char* ext = eoa - 4; + for (size_t i = 0; i < cgirls_n·mtypes; ++i) { + cgirls_mtype ixt = cgirls_mtypes[i]; + if (strncmp(ext, ixt, 4) == 0) { + req.cgirls_type = ixt; + ext[0] = 0; + break; + } + } + } + for (size_t i = 0; i < cgirls_n·parsable·vbs; ++i) { + cgirls_vb ivb = cgirls_parsable·vbs[i]; + if (strcmp(soa, ivb) == 0) { + req.cgirls_action = ivb; + break; } - eovb = exts; - } while (false); - } - if (eovb > sont) { - verb = strndup(sont, eovb - sont); - } - if (eove < eopi) { - sont = eove + 1; - } else { - sont = eopi; - } - if (verb) { - if (strcmp(verb, "branches") == 0) { - req.cgirls_action = cgirls_vb_branches; - } else if (strcmp(verb, "tags") == 0) { - req.cgirls_action = cgirls_vb_tags; - } else if (strcmp(verb, "show") == 0) { - req.cgirls_action = cgirls_vb_show; - } else if (strcmp(verb, "raw") == 0) { - req.cgirls_action = cgirls_vb_raw; - } else if (strcmp(verb, "blame") == 0) { - req.cgirls_action = cgirls_vb_blame; - } else if (strcmp(verb, "log") == 0) { - req.cgirls_action = cgirls_vb_log; - } else if (strcmp(verb, "shortlog") == 0) { - req.cgirls_action = cgirls_vb_shortlog; - } else if (strcmp(verb, "atom") == 0) { - req.cgirls_action = cgirls_vb_atom; - } else if (strcmp(verb, "patch") == 0) { - req.cgirls_action = cgirls_vb_patch; - } else if (strcmp(verb, "index") != 0) { - req.cgirls_action = cgirls_vb_unknown; } - free(verb); - } else if (eovb < eopi) { - req.cgirls_action = cgirls_vb_unknown; + free(soa); + } else if (ndx[0] == end[0]) { + req.cgirls_action = cgirls_vb_index; } - // The portion of the pathinfo which follows the second slash but - // precedes the third identifies the identifiers for the request. If - // there is no third slash, the identifiers extend to the end of the - // string. A single identifier may be given, or two identifiers may - // be given separated by two periods. An empty string is equivalent - // to no identifier. 
- char const* eoid = strchr(sont, '/'); - if (!eoid) { - eoid = eopi; - } - char* idid = nullptr; - if (eoid > sont) { - idid = strndup(sont, eoid - sont); - } - if (eoid < eopi) { - sont = eoid + 1; - } else { - sont = eopi; - } + // The portion of the pathinfo which follows the second slash but + // precedes the third identifies the identifiers for the request. If + // there is no third slash, the identifiers extend to the end of the + // string. A single identifier may be given, or two identifiers may + // be given separated by two periods. An empty string is equivalent + // to no identifier. + char* idid = cgirls_gobblepath(ndx, end); if (idid) { // If the identifier string contains two successive dots, the base // and target identifiers must be extracted and the original @@ -181,126 +140,78 @@ cgirls_req cgirls_path2req(char const*const pathinfo) { // The portion of the pathinfo which follows the third slash is the // subpath of the request. An empty sting is equivalent to having no // subpath. Trailing and successive slashes are dropped. - char const* soct = sont; - char const* psep = nullptr; - size_t npth = 0; - while (eopi > soct) { + char const* sos = ndx[0]; + char const* sep = nullptr; + size_t n·s = 0; + while (end[0] > sos) { // Count the number of segments in the pathinfo so that the correct // amount of space can be allocated. - psep = strchr(soct, '/'); - if (!psep) { - psep = eopi; + sep = strchr(sos, '/'); + if (!sep) { + sep = end[0]; } - if (psep > soct) { - ++npth; + if (sep > sos) { + ++n·s; } - if (eopi > psep) { - soct = psep + 1; + if (end[0] > sep) { + sos = sep + 1; } else { - soct = eopi; + sos = end[0]; } } - req.cgirls_subpath = calloc(npth + 1, sizeof(char*)); + req.cgirls_subpath = calloc(n·s + 1, sizeof(char*)); if (!req.cgirls_subpath) { return req; } - size_t pthi = 0; - while (eopi > sont) { + size_t i·s = 0; + while (end[0] > ndx[0]) { // Add the segments to the newly allocated array. 
- psep = strchr(sont, '/'); - if (!psep) { - psep = eopi; + sep = strchr(ndx[0], '/'); + if (!sep) { + sep = end[0]; } - if (psep > sont) { - req.cgirls_subpath[pthi++] = strndup(sont, psep - sont); + if (sep > ndx[0]) { + req.cgirls_subpath[i·s++] = strndup(ndx[0], sep - ndx[0]); } - if (eopi > psep) { - sont = psep + 1; + if (end[0] > sep) { + ndx[0] = sep + 1; } else { - sont = eopi; + ndx[0] = end[0]; } } - assert(pthi == npth); - req.cgirls_subpath[pthi] = nullptr; + assert(i·s == n·s); + req.cgirls_subpath[i·s] = nullptr; // Return the result. return req; } char* cgirls_req2path(cgirls_req req) { - char* action = "unknown"; - char* extnsn = ""; - size_t length = 8; // length of `action´ plus 1, to start + cgirls_vb vb = cgirls_vb_index; + bool has·ids = req.cgirls_baseid || req.cgirls_id; + bool has·type = req.cgirls_type; + bool has·subpath = req.cgirls_subpath && req.cgirls_subpath[0]; + size_t length = 0; - // Get the length of the various parts, saving the verb and the - // extension. This length includes a trailing slash, but in practice - // this will be replaced by the final null byte. 
- switch (req.cgirls_action) { - case cgirls_vb_index: - action = "index"; - length = 6; - break; - case cgirls_vb_branches: - action = "branches"; - length = 9; - break; - case cgirls_vb_tags: - action = "tags"; - length = 5; - break; - case cgirls_vb_show: - action = "show"; - length = 5; - break; - case cgirls_vb_raw: - action = "raw"; - length = 4; - break; - case cgirls_vb_blame: - action = "blame"; - length = 6; - break; - case cgirls_vb_log: - action = "log"; - length = 4; - break; - case cgirls_vb_shortlog: - action = "shortlog"; - length = 9; - break; - case cgirls_vb_atom: - action = "atom"; - length = 5; - break; - case cgirls_vb_patch: - action = "patch"; - length = 6; - break; - default: - break; - } - switch (req.cgirls_type) { - case cgirls_mediatype_txt: - extnsn = ".txt"; - break; - case cgirls_mediatype_htm: - extnsn = ".htm"; - break; - case cgirls_mediatype_xml: - extnsn = ".xml"; - break; - case cgirls_mediatype_rdf: - extnsn = ".rdf"; - break; - default: - break; - } + // Get the length of the various parts. This length includes a + // trailing slash, but in practice this will be replaced by the final + // null byte. 
if (req.cgirls_project) { length += strlen(req.cgirls_project) + 1; - if (req.cgirls_type != cgirls_mediatype_any) { - length += 4; + for (size_t i = 0; i < cgirls_n·vbs; ++i) { + cgirls_vb ivb = cgirls_vbs[i]; + if (req.cgirls_action == ivb) { + vb = ivb; + break; + } + } + if (vb != cgirls_vb_index || has·type || has·ids || has·subpath) { + length += strlen(vb) + 1; + } + if (has·type) { + length += strlen(req.cgirls_type); } - if (req.cgirls_baseid || req.cgirls_id) { + if (has·ids) { if (req.cgirls_baseid) { length += strlen(req.cgirls_baseid) + 2; } @@ -308,10 +219,10 @@ char* cgirls_req2path(cgirls_req req) { length += strlen(req.cgirls_id); } length += 1; - } else if (req.cgirls_subpath && req.cgirls_subpath[0]) { + } else if (has·subpath) { length += 3; } - if (req.cgirls_subpath) { + if (has·subpath) { size_t i = 0; char* c = req.cgirls_subpath[i]; while (c) { @@ -333,26 +244,26 @@ char* cgirls_req2path(cgirls_req req) { if (req.cgirls_project) { cursor = stpcpy(cursor, req.cgirls_project); (cursor++)[0] = '/'; - cursor = stpcpy(cursor, action); - if (req.cgirls_type != cgirls_mediatype_any) { - cursor = stpcpy(cursor, extnsn); + if (vb != cgirls_vb_index || has·type || has·ids || has·subpath) { + cursor = stpcpy(cursor, vb); + if (has·type) { + cursor = stpcpy(cursor, req.cgirls_type); + } + (cursor++)[0] = '/'; } - (cursor++)[0] = '/'; - if (req.cgirls_baseid || req.cgirls_id) { + if (has·ids) { if (req.cgirls_baseid) { cursor = stpcpy(cursor, req.cgirls_baseid); - cursor[0] = '.'; - cursor[1] = '.'; - cursor += 2; + cursor = stpcpy(cursor, ".."); } if (req.cgirls_id) { cursor = stpcpy(cursor, req.cgirls_id); } (cursor++)[0] = '/'; - } else if (req.cgirls_subpath && req.cgirls_subpath[0]) { + } else if (has·subpath) { cursor = stpcpy(cursor, "../"); } - if (req.cgirls_subpath) { + if (has·subpath) { size_t i = 0; char* c = req.cgirls_subpath[i]; while (c) { diff --git a/request.h b/request.h index 2ab70e0..e1606dc 100644 --- a/request.h +++ b/request.h 
@@ -4,37 +4,56 @@ #ifndef CGIRLS_REQUEST_H #define CGIRLS_REQUEST_H -enum cgirls_mediatype : unsigned char { - // Unspecified media type - cgirls_mediatype_any = 0x00, - // Text media types - cgirls_mediatype_txt = 0x10, - cgirls_mediatype_htm = 0x11, - // X·M·L media types - cgirls_mediatype_xml = 0x20, - cgirls_mediatype_rdf = 0x21, +/* +The following constant expressions provide recognized media type +extensions. +*/ +constexpr char* cgirls_mtype_any = nullptr; +constexpr char cgirls_mtype_txt[] = ".txt"; +constexpr char cgirls_mtype_htm[] = ".htm"; +constexpr char cgirls_mtype_xml[] = ".xml"; +constexpr char cgirls_mtype_rdf[] = ".rdf"; +typedef char const* cgirls_mtype; +constexpr size_t cgirls_n·mtypes = 4; +static cgirls_mtype const cgirls_mtypes[cgirls_n·mtypes] = { + cgirls_mtype_txt, + cgirls_mtype_htm, + cgirls_mtype_xml, + cgirls_mtype_rdf, }; -typedef enum cgirls_mediatype cgirls_mediatype; -enum cgirls_vb : unsigned char { - // Actions in general - cgirls_vb_index = 0x00, - // Actions on projects - cgirls_vb_branches = 0x10, - cgirls_vb_tags = 0x11, - // Actions on single objects - cgirls_vb_show = 0x20, - cgirls_vb_raw = 0x21, - cgirls_vb_blame = 0x22, - // Actions on ranges of objects - cgirls_vb_log = 0x30, - cgirls_vb_shortlog = 0x31, - cgirls_vb_atom = 0x32, - cgirls_vb_patch = 0x33, - // Unknown verb - cgirls_vb_unknown = 0xFF, +/* +The following constant expressions provide recognized action verbs. 
+*/ +// Actions in general: +constexpr char cgirls_vb_index[] = "index"; +constexpr char cgirls_vb_unknown[] = "unknown"; +// Actions on projects: +// constexpr char cgirls_vb_branches[] = "branches"; +// constexpr char cgirls_vb_tags[] = "tags"; +// Actions on single objects: +constexpr char cgirls_vb_show[] = "show"; +// constexpr char cgirls_vb_raw[] = "raw"; +// constexpr char cgirls_vb_blame[] = "blame"; +// Actions on ranges of commits: +// constexpr char cgirls_vb_diff[] = "diff"; +// Actions on lists of commits: +// constexpr char cgirls_vb_log[] = "log"; +// constexpr char cgirls_vb_shortlog[] = "shortlog"; +// constexpr char cgirls_vb_atom[] = "atom"; +// constexpr char cgirls_vb_patch[] = "patch"; +typedef char const* cgirls_vb; +constexpr size_t cgirls_n·vbs = 3; +static cgirls_vb const cgirls_vbs[cgirls_n·vbs] = { + cgirls_vb_index, + cgirls_vb_unknown, + cgirls_vb_show, +}; +constexpr size_t cgirls_n·parsable·vbs = 2; +static cgirls_vb const cgirls_parsable·vbs[cgirls_n·parsable·vbs] = { + cgirls_vb_index, + cgirls_vb_show, }; -typedef enum cgirls_vb cgirls_vb; typedef struct cgirls_req_status cgirls_req_status; struct cgirls_req_status { @@ -45,7 +64,7 @@ struct cgirls_req_status { typedef struct cgirls_req cgirls_req; struct cgirls_req { cgirls_vb cgirls_action; - cgirls_mediatype cgirls_type; + cgirls_mtype cgirls_type; char* cgirls_project; char* cgirls_id; char** cgirls_subpath; diff --git a/test/pathinfo/09-canonical b/test/pathinfo/09-canonical index 1872535..e996361 100644 --- a/test/pathinfo/09-canonical +++ b/test/pathinfo/09-canonical @@ -1,3 +1,3 @@ # SPDX-FileCopyrightText: 2025 Lady # SPDX-License-Identifier: CC0-1.0 -p/log.txt/b..i/s +p/show.txt/b..i/s diff --git a/test/pathinfo/10-blushypath b/test/pathinfo/10-blushypath index 85d519e..c2cec3d 100644 --- a/test/pathinfo/10-blushypath +++ b/test/pathinfo/10-blushypath @@ -1,3 +1,3 @@ # SPDX-FileCopyrightText: 2025 Lady # SPDX-License-Identifier: CC0-1.0 -p/log/b..i//m//n///o//// 
+p/show/b..i//m//n///o////