Lady’s Gitweb - CGirls/commitdiff
Improve handling of strings
author Lady <redacted>
Wed, 19 Mar 2025 01:06:31 +0000 (21:06 -0400)
committer Lady <redacted>
Thu, 20 Mar 2025 04:22:48 +0000 (00:22 -0400)
• String constants are now defined with `constexpr´. Because these are
(associated at runtime with) `char const*const´ values, they can be
compared more‐or‐less like the old enum values used to be; because
those pointers point to actual strings, the code for processing them
and serializing them is simplified quite a bit. A few arrays give the
list of available strings; these are ⹐not⹑ (cannot be) `constexpr´s
because while the strings themselves are known at compile time, the
pointers which point to them cannot be. Instead, they are
`static const*const´ arrays; the `static´ keyword keeps their
visibility internal.

⋯ Exceptionally, `cgirls_mtype_any´ is defined as `nullptr´ rather
  than a string of zero length; handling this should always be a
  special case.

• Most of the verbs have been commented out to reduce the amount of
  code needed for an initial working implementation.

• The path·info parsing code has been refactored a bit, making use of a
  new function, `cgirls_gobblepath´, to encapsulate the task of reading
  up thru the next slash. The serialization code has also been
  refactored here and there for tidiness.

• Some comments in `request.c´ used spaces instead of tabs. Whoops!

Note that Clang only supports `constexpr´ in version 19 and later.

expect/pathinfo/09-canonical
expect/pathinfo/10-blushypath
request.c
request.h
test/pathinfo/09-canonical
test/pathinfo/10-blushypath

index 18725357fc5e534c2a1258d7155571c58e56f76f..e99636195daa24d1804c0f9b2d3a94e43c2b34f4 100644 (file)
@@ -1,3 +1,3 @@
 # SPDX-FileCopyrightText: 2025 Lady <https://www.ladys.computer/about/#lady>
 # SPDX-License-Identifier: CC0-1.0
-p/log.txt/b..i/s
+p/show.txt/b..i/s
index 38db5b2c6961376bb042407f5d4f3476c8201bd4..1cd47a6f2c322c5cffd425fff3d10ca44adad54a 100644 (file)
@@ -1,3 +1,3 @@
 # SPDX-FileCopyrightText: 2025 Lady <https://www.ladys.computer/about/#lady>
 # SPDX-License-Identifier: CC0-1.0
-p/log/b..i/m/n/o
+p/show/b..i/m/n/o
index ca58f0b49a40740b572e02afad94666aeb3b3df3..25f962599b354cf4c588766d39d6f9cb5cceb2e7 100644 (file)
--- a/request.c
+++ b/request.c
@@ -20,13 +20,30 @@ void cgirls_freereq (cgirls_req req) {
        free(req.cgirls_status.cgirls_message);
 }
 
+char* cgirls_gobblepath(char const* ndx[1], char const*const end[1]) {
+       char const* eor = strchr(ndx[0], '/');
+       char* result = nullptr;
+       if (!eor) {
+               eor = end[0];
+       }
+       if (eor > ndx[0]) {
+               result = strndup(ndx[0], eor - ndx[0]);
+       }
+       if (eor < end[0]) {
+               ndx[0] = eor + 1;
+       } else {
+               ndx[0] = end[0];
+       }
+       return result;
+}
+
 cgirls_req cgirls_path2req(char const*const pathinfo) {
        assert(pathinfo != nullptr);
 
        // Initialize the result.
        cgirls_req req = {
-               .cgirls_action = cgirls_vb_index,
-               .cgirls_type = cgirls_mediatype_any,
+               .cgirls_action = cgirls_vb_unknown,
+               .cgirls_type = cgirls_mtype_any,
                .cgirls_project = nullptr,
                .cgirls_id = nullptr,
                .cgirls_subpath = nullptr,
@@ -37,126 +54,68 @@ cgirls_req cgirls_path2req(char const*const pathinfo) {
                },
        };
 
-       // `sont´ stores the start of the next term; `eopi´ stores the end of
-       // the `pathinfo´ string, excluding any extension.
-       char const* sont = pathinfo;
-       char const*const eopi = strchr(pathinfo, 0);
+       // `ndx´ stores the start of the next term; `end´ stores the end of
+       // the `pathinfo´ string.
+       char const* ndx[1] = { pathinfo };
+       char const*const end[1] = { strchr(pathinfo, 0) };
+       assert(end[0] != nullptr);
 
        // The portion of the pathinfo which precedes the first slash gives
        // the project of the request. If there is no first slash, the
        // project extends to the end of the string. An empty string is
        // equivalent to having no project.
-       char const* eopj = strchr(sont, '/');
-       if (!eopj) {
-               eopj = eopi;
-       }
-       if (eopj > sont) {
-               req.cgirls_project = strndup(sont, eopj - sont);
-       }
-       if (eopj < eopi) {
-               sont = eopj + 1;
-       } else {
-               sont = eopi;
-       }
+       req.cgirls_project = cgirls_gobblepath(ndx, end);
 
-  // The portion of the pathinfo which follows the first slash but
-  // precedes the second gives the verb of the request. If there is no
-  // second slash, the verb extends to the end of the string. If the
-  // verb is not present, or is the empty string, it is treated as
-  // `"index"´, unless the second slash is present, in which case it is
-  // treated as `"unknown"´.
-  //
-  // Verbs may be suffixed with one of a small number of extensions to
-  // request a specific type of response.
-  //
-  // Only a few verbs are recognized (corresponding to the `cgirls_vb´
-  // constants). If a verb is present, but unrecognized, it is assigned
-  // the special value `cgirls_vb_unknown´, which should generally be
-  // interpreted as an error.
-       char const* eovb = strchr(sont, '/');
-       if (!eovb) {
-               eovb = eopi;
-       }
-       char const*const eove = eovb;
-       char* verb = nullptr;
-       if (eovb - sont > 4) {
-               // If the verb is at least 5 characters, extract the extension if
-               // present (it will be the last 4), and then set the end of the
-               // verb to the start of the extension.
-               char const* exts = eovb - 4;
-               do {
-                       // This “loop” encapsulates extension checking for readability.
-                       // If an extension matches, `eovb´ is re·assigned to point to the
-                       // beginning of the extension. Otherwise, the loop exits early
-                       // and `eovb´ keeps pointing at the end of the string.
-                       if (strncmp(exts, ".txt", 4) == 0) {
-                               req.cgirls_type = cgirls_mediatype_txt;
-                       } else if (strncmp(exts, ".htm", 4) == 0) {
-                               req.cgirls_type = cgirls_mediatype_htm;
-                       } else if (strncmp(exts, ".xml", 4) == 0) {
-                               req.cgirls_type = cgirls_mediatype_xml;
-                       } else if (strncmp(exts, ".rdf", 4) == 0) {
-                               req.cgirls_type = cgirls_mediatype_rdf;
-                       } else {
-                               break; // do not re·assign `eovb´
+       // The portion of the pathinfo which follows the first slash but
+       // precedes the second gives the action of the request. If there is
+       // no second slash, the action extends to the end of the string. If
+       // the action is not present, or is the empty string, it is treated
+       // as `"index"´, unless the second slash is present, in which case it
+       // is treated as `"unknown"´.
+       //
+       // Actions consist of verbs optionally suffixed with one of a small
+       // number of extensions to request a specific type of response.
+       //
+       // Only a few verbs are recognized (corresponding to the `cgirls_vb´
+       // constants). If a verb is present, but unrecognized, it is assigned
+       // the special value `cgirls_vb_unknown´, which should generally be
+       // interpreted as an error.
+       char* soa = cgirls_gobblepath(ndx, end);
+       if (soa) {
+               char*const eoa = strchr(soa, 0);
+               if (eoa - soa > 4) {
+                       // If the verb is at least 5 characters, extract the extension if
+                       // present (it will be the last 4). Then set the first character
+                       // of the extension to null, effectively trimming the verb.
+                       char* ext = eoa - 4;
+                       for (size_t i = 0; i < cgirls_n·mtypes; ++i) {
+                               cgirls_mtype ixt = cgirls_mtypes[i];
+                               if (strncmp(ext, ixt, 4) == 0) {
+                                       req.cgirls_type = ixt;
+                                       ext[0] = 0;
+                                       break;
+                               }
+                       }
+               }
+               for (size_t i = 0; i < cgirls_n·parsable·vbs; ++i) {
+                       cgirls_vb ivb = cgirls_parsable·vbs[i];
+                       if (strcmp(soa, ivb) == 0) {
+                               req.cgirls_action = ivb;
+                               break;
                        }
-                       eovb = exts;
-               } while (false);
-       }
-       if (eovb > sont) {
-               verb = strndup(sont, eovb - sont);
-       }
-       if (eove < eopi) {
-               sont = eove + 1;
-       } else {
-               sont = eopi;
-       }
-       if (verb) {
-               if (strcmp(verb, "branches") == 0) {
-                       req.cgirls_action = cgirls_vb_branches;
-               } else if (strcmp(verb, "tags") == 0) {
-                       req.cgirls_action = cgirls_vb_tags;
-               } else if (strcmp(verb, "show") == 0) {
-                       req.cgirls_action = cgirls_vb_show;
-               } else if (strcmp(verb, "raw") == 0) {
-                       req.cgirls_action = cgirls_vb_raw;
-               } else if (strcmp(verb, "blame") == 0) {
-                       req.cgirls_action = cgirls_vb_blame;
-               } else if (strcmp(verb, "log") == 0) {
-                       req.cgirls_action = cgirls_vb_log;
-               } else if (strcmp(verb, "shortlog") == 0) {
-                       req.cgirls_action = cgirls_vb_shortlog;
-               } else if (strcmp(verb, "atom") == 0) {
-                       req.cgirls_action = cgirls_vb_atom;
-               } else if (strcmp(verb, "patch") == 0) {
-                       req.cgirls_action = cgirls_vb_patch;
-               } else if (strcmp(verb, "index") != 0) {
-                       req.cgirls_action = cgirls_vb_unknown;
                }
-               free(verb);
-       } else if (eovb < eopi) {
-               req.cgirls_action = cgirls_vb_unknown;
+               free(soa);
+       } else if (ndx[0] == end[0]) {
+               req.cgirls_action = cgirls_vb_index;
        }
 
-  // The portion of the pathinfo which follows the second slash but
-  // precedes the third identifies the identifiers for the request. If
-  // there is no third slash, the identifiers extend to the end of the
-  // string. A single identifier may be given, or two identifiers may
-  // be given separated by two periods. An empty string is equivalent
-  // to no identifier.
-       char const* eoid = strchr(sont, '/');
-       if (!eoid) {
-               eoid = eopi;
-       }
-       char* idid = nullptr;
-       if (eoid > sont) {
-               idid = strndup(sont, eoid - sont);
-       }
-       if (eoid < eopi) {
-               sont = eoid + 1;
-       } else {
-               sont = eopi;
-       }
+       // The portion of the pathinfo which follows the second slash but
+       // precedes the third identifies the identifiers for the request. If
+       // there is no third slash, the identifiers extend to the end of the
+       // string. A single identifier may be given, or two identifiers may
+       // be given separated by two periods. An empty string is equivalent
+       // to no identifier.
+       char* idid = cgirls_gobblepath(ndx, end);
        if (idid) {
                // If the identifier string contains two successive dots, the base
                // and target identifiers must be extracted and the original
@@ -181,126 +140,78 @@ cgirls_req cgirls_path2req(char const*const pathinfo) {
        // The portion of the pathinfo which follows the third slash is the
        // subpath of the request. An empty sting is equivalent to having no
        // subpath. Trailing and successive slashes are dropped.
-       char const* soct = sont;
-       char const* psep = nullptr;
-       size_t npth = 0;
-       while (eopi > soct) {
+       char const* sos = ndx[0];
+       char const* sep = nullptr;
+       size_t n·s = 0;
+       while (end[0] > sos) {
                // Count the number of segments in the pathinfo so that the correct
                // amount of space can be allocated.
-               psep = strchr(soct, '/');
-               if (!psep) {
-                       psep = eopi;
+               sep = strchr(sos, '/');
+               if (!sep) {
+                       sep = end[0];
                }
-               if (psep > soct) {
-                       ++npth;
+               if (sep > sos) {
+                       ++n·s;
                }
-               if (eopi > psep) {
-                       soct = psep + 1;
+               if (end[0] > sep) {
+                       sos = sep + 1;
                } else {
-                       soct = eopi;
+                       sos = end[0];
                }
        }
-       req.cgirls_subpath = calloc(npth + 1, sizeof(char*));
+       req.cgirls_subpath = calloc(n·s + 1, sizeof(char*));
        if (!req.cgirls_subpath) {
                return req;
        }
-       size_t pthi = 0;
-       while (eopi > sont) {
+       size_t i·s = 0;
+       while (end[0] > ndx[0]) {
                // Add the segments to the newly allocated array.
-               psep = strchr(sont, '/');
-               if (!psep) {
-                       psep = eopi;
+               sep = strchr(ndx[0], '/');
+               if (!sep) {
+                       sep = end[0];
                }
-               if (psep > sont) {
-                       req.cgirls_subpath[pthi++] = strndup(sont, psep - sont);
+               if (sep > ndx[0]) {
+                       req.cgirls_subpath[i·s++] = strndup(ndx[0], sep - ndx[0]);
                }
-               if (eopi > psep) {
-                       sont = psep + 1;
+               if (end[0] > sep) {
+                       ndx[0] = sep + 1;
                } else {
-                       sont = eopi;
+                       ndx[0] = end[0];
                }
        }
-       assert(pthi == npth);
-       req.cgirls_subpath[pthi] = nullptr;
+       assert(i·s == n·s);
+       req.cgirls_subpath[i·s] = nullptr;
 
        // Return the result.
        return req;
 }
 
 char* cgirls_req2path(cgirls_req req) {
-       char* action = "unknown";
-       char* extnsn = "";
-       size_t length = 8; // length of `action´ plus 1, to start
+       cgirls_vb vb = cgirls_vb_index;
+       bool has·ids = req.cgirls_baseid || req.cgirls_id;
+       bool has·type = req.cgirls_type;
+       bool has·subpath = req.cgirls_subpath && req.cgirls_subpath[0];
+       size_t length = 0;
 
-       // Get the length of the various parts, saving the verb and the
-       // extension. This length includes a trailing slash, but in practice
-       // this will be replaced by the final null byte.
-       switch (req.cgirls_action) {
-               case cgirls_vb_index:
-                       action = "index";
-                       length = 6;
-                       break;
-               case cgirls_vb_branches:
-                       action = "branches";
-                       length = 9;
-                       break;
-               case cgirls_vb_tags:
-                       action = "tags";
-                       length = 5;
-                       break;
-               case cgirls_vb_show:
-                       action = "show";
-                       length = 5;
-                       break;
-               case cgirls_vb_raw:
-                       action = "raw";
-                       length = 4;
-                       break;
-               case cgirls_vb_blame:
-                       action = "blame";
-                       length = 6;
-                       break;
-               case cgirls_vb_log:
-                       action = "log";
-                       length = 4;
-                       break;
-               case cgirls_vb_shortlog:
-                       action = "shortlog";
-                       length = 9;
-                       break;
-               case cgirls_vb_atom:
-                       action = "atom";
-                       length = 5;
-                       break;
-               case cgirls_vb_patch:
-                       action = "patch";
-                       length = 6;
-                       break;
-               default:
-                       break;
-       }
-       switch (req.cgirls_type) {
-               case cgirls_mediatype_txt:
-                       extnsn = ".txt";
-                       break;
-               case cgirls_mediatype_htm:
-                       extnsn = ".htm";
-                       break;
-               case cgirls_mediatype_xml:
-                       extnsn = ".xml";
-                       break;
-               case cgirls_mediatype_rdf:
-                       extnsn = ".rdf";
-                       break;
-               default:
-                       break;
-       }
+       // Get the length of the various parts. This length includes a
+       // trailing slash, but in practice this will be replaced by the final
+       // null byte.
        if (req.cgirls_project) {
                length += strlen(req.cgirls_project) + 1;
-               if (req.cgirls_type != cgirls_mediatype_any) {
-                       length += 4;
+               for (size_t i = 0; i < cgirls_n·vbs; ++i) {
+                       cgirls_vb ivb = cgirls_vbs[i];
+                       if (req.cgirls_action == ivb) {
+                               vb = ivb;
+                               break;
+                       }
+               }
+               if (vb != cgirls_vb_index || has·type || has·ids || has·subpath) {
+                       length += strlen(vb) + 1;
+               }
+               if (has·type) {
+                       length += strlen(req.cgirls_type);
                }
-               if (req.cgirls_baseid || req.cgirls_id) {
+               if (has·ids) {
                        if (req.cgirls_baseid) {
                                length += strlen(req.cgirls_baseid) + 2;
                        }
@@ -308,10 +219,10 @@ char* cgirls_req2path(cgirls_req req) {
                                length += strlen(req.cgirls_id);
                        }
                        length += 1;
-               } else if (req.cgirls_subpath && req.cgirls_subpath[0]) {
+               } else if (has·subpath) {
                        length += 3;
                }
-               if (req.cgirls_subpath) {
+               if (has·subpath) {
                        size_t i = 0;
                        char* c = req.cgirls_subpath[i];
                        while (c) {
@@ -333,26 +244,26 @@ char* cgirls_req2path(cgirls_req req) {
        if (req.cgirls_project) {
                cursor = stpcpy(cursor, req.cgirls_project);
                (cursor++)[0] = '/';
-               cursor = stpcpy(cursor, action);
-               if (req.cgirls_type != cgirls_mediatype_any) {
-                       cursor = stpcpy(cursor, extnsn);
+               if (vb != cgirls_vb_index || has·type || has·ids || has·subpath) {
+                       cursor = stpcpy(cursor, vb);
+                       if (has·type) {
+                               cursor = stpcpy(cursor, req.cgirls_type);
+                       }
+                       (cursor++)[0] = '/';
                }
-               (cursor++)[0] = '/';
-               if (req.cgirls_baseid || req.cgirls_id) {
+               if (has·ids) {
                        if (req.cgirls_baseid) {
                                cursor = stpcpy(cursor, req.cgirls_baseid);
-                               cursor[0] = '.';
-                               cursor[1] = '.';
-                               cursor += 2;
+                               cursor = stpcpy(cursor, "..");
                        }
                        if (req.cgirls_id) {
                                cursor = stpcpy(cursor, req.cgirls_id);
                        }
                        (cursor++)[0] = '/';
-               } else if (req.cgirls_subpath && req.cgirls_subpath[0]) {
+               } else if (has·subpath) {
                        cursor = stpcpy(cursor, "../");
                }
-               if (req.cgirls_subpath) {
+               if (has·subpath) {
                        size_t i = 0;
                        char* c = req.cgirls_subpath[i];
                        while (c) {
index 2ab70e0a01e87ae681a74ac6850dc0df34bb5c01..e1606dc275a717607f47027a29e1b633a54815cb 100644 (file)
--- a/request.h
+++ b/request.h
@@ -4,37 +4,56 @@
 #ifndef CGIRLS_REQUEST_H
 #define CGIRLS_REQUEST_H
 
-enum cgirls_mediatype : unsigned char {
-       // Unspecified media type
-       cgirls_mediatype_any = 0x00,
-       // Text media types
-       cgirls_mediatype_txt = 0x10,
-       cgirls_mediatype_htm = 0x11,
-       // X·M·L media types
-       cgirls_mediatype_xml = 0x20,
-       cgirls_mediatype_rdf = 0x21,
+/*
+The following constant expressions provide recognized media type
+extensions.
+*/
+constexpr char* cgirls_mtype_any = nullptr;
+constexpr char cgirls_mtype_txt[] = ".txt";
+constexpr char cgirls_mtype_htm[] = ".htm";
+constexpr char cgirls_mtype_xml[] = ".xml";
+constexpr char cgirls_mtype_rdf[] = ".rdf";
+typedef char const* cgirls_mtype;
+constexpr size_t cgirls_n·mtypes = 4;
+static cgirls_mtype const cgirls_mtypes[cgirls_n·mtypes] = {
+       cgirls_mtype_txt,
+       cgirls_mtype_htm,
+       cgirls_mtype_xml,
+       cgirls_mtype_rdf,
 };
-typedef enum cgirls_mediatype cgirls_mediatype;
 
-enum cgirls_vb : unsigned char {
-       // Actions in general
-       cgirls_vb_index = 0x00,
-       // Actions on projects
-       cgirls_vb_branches = 0x10,
-       cgirls_vb_tags = 0x11,
-       // Actions on single objects
-       cgirls_vb_show = 0x20,
-       cgirls_vb_raw = 0x21,
-       cgirls_vb_blame = 0x22,
-       // Actions on ranges of objects
-       cgirls_vb_log = 0x30,
-       cgirls_vb_shortlog = 0x31,
-       cgirls_vb_atom = 0x32,
-       cgirls_vb_patch = 0x33,
-       // Unknown verb
-       cgirls_vb_unknown = 0xFF,
+/*
+The following constant expressions provide recognized action verbs.
+*/
+// Actions in general:
+constexpr char cgirls_vb_index[] = "index";
+constexpr char cgirls_vb_unknown[] = "unknown";
+// Actions on projects:
+// constexpr char cgirls_vb_branches[] = "branches";
+// constexpr char cgirls_vb_tags[] = "tags";
+// Actions on single objects:
+constexpr char cgirls_vb_show[] = "show";
+// constexpr char cgirls_vb_raw[] = "raw";
+// constexpr char cgirls_vb_blame[] = "blame";
+// Actions on ranges of commits:
+// constexpr char cgirls_vb_diff[] = "diff";
+// Actions on lists of commits:
+// constexpr char cgirls_vb_log[] = "log";
+// constexpr char cgirls_vb_shortlog[] = "shortlog";
+// constexpr char cgirls_vb_atom[] = "atom";
+// constexpr char cgirls_vb_patch[] = "patch";
+typedef char const* cgirls_vb;
+constexpr size_t cgirls_n·vbs = 3;
+static cgirls_vb const cgirls_vbs[cgirls_n·vbs] = {
+       cgirls_vb_index,
+       cgirls_vb_unknown,
+       cgirls_vb_show,
+};
+constexpr size_t cgirls_n·parsable·vbs = 2;
+static cgirls_vb const cgirls_parsable·vbs[cgirls_n·parsable·vbs] = {
+       cgirls_vb_index,
+       cgirls_vb_show,
 };
-typedef enum cgirls_vb cgirls_vb;
 
 typedef struct cgirls_req_status cgirls_req_status;
 struct cgirls_req_status {
@@ -45,7 +64,7 @@ struct cgirls_req_status {
 typedef struct cgirls_req cgirls_req;
 struct cgirls_req {
        cgirls_vb cgirls_action;
-       cgirls_mediatype cgirls_type;
+       cgirls_mtype cgirls_type;
        char* cgirls_project;
        char* cgirls_id;
        char** cgirls_subpath;
index 18725357fc5e534c2a1258d7155571c58e56f76f..e99636195daa24d1804c0f9b2d3a94e43c2b34f4 100644 (file)
@@ -1,3 +1,3 @@
 # SPDX-FileCopyrightText: 2025 Lady <https://www.ladys.computer/about/#lady>
 # SPDX-License-Identifier: CC0-1.0
-p/log.txt/b..i/s
+p/show.txt/b..i/s
index 85d519e652b1194a733183687bfc52bab9fb972a..c2cec3d090f2cb44e1a614536dcd22f705f2df79 100644 (file)
@@ -1,3 +1,3 @@
 # SPDX-FileCopyrightText: 2025 Lady <https://www.ladys.computer/about/#lady>
 # SPDX-License-Identifier: CC0-1.0
-p/log/b..i//m//n///o////
+p/show/b..i//m//n///o////
This page took 0.345203 seconds and 4 git commands to generate.