From: Lady <redacted>
Date: Wed, 19 Mar 2025 01:06:31 +0000 (-0400)
Subject: Improve handling of strings
X-Git-Url: https://git.ladys.computer/CGirls/commitdiff_plain/f5006884a1f4f37bc3c27ab1f3b882d2b36cd052?ds=sidebyside;hp=f6f2fd79a596ecedaadd8b605b7de9d8c662151c

Improve handling of strings

• String constants are now defined with `constexpr´. Because these are
  (associated at runtime with) `char const*const´ values, they can be
  compared more‐or‐less like the old enum values used to be; because
  those pointers point to actual strings, the code for processing them
  and serializing them is simplified quite a bit. A few arrays give the
  list of available strings; these are ⹐not⹑ (cannot be) `constexpr´s
  because while the strings themselves are known at compile time, the
  pointers which point to them cannot be. Instead, they are
  `static char const*const´ arrays; the `static´ keyword keeps their
  visibility internal.

⋯ Exceptionally, `cgirls_mtype_any´ is defined as `nullptr´ rather
  than a string of zero length; handling this should always be a
  special case.

• Most of the verbs have been commented out to reduce the amount of
  code needed for an initial working implementation.

• The path·info parsing code has been refactored a bit, making use of a
  new function, `cgirls_gobblepath´, to encapsulate the task of reading
  up thru the next slash. The serialization code has also been
  refactored here and there for tidiness.

• Some comments in `request.c´ used spaces instead of tabs. Whoops!

Note that Clang only supports `constexpr´ in version 19 and later.
---

diff --git a/expect/pathinfo/09-canonical b/expect/pathinfo/09-canonical
index 1872535..e996361 100644
--- a/expect/pathinfo/09-canonical
+++ b/expect/pathinfo/09-canonical
@@ -1,3 +1,3 @@
 # SPDX-FileCopyrightText: 2025 Lady <https://www.ladys.computer/about/#lady>
 # SPDX-License-Identifier: CC0-1.0
-p/log.txt/b..i/s
+p/show.txt/b..i/s
diff --git a/expect/pathinfo/10-blushypath b/expect/pathinfo/10-blushypath
index 38db5b2..1cd47a6 100644
--- a/expect/pathinfo/10-blushypath
+++ b/expect/pathinfo/10-blushypath
@@ -1,3 +1,3 @@
 # SPDX-FileCopyrightText: 2025 Lady <https://www.ladys.computer/about/#lady>
 # SPDX-License-Identifier: CC0-1.0
-p/log/b..i/m/n/o
+p/show/b..i/m/n/o
diff --git a/request.c b/request.c
index ca58f0b..25f9625 100644
--- a/request.c
+++ b/request.c
@@ -20,13 +20,30 @@ void cgirls_freereq (cgirls_req req) {
 	free(req.cgirls_status.cgirls_message);
 }
 
+char* cgirls_gobblepath(char const* ndx[1], char const*const end[1]) {
+	char const* eor = strchr(ndx[0], '/');
+	char* result = nullptr;
+	if (!eor) {
+		eor = end[0];
+	}
+	if (eor > ndx[0]) {
+		result = strndup(ndx[0], eor - ndx[0]);
+	}
+	if (eor < end[0]) {
+		ndx[0] = eor + 1;
+	} else {
+		ndx[0] = end[0];
+	}
+	return result;
+}
+
 cgirls_req cgirls_path2req(char const*const pathinfo) {
 	assert(pathinfo != nullptr);
 
 	// Initialize the result.
 	cgirls_req req = {
-		.cgirls_action = cgirls_vb_index,
-		.cgirls_type = cgirls_mediatype_any,
+		.cgirls_action = cgirls_vb_unknown,
+		.cgirls_type = cgirls_mtype_any,
 		.cgirls_project = nullptr,
 		.cgirls_id = nullptr,
 		.cgirls_subpath = nullptr,
@@ -37,126 +54,68 @@ cgirls_req cgirls_path2req(char const*const pathinfo) {
 		},
 	};
 
-	// `sont´ stores the start of the next term; `eopi´ stores the end of
-	// the `pathinfo´ string, excluding any extension.
-	char const* sont = pathinfo;
-	char const*const eopi = strchr(pathinfo, 0);
+	// `ndx´ stores the start of the next term; `end´ stores the end of
+	// the `pathinfo´ string.
+	char const* ndx[1] = { pathinfo };
+	char const*const end[1] = { strchr(pathinfo, 0) };
+	assert(end[0] != nullptr);
 
 	// The portion of the pathinfo which precedes the first slash gives
 	// the project of the request. If there is no first slash, the
 	// project extends to the end of the string. An empty string is
 	// equivalent to having no project.
-	char const* eopj = strchr(sont, '/');
-	if (!eopj) {
-		eopj = eopi;
-	}
-	if (eopj > sont) {
-		req.cgirls_project = strndup(sont, eopj - sont);
-	}
-	if (eopj < eopi) {
-		sont = eopj + 1;
-	} else {
-		sont = eopi;
-	}
+	req.cgirls_project = cgirls_gobblepath(ndx, end);
 
-  // The portion of the pathinfo which follows the first slash but
-  // precedes the second gives the verb of the request. If there is no
-  // second slash, the verb extends to the end of the string. If the
-  // verb is not present, or is the empty string, it is treated as
-  // `"index"´, unless the second slash is present, in which case it is
-  // treated as `"unknown"´.
-  //
-  // Verbs may be suffixed with one of a small number of extensions to
-  // request a specific type of response.
-  //
-  // Only a few verbs are recognized (corresponding to the `cgirls_vb´
-  // constants). If a verb is present, but unrecognized, it is assigned
-  // the special value `cgirls_vb_unknown´, which should generally be
-  // interpreted as an error.
-	char const* eovb = strchr(sont, '/');
-	if (!eovb) {
-		eovb = eopi;
-	}
-	char const*const eove = eovb;
-	char* verb = nullptr;
-	if (eovb - sont > 4) {
-		// If the verb is at least 5 characters, extract the extension if
-		// present (it will be the last 4), and then set the end of the
-		// verb to the start of the extension.
-		char const* exts = eovb - 4;
-		do {
-			// This “loop” encapsulates extension checking for readability.
-			// If an extension matches, `eovb´ is re·assigned to point to the
-			// beginning of the extension. Otherwise, the loop exits early
-			// and `eovb´ keeps pointing at the end of the string.
-			if (strncmp(exts, ".txt", 4) == 0) {
-				req.cgirls_type = cgirls_mediatype_txt;
-			} else if (strncmp(exts, ".htm", 4) == 0) {
-				req.cgirls_type = cgirls_mediatype_htm;
-			} else if (strncmp(exts, ".xml", 4) == 0) {
-				req.cgirls_type = cgirls_mediatype_xml;
-			} else if (strncmp(exts, ".rdf", 4) == 0) {
-				req.cgirls_type = cgirls_mediatype_rdf;
-			} else {
-				break; // do not re·assign `eovb´
+	// The portion of the pathinfo which follows the first slash but
+	// precedes the second gives the action of the request. If there is
+	// no second slash, the action extends to the end of the string. If
+	// the action is not present, or is the empty string, it is treated
+	// as `"index"´, unless the second slash is present, in which case it
+	// is treated as `"unknown"´.
+	//
+	// Actions consist of verbs optionally suffixed with one of a small
+	// number of extensions to request a specific type of response.
+	//
+	// Only a few verbs are recognized (corresponding to the `cgirls_vb´
+	// constants). If a verb is present, but unrecognized, it is assigned
+	// the special value `cgirls_vb_unknown´, which should generally be
+	// interpreted as an error.
+	char* soa = cgirls_gobblepath(ndx, end);
+	if (soa) {
+		char*const eoa = strchr(soa, 0);
+		if (eoa - soa > 4) {
+			// If the verb is at least 5 characters, extract the extension if
+			// present (it will be the last 4). Then set the first character
+			// of the extension to null, effectively trimming the verb.
+			char* ext = eoa - 4;
+			for (size_t i = 0; i < cgirls_n·mtypes; ++i) {
+				cgirls_mtype ixt = cgirls_mtypes[i];
+				if (strncmp(ext, ixt, 4) == 0) {
+					req.cgirls_type = ixt;
+					ext[0] = 0;
+					break;
+				}
+			}
+		}
+		for (size_t i = 0; i < cgirls_n·parsable·vbs; ++i) {
+			cgirls_vb ivb = cgirls_parsable·vbs[i];
+			if (strcmp(soa, ivb) == 0) {
+				req.cgirls_action = ivb;
+				break;
 			}
-			eovb = exts;
-		} while (false);
-	}
-	if (eovb > sont) {
-		verb = strndup(sont, eovb - sont);
-	}
-	if (eove < eopi) {
-		sont = eove + 1;
-	} else {
-		sont = eopi;
-	}
-	if (verb) {
-		if (strcmp(verb, "branches") == 0) {
-			req.cgirls_action = cgirls_vb_branches;
-		} else if (strcmp(verb, "tags") == 0) {
-			req.cgirls_action = cgirls_vb_tags;
-		} else if (strcmp(verb, "show") == 0) {
-			req.cgirls_action = cgirls_vb_show;
-		} else if (strcmp(verb, "raw") == 0) {
-			req.cgirls_action = cgirls_vb_raw;
-		} else if (strcmp(verb, "blame") == 0) {
-			req.cgirls_action = cgirls_vb_blame;
-		} else if (strcmp(verb, "log") == 0) {
-			req.cgirls_action = cgirls_vb_log;
-		} else if (strcmp(verb, "shortlog") == 0) {
-			req.cgirls_action = cgirls_vb_shortlog;
-		} else if (strcmp(verb, "atom") == 0) {
-			req.cgirls_action = cgirls_vb_atom;
-		} else if (strcmp(verb, "patch") == 0) {
-			req.cgirls_action = cgirls_vb_patch;
-		} else if (strcmp(verb, "index") != 0) {
-			req.cgirls_action = cgirls_vb_unknown;
 		}
-		free(verb);
-	} else if (eovb < eopi) {
-		req.cgirls_action = cgirls_vb_unknown;
+		free(soa);
+	} else if (ndx[0] == end[0]) {
+		req.cgirls_action = cgirls_vb_index;
 	}
 
-  // The portion of the pathinfo which follows the second slash but
-  // precedes the third identifies the identifiers for the request. If
-  // there is no third slash, the identifiers extend to the end of the
-  // string. A single identifier may be given, or two identifiers may
-  // be given separated by two periods. An empty string is equivalent
-  // to no identifier.
-	char const* eoid = strchr(sont, '/');
-	if (!eoid) {
-		eoid = eopi;
-	}
-	char* idid = nullptr;
-	if (eoid > sont) {
-		idid = strndup(sont, eoid - sont);
-	}
-	if (eoid < eopi) {
-		sont = eoid + 1;
-	} else {
-		sont = eopi;
-	}
+	// The portion of the pathinfo which follows the second slash but
+	// precedes the third identifies the identifiers for the request. If
+	// there is no third slash, the identifiers extend to the end of the
+	// string. A single identifier may be given, or two identifiers may
+	// be given separated by two periods. An empty string is equivalent
+	// to no identifier.
+	char* idid = cgirls_gobblepath(ndx, end);
 	if (idid) {
 		// If the identifier string contains two successive dots, the base
 		// and target identifiers must be extracted and the original
@@ -181,126 +140,78 @@ cgirls_req cgirls_path2req(char const*const pathinfo) {
 	// The portion of the pathinfo which follows the third slash is the
 	// subpath of the request. An empty sting is equivalent to having no
 	// subpath. Trailing and successive slashes are dropped.
-	char const* soct = sont;
-	char const* psep = nullptr;
-	size_t npth = 0;
-	while (eopi > soct) {
+	char const* sos = ndx[0];
+	char const* sep = nullptr;
+	size_t n·s = 0;
+	while (end[0] > sos) {
 		// Count the number of segments in the pathinfo so that the correct
 		// amount of space can be allocated.
-		psep = strchr(soct, '/');
-		if (!psep) {
-			psep = eopi;
+		sep = strchr(sos, '/');
+		if (!sep) {
+			sep = end[0];
 		}
-		if (psep > soct) {
-			++npth;
+		if (sep > sos) {
+			++n·s;
 		}
-		if (eopi > psep) {
-			soct = psep + 1;
+		if (end[0] > sep) {
+			sos = sep + 1;
 		} else {
-			soct = eopi;
+			sos = end[0];
 		}
 	}
-	req.cgirls_subpath = calloc(npth + 1, sizeof(char*));
+	req.cgirls_subpath = calloc(n·s + 1, sizeof(char*));
 	if (!req.cgirls_subpath) {
 		return req;
 	}
-	size_t pthi = 0;
-	while (eopi > sont) {
+	size_t i·s = 0;
+	while (end[0] > ndx[0]) {
 		// Add the segments to the newly allocated array.
-		psep = strchr(sont, '/');
-		if (!psep) {
-			psep = eopi;
+		sep = strchr(ndx[0], '/');
+		if (!sep) {
+			sep = end[0];
 		}
-		if (psep > sont) {
-			req.cgirls_subpath[pthi++] = strndup(sont, psep - sont);
+		if (sep > ndx[0]) {
+			req.cgirls_subpath[i·s++] = strndup(ndx[0], sep - ndx[0]);
 		}
-		if (eopi > psep) {
-			sont = psep + 1;
+		if (end[0] > sep) {
+			ndx[0] = sep + 1;
 		} else {
-			sont = eopi;
+			ndx[0] = end[0];
 		}
 	}
-	assert(pthi == npth);
-	req.cgirls_subpath[pthi] = nullptr;
+	assert(i·s == n·s);
+	req.cgirls_subpath[i·s] = nullptr;
 
 	// Return the result.
 	return req;
 }
 
 char* cgirls_req2path(cgirls_req req) {
-	char* action = "unknown";
-	char* extnsn = "";
-	size_t length = 8; // length of `action´ plus 1, to start
+	cgirls_vb vb = cgirls_vb_index;
+	bool has·ids = req.cgirls_baseid || req.cgirls_id;
+	bool has·type = req.cgirls_type;
+	bool has·subpath = req.cgirls_subpath && req.cgirls_subpath[0];
+	size_t length = 0;
 
-	// Get the length of the various parts, saving the verb and the
-	// extension. This length includes a trailing slash, but in practice
-	// this will be replaced by the final null byte.
-	switch (req.cgirls_action) {
-		case cgirls_vb_index:
-			action = "index";
-			length = 6;
-			break;
-		case cgirls_vb_branches:
-			action = "branches";
-			length = 9;
-			break;
-		case cgirls_vb_tags:
-			action = "tags";
-			length = 5;
-			break;
-		case cgirls_vb_show:
-			action = "show";
-			length = 5;
-			break;
-		case cgirls_vb_raw:
-			action = "raw";
-			length = 4;
-			break;
-		case cgirls_vb_blame:
-			action = "blame";
-			length = 6;
-			break;
-		case cgirls_vb_log:
-			action = "log";
-			length = 4;
-			break;
-		case cgirls_vb_shortlog:
-			action = "shortlog";
-			length = 9;
-			break;
-		case cgirls_vb_atom:
-			action = "atom";
-			length = 5;
-			break;
-		case cgirls_vb_patch:
-			action = "patch";
-			length = 6;
-			break;
-		default:
-			break;
-	}
-	switch (req.cgirls_type) {
-		case cgirls_mediatype_txt:
-			extnsn = ".txt";
-			break;
-		case cgirls_mediatype_htm:
-			extnsn = ".htm";
-			break;
-		case cgirls_mediatype_xml:
-			extnsn = ".xml";
-			break;
-		case cgirls_mediatype_rdf:
-			extnsn = ".rdf";
-			break;
-		default:
-			break;
-	}
+	// Get the length of the various parts. This length includes a
+	// trailing slash, but in practice this will be replaced by the final
+	// null byte.
 	if (req.cgirls_project) {
 		length += strlen(req.cgirls_project) + 1;
-		if (req.cgirls_type != cgirls_mediatype_any) {
-			length += 4;
+		for (size_t i = 0; i < cgirls_n·vbs; ++i) {
+			cgirls_vb ivb = cgirls_vbs[i];
+			if (req.cgirls_action == ivb) {
+				vb = ivb;
+				break;
+			}
+		}
+		if (vb != cgirls_vb_index || has·type || has·ids || has·subpath) {
+			length += strlen(vb) + 1;
+		}
+		if (has·type) {
+			length += strlen(req.cgirls_type);
 		}
-		if (req.cgirls_baseid || req.cgirls_id) {
+		if (has·ids) {
 			if (req.cgirls_baseid) {
 				length += strlen(req.cgirls_baseid) + 2;
 			}
@@ -308,10 +219,10 @@ char* cgirls_req2path(cgirls_req req) {
 				length += strlen(req.cgirls_id);
 			}
 			length += 1;
-		} else if (req.cgirls_subpath && req.cgirls_subpath[0]) {
+		} else if (has·subpath) {
 			length += 3;
 		}
-		if (req.cgirls_subpath) {
+		if (has·subpath) {
 			size_t i = 0;
 			char* c = req.cgirls_subpath[i];
 			while (c) {
@@ -333,26 +244,26 @@ char* cgirls_req2path(cgirls_req req) {
 	if (req.cgirls_project) {
 		cursor = stpcpy(cursor, req.cgirls_project);
 		(cursor++)[0] = '/';
-		cursor = stpcpy(cursor, action);
-		if (req.cgirls_type != cgirls_mediatype_any) {
-			cursor = stpcpy(cursor, extnsn);
+		if (vb != cgirls_vb_index || has·type || has·ids || has·subpath) {
+			cursor = stpcpy(cursor, vb);
+			if (has·type) {
+				cursor = stpcpy(cursor, req.cgirls_type);
+			}
+			(cursor++)[0] = '/';
 		}
-		(cursor++)[0] = '/';
-		if (req.cgirls_baseid || req.cgirls_id) {
+		if (has·ids) {
 			if (req.cgirls_baseid) {
 				cursor = stpcpy(cursor, req.cgirls_baseid);
-				cursor[0] = '.';
-				cursor[1] = '.';
-				cursor += 2;
+				cursor = stpcpy(cursor, "..");
 			}
 			if (req.cgirls_id) {
 				cursor = stpcpy(cursor, req.cgirls_id);
 			}
 			(cursor++)[0] = '/';
-		} else if (req.cgirls_subpath && req.cgirls_subpath[0]) {
+		} else if (has·subpath) {
 			cursor = stpcpy(cursor, "../");
 		}
-		if (req.cgirls_subpath) {
+		if (has·subpath) {
 			size_t i = 0;
 			char* c = req.cgirls_subpath[i];
 			while (c) {
diff --git a/request.h b/request.h
index 2ab70e0..e1606dc 100644
--- a/request.h
+++ b/request.h
@@ -4,37 +4,56 @@
 #ifndef CGIRLS_REQUEST_H
 #define CGIRLS_REQUEST_H
 
-enum cgirls_mediatype : unsigned char {
-	// Unspecified media type
-	cgirls_mediatype_any = 0x00,
-	// Text media types
-	cgirls_mediatype_txt = 0x10,
-	cgirls_mediatype_htm = 0x11,
-	// X·M·L media types
-	cgirls_mediatype_xml = 0x20,
-	cgirls_mediatype_rdf = 0x21,
+/*
+The following constant expressions provide recognized media type
+extensions.
+*/
+constexpr char* cgirls_mtype_any = nullptr;
+constexpr char cgirls_mtype_txt[] = ".txt";
+constexpr char cgirls_mtype_htm[] = ".htm";
+constexpr char cgirls_mtype_xml[] = ".xml";
+constexpr char cgirls_mtype_rdf[] = ".rdf";
+typedef char const* cgirls_mtype;
+constexpr size_t cgirls_n·mtypes = 4;
+static cgirls_mtype const cgirls_mtypes[cgirls_n·mtypes] = {
+	cgirls_mtype_txt,
+	cgirls_mtype_htm,
+	cgirls_mtype_xml,
+	cgirls_mtype_rdf,
 };
-typedef enum cgirls_mediatype cgirls_mediatype;
 
-enum cgirls_vb : unsigned char {
-	// Actions in general
-	cgirls_vb_index = 0x00,
-	// Actions on projects
-	cgirls_vb_branches = 0x10,
-	cgirls_vb_tags = 0x11,
-	// Actions on single objects
-	cgirls_vb_show = 0x20,
-	cgirls_vb_raw = 0x21,
-	cgirls_vb_blame = 0x22,
-	// Actions on ranges of objects
-	cgirls_vb_log = 0x30,
-	cgirls_vb_shortlog = 0x31,
-	cgirls_vb_atom = 0x32,
-	cgirls_vb_patch = 0x33,
-	// Unknown verb
-	cgirls_vb_unknown = 0xFF,
+/*
+The following constant expressions provide recognized action verbs.
+*/
+// Actions in general:
+constexpr char cgirls_vb_index[] = "index";
+constexpr char cgirls_vb_unknown[] = "unknown";
+// Actions on projects:
+// constexpr char cgirls_vb_branches[] = "branches";
+// constexpr char cgirls_vb_tags[] = "tags";
+// Actions on single objects:
+constexpr char cgirls_vb_show[] = "show";
+// constexpr char cgirls_vb_raw[] = "raw";
+// constexpr char cgirls_vb_blame[] = "blame";
+// Actions on ranges of commits:
+// constexpr char cgirls_vb_diff[] = "diff";
+// Actions on lists of commits:
+// constexpr char cgirls_vb_log[] = "log";
+// constexpr char cgirls_vb_shortlog[] = "shortlog";
+// constexpr char cgirls_vb_atom[] = "atom";
+// constexpr char cgirls_vb_patch[] = "patch";
+typedef char const* cgirls_vb;
+constexpr size_t cgirls_n·vbs = 3;
+static cgirls_vb const cgirls_vbs[cgirls_n·vbs] = {
+	cgirls_vb_index,
+	cgirls_vb_unknown,
+	cgirls_vb_show,
+};
+constexpr size_t cgirls_n·parsable·vbs = 2;
+static cgirls_vb const cgirls_parsable·vbs[cgirls_n·parsable·vbs] = {
+	cgirls_vb_index,
+	cgirls_vb_show,
 };
-typedef enum cgirls_vb cgirls_vb;
 
 typedef struct cgirls_req_status cgirls_req_status;
 struct cgirls_req_status {
@@ -45,7 +64,7 @@ struct cgirls_req_status {
 typedef struct cgirls_req cgirls_req;
 struct cgirls_req {
 	cgirls_vb cgirls_action;
-	cgirls_mediatype cgirls_type;
+	cgirls_mtype cgirls_type;
 	char* cgirls_project;
 	char* cgirls_id;
 	char** cgirls_subpath;
diff --git a/test/pathinfo/09-canonical b/test/pathinfo/09-canonical
index 1872535..e996361 100644
--- a/test/pathinfo/09-canonical
+++ b/test/pathinfo/09-canonical
@@ -1,3 +1,3 @@
 # SPDX-FileCopyrightText: 2025 Lady <https://www.ladys.computer/about/#lady>
 # SPDX-License-Identifier: CC0-1.0
-p/log.txt/b..i/s
+p/show.txt/b..i/s
diff --git a/test/pathinfo/10-blushypath b/test/pathinfo/10-blushypath
index 85d519e..c2cec3d 100644
--- a/test/pathinfo/10-blushypath
+++ b/test/pathinfo/10-blushypath
@@ -1,3 +1,3 @@
 # SPDX-FileCopyrightText: 2025 Lady <https://www.ladys.computer/about/#lady>
 # SPDX-License-Identifier: CC0-1.0
-p/log/b..i//m//n///o////
+p/show/b..i//m//n///o////