Lady’s Gitweb - CGirls/blobdiff - request.c
Add request parsing and related tests
[CGirls] / request.c
diff --git a/request.c b/request.c
new file mode 100644 (file)
index 0000000..ca58f0b
--- /dev/null
+++ b/request.c
@@ -0,0 +1,375 @@
+// SPDX-FileCopyrightText: 2025 Lady <https://www.ladys.computer/about/#lady>
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "aa.h"
+#include "request.h"
+
+// Releases every heap allocation referenced by `req´: the project,
+// identifier, base identifier, and status message strings, and the
+// null‐terminated subpath array together with each of its segments.
+//
+// `req´ is received by value, so the caller’s copy still holds the
+// (now dangling) pointers afterwards and must not be used again.
+void cgirls_freereq (cgirls_req req) {
+       // The plain strings first; `free´ accepts null pointers, so absent
+       // components need no special handling.
+       free(req.cgirls_project);
+       free(req.cgirls_id);
+       free(req.cgirls_baseid);
+       free(req.cgirls_status.cgirls_message);
+
+       // The subpath is an array of owned strings terminated by a null
+       // entry; release each segment and then the array itself.
+       if (req.cgirls_subpath != nullptr) {
+               for (size_t n = 0; req.cgirls_subpath[n] != nullptr; ++n) {
+                       free(req.cgirls_subpath[n]);
+               }
+               free(req.cgirls_subpath);
+       }
+}
+
+// Parses a pathinfo string of the general shape
+//
+//     project/verb[.ext]/[baseid..]id/sub/path
+//
+// into a `cgirls_req´. `pathinfo´ must not be null.
+//
+// Every string stored in the result is a fresh allocation owned by
+// the result; release them with `cgirls_freereq´. Components which
+// are absent or empty are left as null pointers. The subpath is a
+// null‐terminated array of segments (an array holding only the
+// terminator when there is no subpath); it is null only if the array
+// itself could not be allocated.
+//
+// NOTE(review): allocation failures are not reported through
+// `cgirls_status´. A failed `strndup´ for the project, verb, or
+// identifiers is indistinguishable from that component being absent,
+// and a failed subpath segment truncates the subpath at that point.
+cgirls_req cgirls_path2req(char const*const pathinfo) {
+       assert(pathinfo != nullptr);
+
+       // Initialize the result.
+       cgirls_req req = {
+               .cgirls_action = cgirls_vb_index,
+               .cgirls_type = cgirls_mediatype_any,
+               .cgirls_project = nullptr,
+               .cgirls_id = nullptr,
+               .cgirls_subpath = nullptr,
+               .cgirls_baseid = nullptr,
+               .cgirls_status = {
+                       .cgirls_code = 200,
+                       .cgirls_message = nullptr,
+               },
+       };
+
+       // `sont´ stores the start of the next term; `eopi´ stores the end of
+       // the `pathinfo´ string (it points at the terminating null byte).
+       char const* sont = pathinfo;
+       char const*const eopi = strchr(pathinfo, 0);
+
+       // The portion of the pathinfo which precedes the first slash gives
+       // the project of the request. If there is no first slash, the
+       // project extends to the end of the string. An empty string is
+       // equivalent to having no project.
+       char const* eopj = strchr(sont, '/');
+       if (!eopj) {
+               eopj = eopi;
+       }
+       if (eopj > sont) {
+               req.cgirls_project = strndup(sont, eopj - sont);
+       }
+       if (eopj < eopi) {
+               sont = eopj + 1;
+       } else {
+               sont = eopi;
+       }
+
+       // The portion of the pathinfo which follows the first slash but
+       // precedes the second gives the verb of the request. If there is no
+       // second slash, the verb extends to the end of the string. If the
+       // verb is not present, or is the empty string, it is treated as
+       // `"index"´, unless the second slash is present, in which case it is
+       // treated as `"unknown"´.
+       //
+       // Verbs may be suffixed with one of a small number of extensions to
+       // request a specific type of response.
+       //
+       // Only a few verbs are recognized (corresponding to the `cgirls_vb´
+       // constants). If a verb is present, but unrecognized, it is assigned
+       // the special value `cgirls_vb_unknown´, which should generally be
+       // interpreted as an error.
+       char const* eovb = strchr(sont, '/');
+       if (!eovb) {
+               eovb = eopi;
+       }
+       // `eove´ remembers the end of the whole verb term (extension
+       // included), since `eovb´ may be moved back to exclude the extension.
+       char const*const eove = eovb;
+       char* verb = nullptr;
+       if (eovb - sont > 4) {
+               // If the verb is at least 5 characters, extract the extension if
+               // present (it will be the last 4), and then set the end of the
+               // verb to the start of the extension.
+               char const* exts = eovb - 4;
+               do {
+                       // This “loop” encapsulates extension checking for readability.
+                       // If an extension matches, `eovb´ is re·assigned to point to the
+                       // beginning of the extension. Otherwise, the loop exits early
+                       // and `eovb´ keeps pointing at the end of the verb term.
+                       if (strncmp(exts, ".txt", 4) == 0) {
+                               req.cgirls_type = cgirls_mediatype_txt;
+                       } else if (strncmp(exts, ".htm", 4) == 0) {
+                               req.cgirls_type = cgirls_mediatype_htm;
+                       } else if (strncmp(exts, ".xml", 4) == 0) {
+                               req.cgirls_type = cgirls_mediatype_xml;
+                       } else if (strncmp(exts, ".rdf", 4) == 0) {
+                               req.cgirls_type = cgirls_mediatype_rdf;
+                       } else {
+                               break; // do not re·assign `eovb´
+                       }
+                       eovb = exts;
+               } while (false);
+       }
+       if (eovb > sont) {
+               verb = strndup(sont, eovb - sont);
+       }
+       if (eove < eopi) {
+               sont = eove + 1;
+       } else {
+               sont = eopi;
+       }
+       if (verb) {
+               if (strcmp(verb, "branches") == 0) {
+                       req.cgirls_action = cgirls_vb_branches;
+               } else if (strcmp(verb, "tags") == 0) {
+                       req.cgirls_action = cgirls_vb_tags;
+               } else if (strcmp(verb, "show") == 0) {
+                       req.cgirls_action = cgirls_vb_show;
+               } else if (strcmp(verb, "raw") == 0) {
+                       req.cgirls_action = cgirls_vb_raw;
+               } else if (strcmp(verb, "blame") == 0) {
+                       req.cgirls_action = cgirls_vb_blame;
+               } else if (strcmp(verb, "log") == 0) {
+                       req.cgirls_action = cgirls_vb_log;
+               } else if (strcmp(verb, "shortlog") == 0) {
+                       req.cgirls_action = cgirls_vb_shortlog;
+               } else if (strcmp(verb, "atom") == 0) {
+                       req.cgirls_action = cgirls_vb_atom;
+               } else if (strcmp(verb, "patch") == 0) {
+                       req.cgirls_action = cgirls_vb_patch;
+               } else if (strcmp(verb, "index") != 0) {
+                       // Anything other than an explicit `"index"´ is unrecognized.
+                       req.cgirls_action = cgirls_vb_unknown;
+               }
+               free(verb);
+       } else if (eovb < eopi) {
+               // An empty verb which is nevertheless followed by a slash.
+               req.cgirls_action = cgirls_vb_unknown;
+       }
+
+       // The portion of the pathinfo which follows the second slash but
+       // precedes the third identifies the identifiers for the request. If
+       // there is no third slash, the identifiers extend to the end of the
+       // string. A single identifier may be given, or two identifiers may
+       // be given separated by two periods. An empty string is equivalent
+       // to no identifier.
+       char const* eoid = strchr(sont, '/');
+       if (!eoid) {
+               eoid = eopi;
+       }
+       char* idid = nullptr;
+       if (eoid > sont) {
+               idid = strndup(sont, eoid - sont);
+       }
+       if (eoid < eopi) {
+               sont = eoid + 1;
+       } else {
+               sont = eopi;
+       }
+       if (idid) {
+               // If the identifier string contains two successive dots, the base
+               // and target identifiers must be extracted and the original
+               // identifier string freed. Otherwise, the identifier string is the
+               // target identifier, and there is no base.
+               char const*const dots = strstr(idid, "..");
+               if (dots) {
+                       char const*const eods = dots + 2;
+                       char const*const eoii = strchr(idid, 0);
+                       if (dots > idid) {
+                               req.cgirls_baseid = strndup(idid, dots - idid);
+                       }
+                       if (eods < eoii) {
+                               req.cgirls_id = strndup(eods, eoii - eods);
+                       }
+                       free(idid);
+               } else {
+                       // Ownership of `idid´ passes to the result.
+                       req.cgirls_id = idid;
+               }
+       }
+
+       // The portion of the pathinfo which follows the third slash is the
+       // subpath of the request. An empty string is equivalent to having no
+       // subpath. Trailing and successive slashes are dropped.
+       char const* soct = sont;
+       char const* psep = nullptr;
+       size_t npth = 0;
+       while (eopi > soct) {
+               // Count the number of segments in the pathinfo so that the correct
+               // amount of space can be allocated.
+               psep = strchr(soct, '/');
+               if (!psep) {
+                       psep = eopi;
+               }
+               if (psep > soct) {
+                       ++npth;
+               }
+               if (eopi > psep) {
+                       soct = psep + 1;
+               } else {
+                       soct = eopi;
+               }
+       }
+       // One extra slot holds the terminating null entry.
+       req.cgirls_subpath = calloc(npth + 1, sizeof(char*));
+       if (!req.cgirls_subpath) {
+               return req;
+       }
+       size_t pthi = 0;
+       bool oom = false;
+       while (eopi > sont) {
+               // Add the segments to the newly allocated array.
+               psep = strchr(sont, '/');
+               if (!psep) {
+                       psep = eopi;
+               }
+               if (psep > sont) {
+                       char*const segm = strndup(sont, psep - sont);
+                       if (!segm) {
+                               // Storing a null here would terminate the array early while
+                               // later segments were still appended after it, where
+                               // `cgirls_freereq´ could never reach them. Stop instead, so
+                               // that the array stays contiguous and fully owned.
+                               oom = true;
+                               break;
+                       }
+                       req.cgirls_subpath[pthi++] = segm;
+               }
+               if (eopi > psep) {
+                       sont = psep + 1;
+               } else {
+                       sont = eopi;
+               }
+       }
+       if (!oom) {
+               // Both passes must agree on the number of segments.
+               assert(pthi == npth);
+       }
+       req.cgirls_subpath[pthi] = nullptr;
+
+       // Return the result.
+       return req;
+}
+
+// Composes a pathinfo string from `req´, of the general shape
+//
+//     project/verb[.ext]/[baseid..][id]/sub/path
+//
+// with no trailing slash. If `req´ has no project, the result is the
+// empty string. If there are subpath segments but no identifiers, a
+// literal `..´ term stands in for the identifiers so that the subpath
+// keeps its position. Actions without a known name are written as
+// `"unknown"´, and media types without a known extension add nothing.
+//
+// Returns a newly allocated string which the caller must `free´, or
+// null if the allocation fails.
+char* cgirls_req2path(cgirls_req req) {
+       char const* action = "unknown";
+       char const* extnsn = "";
+
+       // Select the verb and the extension text.
+       switch (req.cgirls_action) {
+               case cgirls_vb_index:
+                       action = "index";
+                       break;
+               case cgirls_vb_branches:
+                       action = "branches";
+                       break;
+               case cgirls_vb_tags:
+                       action = "tags";
+                       break;
+               case cgirls_vb_show:
+                       action = "show";
+                       break;
+               case cgirls_vb_raw:
+                       action = "raw";
+                       break;
+               case cgirls_vb_blame:
+                       action = "blame";
+                       break;
+               case cgirls_vb_log:
+                       action = "log";
+                       break;
+               case cgirls_vb_shortlog:
+                       action = "shortlog";
+                       break;
+               case cgirls_vb_atom:
+                       action = "atom";
+                       break;
+               case cgirls_vb_patch:
+                       action = "patch";
+                       break;
+               default:
+                       break;
+       }
+       switch (req.cgirls_type) {
+               case cgirls_mediatype_txt:
+                       extnsn = ".txt";
+                       break;
+               case cgirls_mediatype_htm:
+                       extnsn = ".htm";
+                       break;
+               case cgirls_mediatype_xml:
+                       extnsn = ".xml";
+                       break;
+               case cgirls_mediatype_rdf:
+                       extnsn = ".rdf";
+                       break;
+               default:
+                       break;
+       }
+
+       // Get the length of the various parts. Every term is counted along
+       // with a trailing slash; in practice the final slash will be
+       // replaced by the terminating null byte, so `length´ is exactly the
+       // size of the buffer. Lengths are measured from the strings which
+       // are actually written, so the two cannot drift apart.
+       size_t length = 1; // with no project, just the trailing slash
+       if (req.cgirls_project) {
+               length = strlen(req.cgirls_project) + 1;
+               length += strlen(action) + strlen(extnsn) + 1;
+               if (req.cgirls_baseid || req.cgirls_id) {
+                       if (req.cgirls_baseid) {
+                               length += strlen(req.cgirls_baseid) + 2; // plus `..´
+                       }
+                       if (req.cgirls_id) {
+                               length += strlen(req.cgirls_id);
+                       }
+                       length += 1;
+               } else if (req.cgirls_subpath && req.cgirls_subpath[0]) {
+                       length += 3; // the `../´ placeholder
+               }
+               if (req.cgirls_subpath) {
+                       for (size_t i = 0; req.cgirls_subpath[i]; ++i) {
+                               length += strlen(req.cgirls_subpath[i]) + 1;
+                       }
+               }
+       }
+
+       // Create and compose the final path.
+       char* result = calloc(length, sizeof *result);
+       if (!result) {
+               return nullptr;
+       }
+       char* cursor = result;
+       if (req.cgirls_project) {
+               cursor = stpcpy(cursor, req.cgirls_project);
+               *cursor++ = '/';
+               cursor = stpcpy(cursor, action);
+               cursor = stpcpy(cursor, extnsn); // empty when no type is requested
+               *cursor++ = '/';
+               if (req.cgirls_baseid || req.cgirls_id) {
+                       if (req.cgirls_baseid) {
+                               cursor = stpcpy(cursor, req.cgirls_baseid);
+                               *cursor++ = '.';
+                               *cursor++ = '.';
+                       }
+                       if (req.cgirls_id) {
+                               cursor = stpcpy(cursor, req.cgirls_id);
+                       }
+                       *cursor++ = '/';
+               } else if (req.cgirls_subpath && req.cgirls_subpath[0]) {
+                       // `stpcpy´ also writes a null byte one past the slash; that
+                       // byte is in bounds because a subpath segment follows.
+                       cursor = stpcpy(cursor, "../");
+               }
+               if (req.cgirls_subpath) {
+                       for (size_t i = 0; req.cgirls_subpath[i]; ++i) {
+                               cursor = stpcpy(cursor, req.cgirls_subpath[i]);
+                               *cursor++ = '/';
+                       }
+               }
+       } else {
+               *cursor++ = '/';
+       }
+
+       // At this point, `cursor´ points one ⹐past⹑ the last element of the
+       // array (this is allowed in C), and the last element is a slash.
+       // Rewind and set it to the null byte, and assert that everything was
+       // done correctly.
+       *--cursor = '\0';
+       assert((size_t)((cursor + 1) - result) == length);
+       return result;
+}
This page took 0.261604 seconds and 4 git commands to generate.