Lady’s Gitweb - Pisces/blobdiff - iri.js
More comprehensive support for RFC 3986 & RFC 3987
[Pisces] / iri.js
diff --git a/iri.js b/iri.js
new file mode 100644 (file)
index 0000000..f1abe78
--- /dev/null
+++ b/iri.js
@@ -0,0 +1,372 @@
+// ♓🌟 Piscēs ∷ iri.js
+// ====================================================================
+//
+// Copyright © 2020, 2022 Lady [@ Lady’s Computer].
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
+
+// Regular expression source fragments for U·R·I syntax. Each constant
+// holds pattern *text* (not a RegExp object) so that later fragments
+// can interpolate earlier ones; the names follow the grammar rules of
+// R·F·C 3986. A fragment whose text has top‐level `|` alternation is
+// wrapped in `(?:…)` at the point where it is interpolated.
+const sub·delims = String.raw`[!\$&'()*+,;=]`;
+const gen·delims = String.raw`[:/?#\[\]@]`;
+// Either kind of delimiter. Not interpolated into any other fragment
+// in this file, hence the lint suppression.
+//deno-lint-ignore no-unused-vars
+const reserved = String.raw`${gen·delims}|${sub·delims}`;
+const unreserved = String.raw`[A-Za-z0-9\-\._~]`;
+// A percent sign followed by exactly two hexadecimal digits.
+const pct·encoded = String.raw`%[0-9A-Fa-f][0-9A-Fa-f]`;
+// A decimal number from 0 through 255 with no leading zeroes.
+const dec·octet = String.raw
+  `[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]`;
+// Four decimal octets separated by literal dots.
+const IPv4address = String.raw
+  `(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})`;
+// One to four hexadecimal digits.
+const h16 = String.raw`[0-9A-Fa-f]{1,4}`;
+// Either two `h16` groups joined by a colon, or an IPv4 address.
+const ls32 = String.raw`(?:${h16}):(?:${h16})|${IPv4address}`;
+// Nine alternatives: the first has no "::", and each of the others
+// places the "::" after a different maximum number of leading groups.
+const IPv6address = String.raw
+  `(?:(?:${h16}):){6}(?:${ls32})|::(?:(?:${h16}):){5}(?:${ls32})|(?:${h16})?::(?:(?:${h16}):){4}(?:${ls32})|(?:(?:(?:${h16}):){0,1}(?:${h16}))?::(?:(?:${h16}):){3}(?:${ls32})|(?:(?:(?:${h16}):){0,2}(?:${h16}))?::(?:(?:${h16}):){2}(?:${ls32})|(?:(?:(?:${h16}):){0,3}(?:${h16}))?::(?:${h16}):(?:${ls32})|(?:(?:(?:${h16}):){0,4}(?:${h16}))?::(?:${ls32})|(?:(?:(?:${h16}):){0,5}(?:${h16}))?::(?:${h16})|(?:(?:(?:${h16}):){0,6}(?:${h16}))?::`;
+// A future‐version address literal: "v", one or more hexadecimal
+// digits, ".", and then ONE OR MORE unreserved, sub‐delimiter, or ":"
+// characters. (R·F·C 3986 §3.2.2 requires at least one trailing
+// character and permits any number of them.)
+const IPvFuture = String.raw
+  `v[0-9A-Fa-f]{1,}\.(?:${unreserved}|${sub·delims}|:)+`;
+// An IPv6 or future‐version address inside literal square brackets.
+const IP·literal = String.raw`\[(?:${IPv6address}|${IPvFuture})\]`;
+// Any number of decimal digits, including none at all.
+const port = String.raw`[0-9]*`;
+// A letter followed by any number of letters, digits, "+", "-", ".".
+const scheme = String.raw`[A-Za-z][A-Za-z0-9+\-\.]*`;
+// A single path character (or percent‐encoded triplet).
+const pchar = String.raw
+  `${unreserved}|${pct·encoded}|${sub·delims}|[:@]`;
+// Fragments and queries use the same pattern: any number of path
+// characters, "/", or "?".
+const fragment = String.raw`(?:${pchar}|[/?])*`;
+const query = String.raw`(?:${pchar}|[/?])*`;
+// A nonempty segment in which ":" is not allowed.
+const segment·nz·nc = String.raw
+  `(?:${unreserved}|${pct·encoded}|${sub·delims}|@)+`;
+// A nonempty segment.
+const segment·nz = String.raw`(?:${pchar})+`;
+// A possibly‐empty segment.
+const segment = String.raw`(?:${pchar})*`;
+// The empty pattern; it matches only the empty string.
+const path·empty = String.raw``;
+// Starts with a nonempty segment (no leading slash).
+const path·rootless = String.raw
+  `(?:${segment·nz})(?:/(?:${segment}))*`;
+// Like `path·rootless`, but the first segment may not contain ":".
+const path·noscheme = String.raw
+  `(?:${segment·nz·nc})(?:/(?:${segment}))*`;
+// Starts with a single "/"; what follows may not begin with another
+// "/" because the first segment after it must be nonempty.
+const path·absolute = String.raw
+  `/(?:(?:${segment·nz})(?:/(?:${segment}))*)?`;
+// Either empty or a run of "/"‐prefixed segments.
+const path·abempty = String.raw`(?:/(?:${segment}))*`;
+// Any of the five path forms. Not interpolated into any other
+// fragment in this file, hence the lint suppression.
+//deno-lint-ignore no-unused-vars
+const path = String.raw
+  `${path·abempty}|${path·absolute}|${path·noscheme}|${path·rootless}|${path·empty}`;
+// A registered name; may be empty.
+const reg·name = String.raw
+  `(?:${unreserved}|${pct·encoded}|${sub·delims})*`;
+// Alternatives are tried in this order: bracketed literal, IPv4
+// address, then registered name.
+const host = String.raw`${IP·literal}|${IPv4address}|${reg·name}`;
+const userinfo = String.raw
+  `(?:${unreserved}|${pct·encoded}|${sub·delims}|:)*`;
+// Optional `userinfo@`, then the host, then an optional `:port`.
+const authority = String.raw
+  `(?:(?:${userinfo})@)?(?:${host})(?::(?:${port}))?`;
+// The part of a relative reference before any query or fragment. A
+// scheme‐less path here must be `path·noscheme`, not `path·rootless`.
+const relative·part = String.raw
+  `//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·noscheme})|(?:${path·empty})`;
+const relative·ref = String.raw
+  `(?:${relative·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`;
+// The part of a U·R·I between the scheme's colon and any query or
+// fragment.
+const hier·part = String.raw
+  `//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·rootless})|(?:${path·empty})`;
+// A U·R·I with no fragment.
+const absolute·URI = String.raw
+  `(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?`;
+const URI = String.raw
+  `(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`;
+// Either a full U·R·I or a relative reference.
+const URI·reference = String.raw`(?:${URI})|(?:${relative·ref})`;
+
+// Character class covering U+E000–U+F8FF, U+F0000–U+FFFFD, and
+// U+100000–U+10FFFD. The `\u{…}` escapes are kept raw here and so
+// require the RegExp to be built with the "u" flag.
+const iprivate = String.raw
+  `[\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`;
+// Character class of the non‐A·S·C·I·I code points which an I·R·I may
+// contain unescaped (`ucschar` in R·F·C 3987 §2.2). The `\u{…}`
+// escapes are kept raw here and so require the RegExp to be built
+// with the "u" flag.
+//
+// The final range begins at U+E1000, not U+E0000: the grammar leaves
+// out U+E0000–U+E0FFF, so those code points must not match.
+const ucschar = String.raw
+  `[\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]`;
+// Regular expression source fragments for I·R·I syntax. These mirror
+// the U·R·I fragments of the same names (minus the leading "i"), with
+// `ucschar` additionally allowed wherever unreserved characters are.
+// `scheme`, `port`, `IP·literal`, `IPv4address`, `pct·encoded`, and
+// `sub·delims` are reused unchanged.
+const iunreserved = String.raw`[A-Za-z0-9\-\._~]|${ucschar}`;
+// A single I·R·I path character (or percent‐encoded triplet).
+const ipchar = String.raw
+  `${iunreserved}|${pct·encoded}|${sub·delims}|[:@]`;
+const ifragment = String.raw`(?:${ipchar}|[/?])*`;
+// Unlike `ifragment`, a query may also contain `iprivate` characters.
+const iquery = String.raw`(?:${ipchar}|${iprivate}|[/?])*`;
+// A nonempty segment in which ":" is not allowed.
+const isegment·nz·nc = String.raw
+  `(?:${iunreserved}|${pct·encoded}|${sub·delims}|@)+`;
+// A nonempty segment.
+const isegment·nz = String.raw`(?:${ipchar})+`;
+// A possibly‐empty segment.
+const isegment = String.raw`(?:${ipchar})*`;
+// The empty pattern; it matches only the empty string.
+const ipath·empty = String.raw``;
+// Starts with a nonempty segment (no leading slash).
+const ipath·rootless = String.raw
+  `(?:${isegment·nz})(?:/(?:${isegment}))*`;
+// Like `ipath·rootless`, but the first segment may not contain ":".
+const ipath·noscheme = String.raw
+  `(?:${isegment·nz·nc})(?:/(?:${isegment}))*`;
+// Starts with a single "/"; what follows may not begin with another
+// "/" because the first segment after it must be nonempty.
+const ipath·absolute = String.raw
+  `/(?:(?:${isegment·nz})(?:/(?:${isegment}))*)?`;
+// Either empty or a run of "/"‐prefixed segments.
+const ipath·abempty = String.raw`(?:/(?:${isegment}))*`;
+// Any of the five path forms. Not interpolated into any other
+// fragment in this file, hence the lint suppression.
+//deno-lint-ignore no-unused-vars
+const ipath = String.raw
+  `${ipath·abempty}|${ipath·absolute}|${ipath·noscheme}|${ipath·rootless}|${ipath·empty}`;
+// A registered name; may be empty.
+const ireg·name = String.raw
+  `(?:${iunreserved}|${pct·encoded}|${sub·delims})*`;
+// Alternatives are tried in this order: bracketed literal, IPv4
+// address, then registered name.
+const ihost = String.raw`${IP·literal}|${IPv4address}|${ireg·name}`;
+const iuserinfo = String.raw
+  `(?:${iunreserved}|${pct·encoded}|${sub·delims}|:)*`;
+// Optional `iuserinfo@`, then the host, then an optional `:port`.
+const iauthority = String.raw
+  `(?:(?:${iuserinfo})@)?(?:${ihost})(?::(?:${port}))?`;
+// The part of a relative reference before any query or fragment. A
+// scheme‐less path here must be `ipath·noscheme`, not
+// `ipath·rootless`.
+const irelative·part = String.raw
+  `//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·noscheme})|(?:${ipath·empty})`;
+const irelative·ref = String.raw
+  `(?:${irelative·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`;
+// The part of an I·R·I between the scheme's colon and any query or
+// fragment.
+const ihier·part = String.raw
+  `//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·rootless})|(?:${ipath·empty})`;
+// An I·R·I with no fragment.
+const absolute·IRI = String.raw
+  `(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?`;
+const IRI = String.raw
+  `(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`;
+// Either a full I·R·I or a relative reference.
+const IRI·reference = String.raw`(?:${IRI})|(?:${irelative·ref})`;
+
+// Predicates which test whether a value is a string matching one of
+// the grammar fragments in its entirety. Each predicate also exposes
+// a `Symbol.match` getter, so it can be passed to `String::match` in
+// place of the underlying regular expression.
+export const {
+  isAbsoluteURI, // U·R·I with no fragment
+  isURI,
+  isURIReference,
+  isAbsoluteIRI, // I·R·I with no fragment
+  isIRI,
+  isIRIReference,
+} = Object.fromEntries(
+  [
+    ["isAbsoluteIRI", absolute·IRI],
+    ["isAbsoluteURI", absolute·URI],
+    ["isIRI", IRI],
+    ["isIRIReference", IRI·reference],
+    ["isURI", URI],
+    ["isURIReference", URI·reference],
+  ].map(([name, source]) => {
+    // Anchor the fragment at both ends so that it must account for
+    // the whole string; the "u" flag is needed for `\u{…}` escapes.
+    const matcher = new RegExp(`^(?:${source})$`, "u");
+    const predicate = ($) => typeof $ == "string" && matcher.test($);
+    Object.defineProperties(predicate, {
+      // Report the exported name rather than a local one.
+      name: { value: name },
+      [Symbol.match]: {
+        configurable: true,
+        enumerable: false,
+        get: () => matcher[Symbol.match].bind(matcher),
+        set: undefined,
+      },
+    });
+    return [name, predicate];
+  }),
+);
+
+/**
+ * Builds the string form of an I·R·I reference out of its `scheme`,
+ * `authority`, `path`, `query`, and `fragment` components.
+ *
+ * A component which is null or undefined is left out together with
+ * its delimiter; a missing `path` contributes the empty string.
+ *
+ * See §5.3 of R·F·C 3986.
+ */
+export const composeReference = ($) => {
+  const { scheme, authority, path, query, fragment } = $;
+  const schemePart = scheme == null ? "" : `${scheme}:`;
+  const authorityPart = authority == null ? "" : `//${authority}`;
+  const pathPart = `${path ?? ""}`;
+  const queryPart = query == null ? "" : `?${query}`;
+  const fragmentPart = fragment == null ? "" : `#${fragment}`;
+  return schemePart + authorityPart + pathPart + queryPart +
+    fragmentPart;
+};
+
+/**
+ * Converts an I·R·I to the corresponding U·R·I by percent‐encoding
+ * unsupported characters.
+ *
+ * Every code point which matches `ucschar` or `iprivate` is replaced
+ * by the percent‐encoded form of its U·T·F‐8 bytes (uppercase
+ * hexadecimal); every other code point is passed through unchanged.
+ *
+ * This does not punycode the authority.
+ */
+export const iri2uri = ($) => {
+  // Both of these are loop‐invariant, so build them once per call
+  // rather than once per character.
+  const needsEncoding = new RegExp(`${ucschar}|${iprivate}`, "u");
+  const encoder = new TextEncoder();
+  const pieces = [];
+  // Iterating a string yields whole code points, not code units.
+  for (const character of $) {
+    if (needsEncoding.test(character)) {
+      // Matching characters are never A·S·C·I·I, so every U·T·F‐8
+      // byte is at least 0x80 and always has two hexadecimal digits.
+      for (const byte of encoder.encode(character)) {
+        pieces.push(`%${byte.toString(0x10).toUpperCase()}`);
+      }
+    } else {
+      pieces.push(character);
+    }
+  }
+  return pieces.join("");
+};
+
+/**
+ * Merges a reference path with a base path: everything in the base
+ * after its final "/" is discarded (the whole base, if it has no
+ * "/"), and the reference is appended to what remains.
+ *
+ * See §5.2.3 of R·F·C 3986.
+ */
+export const mergePaths = (base, reference) => {
+  const basePath = `${base}`;
+  // One past the final slash, or zero when there is no slash at all.
+  const keep = basePath.lastIndexOf("/") + 1;
+  const directory = basePath.slice(0, keep);
+  return directory.concat(`${reference}`);
+};
+
+/**
+ * Splits the provided I·R·I reference into its `scheme`, `authority`,
+ * `path`, `query`, and `fragment`.
+ *
+ * A component which is not present is undefined in the result. If
+ * the argument does not match the I·R·I reference grammar at all,
+ * every component is undefined.
+ */
+export const parseReference = ($) => {
+  // The pattern has two top‐level alternatives, one for references
+  // with a scheme ("absolute·…" groups) and one for those without
+  // ("relative·…" groups). Within each, the path is captured by the
+  // "…patha" group when an authority is present and by the "…pathb"
+  // group otherwise.
+  const pattern = new RegExp(
+    String.raw
+      `^(?:(?<absolute·scheme>${scheme}):(?://(?<absolute·authority>${iauthority})(?<absolute·patha>${ipath·abempty})|(?<absolute·pathb>(?:${ipath·absolute})|(?:${ipath·rootless})|(?:${ipath·empty})))(?:\?(?<absolute·query>${iquery}))?(?:#(?<absolute·fragment>${ifragment}))?|(?://(?<relative·authority>${iauthority})(?<relative·patha>${ipath·abempty})|(?<relative·pathb>(?:${ipath·absolute})|(?:${ipath·noscheme})|(?:${ipath·empty})))(?:\?(?<relative·query>${iquery}))?(?:#(?<relative·fragment>${ifragment}))?)$`,
+    "u",
+  );
+  const groups = pattern.exec($)?.groups ?? {};
+  // At most one of the groups for any given component took part in
+  // the match, so take whichever is defined.
+  const firstDefined = (...names) =>
+    names.map((name) => groups[name]).find((value) => value != null);
+  return {
+    scheme: groups["absolute·scheme"],
+    authority: firstDefined(
+      "absolute·authority",
+      "relative·authority",
+    ),
+    path: firstDefined(
+      "absolute·patha",
+      "absolute·pathb",
+      "relative·patha",
+      "relative·pathb",
+    ),
+    query: firstDefined("absolute·query", "relative·query"),
+    fragment: firstDefined("absolute·fragment", "relative·fragment"),
+  };
+};
+
+/**
+ * Strips the dot segments ("." and "..") out of the provided path,
+ * dropping the preceding segment for each ".." encountered.
+ *
+ * See §5.2.4 of R·F·C 3986.
+ */
+export const removeDotSegments = ($) => {
+  const path = `${$}`;
+  const end = path.length;
+  const kept = [];
+  let cursor = 0;
+  // Whether the unread input begins with the given text.
+  const begins = (text) => path.startsWith(text, cursor);
+  // Whether the unread input consists of exactly the given text.
+  const remainderIs = (text) =>
+    begins(text) && cursor + text.length == end;
+  while (cursor < end) {
+    if (begins("../")) {
+      // A leading "../" has nothing to climb out of; skip it. Only
+      // possible at the very start of the input.
+      cursor += 3;
+    } else if (begins("./")) {
+      // A leading "./" is a no‐op; skip it. Only possible at the very
+      // start of the input.
+      cursor += 2;
+    } else if (begins("/./")) {
+      // Skip "/." and leave the cursor on the slash after it.
+      cursor += 2;
+    } else if (remainderIs("/.")) {
+      // A trailing "/." becomes a trailing empty segment.
+      kept.push("/");
+      cursor = end;
+    } else if (begins("/../")) {
+      // Skip "/..", leave the cursor on the slash after it, and
+      // discard the most recently kept segment (if any).
+      cursor += 3;
+      kept.pop();
+    } else if (remainderIs("/..")) {
+      // A trailing "/.." (slash and double dot) discards the most
+      // recently kept segment (if any) and leaves a trailing empty
+      // segment in its place.
+      kept.pop();
+      kept.push("/");
+      cursor = end;
+    } else if (remainderIs(".") || remainderIs("..")) {
+      // The input is nothing but a dot segment; discard it. Only
+      // possible at the very start of the input.
+      cursor = end;
+    } else {
+      // An ordinary segment: keep everything from the cursor (which
+      // may be sitting on a slash) up to, but not including, the next
+      // slash — or up to the end if there is no further slash.
+      const slash = path.indexOf("/", cursor + 1);
+      const stop = slash == -1 ? end : slash;
+      kept.push(path.substring(cursor, stop));
+      cursor = stop;
+    }
+  }
+  return kept.join("");
+};
+
+/**
+ * Resolves the provided reference relative to the provided base I·R·I
+ * and returns the resulting I·R·I as a string.
+ *
+ * If no base is provided, the global `location` is used when there is
+ * one. A TypeError is thrown if the base does not have a scheme.
+ *
+ * See §5.2 of R·F·C 3986.
+ */
+export const resolveReference = (
+  R,
+  // Read `location` off of `globalThis` so that environments which do
+  // not declare it fall through to the empty string (and thus to the
+  // TypeError below) instead of throwing a ReferenceError.
+  Base = globalThis.location ?? "",
+) => {
+  const {
+    scheme: Base·scheme,
+    authority: Base·authority,
+    path: Base·path,
+    query: Base·query,
+  } = parseReference(Base);
+  if (Base·scheme == null) {
+    throw new TypeError(
+      `Piscēs: Base I·R·I did not have a scheme: ${Base}.`,
+    );
+  } else {
+    const {
+      scheme: R·scheme,
+      authority: R·authority,
+      path: R·path,
+      query: R·query,
+      fragment: R·fragment,
+    } = parseReference(R);
+    if (R·scheme != null) {
+      // The reference is already absolute; only normalize its path.
+      return composeReference({
+        scheme: R·scheme,
+        authority: R·authority,
+        path: removeDotSegments(R·path),
+        query: R·query,
+        fragment: R·fragment,
+      });
+    } else if (R·authority != null) {
+      // A network‐path reference: only the scheme is inherited.
+      return composeReference({
+        scheme: Base·scheme,
+        authority: R·authority,
+        path: removeDotSegments(R·path),
+        query: R·query,
+        fragment: R·fragment,
+      });
+    } else if (!R·path) {
+      // An empty path: keep the base path as‐is, and inherit the base
+      // query unless the reference supplies its own.
+      return composeReference({
+        scheme: Base·scheme,
+        authority: Base·authority,
+        path: Base·path,
+        query: R·query ?? Base·query,
+        fragment: R·fragment,
+      });
+    } else if (R·path[0] == "/") {
+      // An absolute path replaces the base path entirely.
+      return composeReference({
+        scheme: Base·scheme,
+        authority: Base·authority,
+        path: removeDotSegments(R·path),
+        query: R·query,
+        fragment: R·fragment,
+      });
+    } else {
+      // A relative path is merged with the base path. Per §5.2.3, an
+      // empty base path is treated as "/" ONLY when the base has an
+      // authority; otherwise `foo:` + `bar` must yield `foo:bar`, not
+      // `foo:/bar`.
+      const merged = Base·authority != null && !Base·path
+        ? `/${R·path}`
+        : mergePaths(Base·path, R·path);
+      return composeReference({
+        scheme: Base·scheme,
+        authority: Base·authority,
+        path: removeDotSegments(merged),
+        query: R·query,
+        fragment: R·fragment,
+      });
+    }
+  }
+};
This page took 0.029799 seconds and 4 git commands to generate.