X-Git-Url: https://git.ladys.computer/Pisces/blobdiff_plain/1c80eb9624a66d32800885321f37c4b05161d607..2aaa51c0d16852726d2402bfb7953fab182afc01:/iri.js diff --git a/iri.js b/iri.js index b9682d9..de23d5d 100644 --- a/iri.js +++ b/iri.js @@ -7,446 +7,586 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at . -const sub·delims = String.raw`[!\$&'()*+,;=]`; -const gen·delims = String.raw`[:/?#\[\]@]`; +import { push, splice } from "./collection.js"; +import { bind, call } from "./function.js"; +import { objectCreate } from "./object.js"; +import { + asciiUppercase, + getFirstSubstringIndex, + getLastSubstringIndex, + join, + Matcher, + rawString, + stringStartsWith, + substring, +} from "./string.js"; + +const sub·delims = rawString`[!\$&'()*+,;=]`; +const gen·delims = rawString`[:/?#\[\]@]`; //deno-lint-ignore no-unused-vars -const reserved = String.raw`${gen·delims}|${sub·delims}`; -const unreserved = String.raw`[A-Za-z0-9\-\._~]`; -const pct·encoded = String.raw`%[0-9A-Fa-f][0-9A-Fa-f]`; -const dec·octet = String.raw - `[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]`; -const IPv4address = String.raw - `(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})`; -const h16 = String.raw`[0-9A-Fa-f]{1,4}`; -const ls32 = String.raw`(?:${h16}):(?:${h16})|${IPv4address}`; -const IPv6address = String.raw - `(?:(?:${h16}):){6}(?:${ls32})|::(?:(?:${h16}):){5}(?:${ls32})|(?:${h16})?::(?:(?:${h16}):){4}(?:${ls32})|(?:(?:(?:${h16}):){0,1}(?:${h16}))?::(?:(?:${h16}):){3}(?:${ls32})|(?:(?:(?:${h16}):){0,2}(?:${h16}))?::(?:(?:${h16}):){2}(?:${ls32})|(?:(?:(?:${h16}):){0,3}(?:${h16}))?::(?:${h16}):(?:${ls32})|(?:(?:(?:${h16}):){0,4}(?:${h16}))?::(?:${ls32})|(?:(?:(?:${h16}):){0,5}(?:${h16}))?::(?:${h16})|(?:(?:(?:${h16}):){0,6}(?:${h16}))?::`; -const IPvFuture = String.raw - `v[0-9A-Fa-f]{1,}\.(?:${unreserved}|${sub·delims}|:)`; -const IP·literal = String.raw`\[(?:${IPv6address}|${IPvFuture})\]`; -const port = String.raw`[0-9]*`; -const scheme = String.raw`[A-Za-z][A-Za-z0-9+\-\.]*`; -const pchar = String.raw - `${unreserved}|${pct·encoded}|${sub·delims}|[:@]`; -const fragment = String.raw`(?:${pchar}|[/?])*`; -const query = String.raw`(?:${pchar}|[/?])*`; -const segment·nz·nc = String.raw - `(?:${unreserved}|${pct·encoded}|${sub·delims}|@)+`; -const segment·nz = String.raw`(?:${pchar})+`; -const segment = String.raw`(?:${pchar})*`; -const path·empty = String.raw``; -const path·rootless = String.raw - `(?:${segment·nz})(?:/(?:${segment}))*`; -const path·noscheme = String.raw - `(?:${segment·nz·nc})(?:/(?:${segment}))*`; -const path·absolute = String.raw - `/(?:(?:${segment·nz})(?:/(?:${segment}))*)?`; -const path·abempty = String.raw`(?:/(?:${segment}))*`; -const path = String.raw - `${path·abempty}|${path·absolute}|${path·noscheme}|${path·rootless}|${path·empty}`; -const reg·name = String.raw - `(?:${unreserved}|${pct·encoded}|${sub·delims})*`; -const host = String.raw`${IP·literal}|${IPv4address}|${reg·name}`; -const userinfo = String.raw - `(?:${unreserved}|${pct·encoded}|${sub·delims}|:)*`; -const authority = String.raw - `(?:(?:${userinfo})@)?(?:${host})(?::(?:${port}))?`; -const relative·part = String.raw - `//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·noscheme})|(?:${path·empty})`; -const relative·ref = String.raw - `(?:${relative·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`; -const hier·part = String.raw - `//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·rootless})|(?:${path·empty})`; -const absolute·URI = String.raw - `(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?`; -const URI = String.raw - `(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`; -const URI·reference = String.raw`(?:${URI})|(?:${relative·ref})`; +const reserved = rawString`${gen·delims}|${sub·delims}`; +const unreserved = rawString`[A-Za-z0-9\-\._~]`; +const pct·encoded = rawString`%[0-9A-Fa-f][0-9A-Fa-f]`; +const dec·octet = + rawString`[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]`; +const IPv4address = + rawString`(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})`; +const h16 = rawString`[0-9A-Fa-f]{1,4}`; +const ls32 = rawString`(?:${h16}):(?:${h16})|${IPv4address}`; +const IPv6address = + rawString`(?:(?:${h16}):){6}(?:${ls32})|::(?:(?:${h16}):){5}(?:${ls32})|(?:${h16})?::(?:(?:${h16}):){4}(?:${ls32})|(?:(?:(?:${h16}):){0,1}(?:${h16}))?::(?:(?:${h16}):){3}(?:${ls32})|(?:(?:(?:${h16}):){0,2}(?:${h16}))?::(?:(?:${h16}):){2}(?:${ls32})|(?:(?:(?:${h16}):){0,3}(?:${h16}))?::(?:${h16}):(?:${ls32})|(?:(?:(?:${h16}):){0,4}(?:${h16}))?::(?:${ls32})|(?:(?:(?:${h16}):){0,5}(?:${h16}))?::(?:${h16})|(?:(?:(?:${h16}):){0,6}(?:${h16}))?::`; +const IPvFuture = + rawString`v[0-9A-Fa-f]{1,}\.(?:${unreserved}|${sub·delims}|:)`; +const IP·literal = rawString`\[(?:${IPv6address}|${IPvFuture})\]`; +const port = rawString`[0-9]*`; +const scheme = rawString`[A-Za-z][A-Za-z0-9+\-\.]*`; +const pchar = + rawString`${unreserved}|${pct·encoded}|${sub·delims}|[:@]`; +const fragment = rawString`(?:${pchar}|[/?])*`; +const query = rawString`(?:${pchar}|[/?])*`; +const segment·nz·nc = + rawString`(?:${unreserved}|${pct·encoded}|${sub·delims}|@)+`; +const segment·nz = rawString`(?:${pchar})+`; +const segment = rawString`(?:${pchar})*`; +const path·empty = rawString``; +const path·rootless = rawString`(?:${segment·nz})(?:/(?:${segment}))*`; +const path·noscheme = + rawString`(?:${segment·nz·nc})(?:/(?:${segment}))*`; +const path·absolute = + rawString`/(?:(?:${segment·nz})(?:/(?:${segment}))*)?`; +const path·abempty = rawString`(?:/(?:${segment}))*`; +const path = + rawString`${path·abempty}|${path·absolute}|${path·noscheme}|${path·rootless}|${path·empty}`; +const reg·name = + rawString`(?:${unreserved}|${pct·encoded}|${sub·delims})*`; +const host = rawString`${IP·literal}|${IPv4address}|${reg·name}`; +const userinfo = + rawString`(?:${unreserved}|${pct·encoded}|${sub·delims}|:)*`; +const authority = + rawString`(?:(?:${userinfo})@)?(?:${host})(?::(?:${port}))?`; +const relative·part = + rawString`//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·noscheme})|(?:${path·empty})`; +const relative·ref = + rawString`(?:${relative·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`; +const hier·part = + rawString`//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·rootless})|(?:${path·empty})`; +const absolute·URI = + rawString`(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?`; +const URI = + rawString`(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`; +const URI·reference = rawString`(?:${URI})|(?:${relative·ref})`; -const iprivate = String.raw - `[\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`; -const ucschar = String.raw - `[\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E0000}-\u{EFFFD}]`; -const iunreserved = String.raw`[A-Za-z0-9\-\._~]|${ucschar}`; -const ipchar = String.raw - `${iunreserved}|${pct·encoded}|${sub·delims}|[:@]`; -const ifragment = String.raw`(?:${ipchar}|[/?])*`; -const iquery = String.raw`(?:${ipchar}|${iprivate}|[/?])*`; -const isegment·nz·nc = String.raw - `(?:${iunreserved}|${pct·encoded}|${sub·delims}|@)+`; -const isegment·nz = String.raw`(?:${ipchar})+`; -const isegment = String.raw`(?:${ipchar})*`; -const ipath·empty = String.raw``; -const ipath·rootless = String.raw - `(?:${isegment·nz})(?:/(?:${isegment}))*`; -const ipath·noscheme = String.raw - `(?:${isegment·nz·nc})(?:/(?:${isegment}))*`; -const ipath·absolute = String.raw - `/(?:(?:${isegment·nz})(?:/(?:${isegment}))*)?`; -const ipath·abempty = String.raw`(?:/(?:${isegment}))*`; -const ipath = String.raw - `${ipath·abempty}|${ipath·absolute}|${ipath·noscheme}|${ipath·rootless}|${ipath·empty}`; -const ireg·name = String.raw - `(?:${iunreserved}|${pct·encoded}|${sub·delims})*`; -const ihost = String.raw`${IP·literal}|${IPv4address}|${ireg·name}`; -const iuserinfo = String.raw - `(?:${iunreserved}|${pct·encoded}|${sub·delims}|:)*`; -const iauthority = String.raw - `(?:(?:${iuserinfo})@)?(?:${ihost})(?::(?:${port}))?`; -const irelative·part = String.raw - `//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·noscheme})|(?:${ipath·empty})`; -const irelative·ref = String.raw - `(?:${irelative·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`; -const ihier·part = String.raw - `//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·rootless})|(?:${ipath·empty})`; -const absolute·IRI = String.raw - `(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?`; -const IRI = String.raw - `(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`; -const IRI·reference = String.raw`(?:${IRI})|(?:${irelative·ref})`; +const iprivate = + rawString`[\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`; +const ucschar = + rawString`[\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E0000}-\u{EFFFD}]`; +const iunreserved = rawString`[A-Za-z0-9\-\._~]|${ucschar}`; +const ipchar = + rawString`${iunreserved}|${pct·encoded}|${sub·delims}|[:@]`; +const ifragment = rawString`(?:${ipchar}|[/?])*`; +const iquery = rawString`(?:${ipchar}|${iprivate}|[/?])*`; +const isegment·nz·nc = + rawString`(?:${iunreserved}|${pct·encoded}|${sub·delims}|@)+`; +const isegment·nz = rawString`(?:${ipchar})+`; +const isegment = rawString`(?:${ipchar})*`; +const ipath·empty = rawString``; +const ipath·rootless = + rawString`(?:${isegment·nz})(?:/(?:${isegment}))*`; +const ipath·noscheme = + rawString`(?:${isegment·nz·nc})(?:/(?:${isegment}))*`; +const ipath·absolute = + rawString`/(?:(?:${isegment·nz})(?:/(?:${isegment}))*)?`; +const ipath·abempty = rawString`(?:/(?:${isegment}))*`; +const ipath = + rawString`${ipath·abempty}|${ipath·absolute}|${ipath·noscheme}|${ipath·rootless}|${ipath·empty}`; +const ireg·name = + rawString`(?:${iunreserved}|${pct·encoded}|${sub·delims})*`; +const ihost = rawString`${IP·literal}|${IPv4address}|${ireg·name}`; +const iuserinfo = + rawString`(?:${iunreserved}|${pct·encoded}|${sub·delims}|:)*`; +const iauthority = + rawString`(?:(?:${iuserinfo})@)?(?:${ihost})(?::(?:${port}))?`; +const irelative·part = + rawString`//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·noscheme})|(?:${ipath·empty})`; +const irelative·ref = + rawString`(?:${irelative·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`; +const ihier·part = + rawString`//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·rootless})|(?:${ipath·empty})`; +const absolute·IRI = + rawString`(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?`; +const IRI = + rawString`(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`; +const IRI·reference = rawString`(?:${IRI})|(?:${irelative·ref})`; -const leiri·iprivate = String.raw - `[\u{E000}-\u{F8FF}\u{E0000}-\u{E0FFF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`; -const leiri·ucschar = String.raw - `[ <>"{}|\\^${"`"}\u{0}-\u{1F}\u{7F}-\u{D7FF}\u{E000}-\u{FFFD}\u{10000}-\u{10FFFF}]`; -const leiri·iunreserved = String.raw - `[A-Za-z0-9\-\._~]|${leiri·ucschar}`; -const leiri·ipchar = String.raw - `${leiri·iunreserved}|${pct·encoded}|${sub·delims}|[:@]`; -const leiri·ifragment = String.raw`(?:${leiri·ipchar}|[/?])*`; -const leiri·iquery = String.raw - `(?:${leiri·ipchar}|${leiri·iprivate}|[/?])*`; -const leiri·isegment·nz·nc = String.raw - `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|@)+`; -const leiri·isegment·nz = String.raw`(?:${leiri·ipchar})+`; -const leiri·isegment = String.raw`(?:${leiri·ipchar})*`; -const leiri·ipath·empty = String.raw``; -const leiri·ipath·rootless = String.raw - `(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*`; -const leiri·ipath·noscheme = String.raw - `(?:${leiri·isegment·nz·nc})(?:/(?:${leiri·isegment}))*`; -const leiri·ipath·absolute = String.raw - `/(?:(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*)?`; -const leiri·ipath·abempty = String.raw`(?:/(?:${leiri·isegment}))*`; -const leiri·ipath = String.raw - `${leiri·ipath·abempty}|${leiri·ipath·absolute}|${leiri·ipath·noscheme}|${leiri·ipath·rootless}|${leiri·ipath·empty}`; -const leiri·ireg·name = String.raw - `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims})*`; -const leiri·ihost = String.raw - `${IP·literal}|${IPv4address}|${leiri·ireg·name}`; -const leiri·iuserinfo = String.raw - `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|:)*`; -const leiri·iauthority = String.raw - `(?:(?:${leiri·iuserinfo})@)?(?:${leiri·ihost})(?::(?:${port}))?`; -const leiri·irelative·part = String.raw - `//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})`; -const leiri·irelative·ref = String.raw - `(?:${leiri·irelative·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`; -const leiri·ihier·part = String.raw - `//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})`; -const absolute·LEIRI = String.raw - `(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?`; -const LEIRI = String.raw - `(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`; -const LEIRI·reference = String.raw - `(?:${LEIRI})|(?:${leiri·irelative·ref})`; +const leiri·iprivate = + rawString`[\u{E000}-\u{F8FF}\u{E0000}-\u{E0FFF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`; +const leiri·ucschar = + rawString`[ <>"{}|\\^${"\x60"}\u{0}-\u{1F}\u{7F}-\u{D7FF}\u{E000}-\u{FFFD}\u{10000}-\u{10FFFF}]`; +const leiri·iunreserved = + rawString`[A-Za-z0-9\-\._~]|${leiri·ucschar}`; +const leiri·ipchar = + rawString`${leiri·iunreserved}|${pct·encoded}|${sub·delims}|[:@]`; +const leiri·ifragment = rawString`(?:${leiri·ipchar}|[/?])*`; +const leiri·iquery = + rawString`(?:${leiri·ipchar}|${leiri·iprivate}|[/?])*`; +const leiri·isegment·nz·nc = + rawString`(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|@)+`; +const leiri·isegment·nz = rawString`(?:${leiri·ipchar})+`; +const leiri·isegment = rawString`(?:${leiri·ipchar})*`; +const leiri·ipath·empty = rawString``; +const leiri·ipath·rootless = + rawString`(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*`; +const leiri·ipath·noscheme = + rawString`(?:${leiri·isegment·nz·nc})(?:/(?:${leiri·isegment}))*`; +const leiri·ipath·absolute = + rawString`/(?:(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*)?`; +const leiri·ipath·abempty = rawString`(?:/(?:${leiri·isegment}))*`; +const leiri·ipath = + rawString`${leiri·ipath·abempty}|${leiri·ipath·absolute}|${leiri·ipath·noscheme}|${leiri·ipath·rootless}|${leiri·ipath·empty}`; +const leiri·ireg·name = + rawString`(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims})*`; +const leiri·ihost = + rawString`${IP·literal}|${IPv4address}|${leiri·ireg·name}`; +const leiri·iuserinfo = + rawString`(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|:)*`; +const leiri·iauthority = + rawString`(?:(?:${leiri·iuserinfo})@)?(?:${leiri·ihost})(?::(?:${port}))?`; +const leiri·irelative·part = + rawString`//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})`; +const leiri·irelative·ref = + rawString`(?:${leiri·irelative·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`; +const leiri·ihier·part = + rawString`//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})`; +const absolute·LEIRI = + rawString`(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?`; +const LEIRI = + rawString`(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`; +const LEIRI·reference = + rawString`(?:${LEIRI})|(?:${leiri·irelative·ref})`; + +export const { + /** + * Recomposes an (L·E·)I·R·I reference from its component parts. + * + * See §5.3 of R·F·C 3986. + */ + composeReference, + + /** + * Converts an L·E·I·R·I to the corresponding I·R·I by + * percent‐encoding unsupported characters. + * + * This function is somewhat complex because the I·R·I syntax allows + * private·use characters *only* in the query. + */ + escapeForIRI, + + /** + * Converts an (L·E·)I·R·I to the corresponding U·R·I by + * percent‐encoding unsupported characters. + * + * This does not punycode the authority. + */ + escapeForURI, + + /** + * Removes all dot segments ("." or "..") from the provided + * (L·E·)I·R·I. + * + * See §5.2.4 of R·F·C 3986. + */ + removeDotSegments, +} = (() => { + const TE = TextEncoder; + const { iterator: iteratorSymbol } = Symbol; + const { toString: numberToString } = Number.prototype; + const { encode: teEncode } = TE.prototype; + + const { [iteratorSymbol]: arrayIterator } = Array.prototype; + const { + next: arrayIteratorNext, + } = Object.getPrototypeOf([][iteratorSymbol]()); + const { + next: generatorIteratorNext, + } = Object.getPrototypeOf(function* () {}.prototype); + const { [iteratorSymbol]: stringIterator } = String.prototype; + const { + next: stringIteratorNext, + } = Object.getPrototypeOf(""[iteratorSymbol]()); + + const iriCharacterIterablePrototype = { + [iteratorSymbol]() { + return { + next: bind( + stringIteratorNext, + call(stringIterator, this.source, []), + [], + ), + }; + }, + }; + const iriGeneratorIterablePrototype = { + [iteratorSymbol]() { + return { + next: bind(generatorIteratorNext, this.generator(), []), + }; + }, + }; + const iriSegmentIterablePrototype = { + [iteratorSymbol]() { + return { + next: bind( + arrayIteratorNext, + call(arrayIterator, this.segments, []), + [], + ), + }; + }, + }; + + return { + composeReference: ($) => + join( + objectCreate( + iriGeneratorIterablePrototype, + { + generator: { + value: function* () { + const { scheme, authority, path, query, fragment } = $; + if (scheme != null) { + // A scheme is present. + yield scheme; + yield ":"; + } else { + // No scheme is present. + /* do nothing */ + } + if (authority != null) { + // An authority is present. + yield "//"; + yield authority; + } else { + // No authority is present. + /* do nothing */ + } + yield path ?? ""; + if (query != null) { + // A query is present. + yield "?"; + yield query; + } else { + // No query is present. + /* do nothing */ + } + if (fragment != null) { + // A fragment is present. + yield "#"; + yield fragment; + } else { + // No fragment is present. + /* do nothing */ + } + }, + }, + }, + ), + "", + ), + escapeForIRI: ($) => { + const components = parseReference($); + + // The path will always be present (although perhaps empty) on a + // successful parse. If it isn’t (and parsing failed), treat the + // entire input as the path. + components.path ??= `${$}`; + + // Escape disallowed codepoints in each component and compose an + // I·R·I from the result. + const reference = objectCreate(null); + for (const componentName in components) { + const componentValue = components[componentName]; + reference[componentName] = componentValue == null + ? undefined + : join( + objectCreate( + iriGeneratorIterablePrototype, + { + generator: { + value: function* () { + const encoder = new TE(); + for ( + const character of objectCreate( + iriCharacterIterablePrototype, + { source: { value: componentValue } }, + ) + ) { + if ( + new Matcher( + `${leiri·ucschar}|${leiri·iprivate}`, + )(character) && + !new Matcher( + `${ucschar}${ + componentName == "query" + ? `|${iprivate}` + : "" + }`, + )(character) + ) { + // This codepoint needs to be escaped. + const encoded = call(teEncode, encoder, [ + character, + ]); + for ( + let index = 0; + index < encoded.length; + ++index + ) { + const byte = encoded[index]; + yield `%${byte < 0x10 ? "0" : ""}${ + asciiUppercase( + call(numberToString, byte, [0x10]), + ) + }`; + } + } else { + // This codepoint does not need escaping. + yield character; + } + } + }, + }, + }, + ), + "", + ); + } + return composeReference(reference); + }, + escapeForURI: ($) => + join( + objectCreate( + iriGeneratorIterablePrototype, + { + generator: { + value: function* () { + const encoder = new TE(); + for ( + const character of objectCreate( + iriCharacterIterablePrototype, + { source: { value: `${$}` } }, + ) + ) { + if ( + new Matcher( + `${leiri·ucschar}|${leiri·iprivate}`, + )(character) + ) { + // This codepoint needs to be escaped. + const encoded = call(teEncode, encoder, [ + character, + ]); + for ( + let index = 0; + index < encoded.length; + ++index + ) { + const byte = encoded[index]; + yield `%${byte < 0x10 ? "0" : ""}${ + asciiUppercase( + call(numberToString, byte, [0x10]), + ) + }`; + } + } else { + // This codepoint does not need escaping. + yield character; + } + } + }, + }, + }, + ), + "", + ), + removeDotSegments: ($) => { + const input = `${$}`; + const output = []; + const { length } = input; + let index = 0; + while (index < length) { + if (stringStartsWith(input, "../", index)) { + // The input starts with a double leader; drop it. This can + // only occur at the beginning of the input. + index += 3; + } else if (stringStartsWith(input, "./", index)) { + // The input starts with a single leader; drop it. This can + // only occur at the beginning of the input. + index += 2; + } else if (stringStartsWith(input, "/./", index)) { + // The input starts with a slash, single leader, and another + // slash. Ignore it, and move the input to just before the + // second slash. + index += 2; + } else if ( + stringStartsWith(input, "/.", index) && index + 2 == length + ) { + // The input starts with a slash and single leader, and this + // exhausts the string. Push an empty segment and move the + // index to the end of the string. + push(output, "/"); + index = length; + } else if (stringStartsWith(input, "/../", index)) { + // The input starts with a slash, double leader, and another + // slash. Drop a segment from the output, and move the input + // to just before the second slash. + index += 3; + splice(output, -1, 1); + } else if ( + stringStartsWith(input, "/..", index) && index + 3 == length + ) { + // The input starts with a slash and single leader, and this + // exhausts the string. Drop a segment from the output, push + // an empty segment, and move the index to the end of the + // string. + splice(output, -1, 1, "/"); + index = length; + } else if ( + stringStartsWith(input, ".", index) && index + 1 == length || + stringStartsWith(input, "..", index) && index + 2 == length + ) { + // The input starts with a single or double leader, and this + // exhausts the string. Do nothing (this can only occur at + // the beginning of input) and move the index to the end of + // the string. + index = length; + } else { + // The input does not start with a leader. Advance the index + // to the position before the next slash and push the segment + // between the old and new positions. + const nextIndex = getFirstSubstringIndex( + input, + "/", + index + 1, + ); + if (nextIndex == -1) { + // No slash remains; set index to the end of the string. + push(output, substring(input, index)); + index = length; + } else { + // There are further path segments. + push(output, substring(input, index, nextIndex)); + index = nextIndex; + } + } + } + return join( + objectCreate( + iriSegmentIterablePrototype, + { segments: { value: output } }, + ), + "", + ); + }, + }; +})(); export const { - isAbsoluteURI, // U·R·I with no fragment - isURI, - isURIPath, - isURIReference, - isURISuffix, // only authority, path, query, fragment isAbsoluteIRI, // I·R·I with no fragment + isAbsoluteLEIRI, // L·E·I·R·I with no fragment + isAbsoluteURI, // U·R·I with no fragment isIRI, isIRIPath, isIRIReference, isIRISuffix, // only authority, path, query, fragment - isAbsoluteLEIRI, // L·E·I·R·I with no fragment isLEIRI, isLEIRIPath, isLEIRIReference, isLEIRISuffix, // only authority, path, query, fragment + isURI, + isURIPath, + isURIReference, + isURISuffix, // only authority, path, query, fragment } = Object.fromEntries( Object.entries({ - isAbsoluteLEIRI: absolute·LEIRI, isAbsoluteIRI: absolute·IRI, + isAbsoluteLEIRI: absolute·LEIRI, isAbsoluteURI: absolute·URI, - isLEIRI: LEIRI, - isLEIRIPath: leiri·ipath, - isLEIRIReference: LEIRI·reference, - isLEIRISuffix: String.raw - `(?:${leiri·iauthority})(?:${leiri·ipath·abempty})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`, isIRI: IRI, isIRIPath: ipath, isIRIReference: IRI·reference, - isIRISuffix: String.raw - `(?:${iauthority})(?:${ipath·abempty})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`, + isIRISuffix: + rawString`(?:${iauthority})(?:${ipath·abempty})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`, + isLEIRI: LEIRI, + isLEIRIPath: leiri·ipath, + isLEIRIReference: LEIRI·reference, + isLEIRISuffix: + rawString`(?:${leiri·iauthority})(?:${leiri·ipath·abempty})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`, isURI: URI, isURIPath: path, isURIReference: URI·reference, - isURISuffix: String.raw - `(?:${authority})(?:${path·abempty})(?:\?(?:${query}))?(?:#(?:${fragment}))?`, - }).map(([key, value]) => { - const regExp = new RegExp(`^(?:${value})$`, "u"); - return [ - key, - Object.defineProperties( - ($) => typeof $ == "string" && regExp.test($), - { - name: { value: key }, - [Symbol.match]: { - configurable: true, - enumerable: false, - get: () => regExp[Symbol.match].bind(regExp), - set: undefined, - }, - }, - ), - ]; - }), + isURISuffix: + rawString`(?:${authority})(?:${path·abempty})(?:\?(?:${query}))?(?:#(?:${fragment}))?`, + }).map( + ([key, value]) => [key, new Matcher(rawString`^(?:${value})$`)], + ), ); /** - * Recomposes an (L·E·)I·R·I reference from its component parts. - * - * See §5.3 of R·F·C 3986. - */ -export const composeReference = ($) => { - const result = []; - const { scheme, authority, path, query, fragment } = $; - if (scheme != null) { - // A scheme is present. - result.push(scheme, ":"); - } else { - // No scheme is present. - /* do nothing */ - } - if (authority != null) { - // An authority is present. - result.push("//", authority); - } else { - // No authority is present. - /* do nothing */ - } - result.push(path ?? ""); - if (query != null) { - // A query is present. - result.push("?", query); - } else { - // No query is present. - /* do nothing */ - } - if (fragment != null) { - // A fragment is present. - result.push("#", fragment); - } else { - // No fragment is present. - /* do nothing */ - } - return result.join(""); -}; - -/** - * Converts an L·E·I·R·I to the corresponding I·R·I by percent‐encoding - * unsupported characters. - * - * This function is somewhat complex because the I·R·I syntax allows - * private·use characters *only* in the query. - */ -export const escapeForIRI = ($) => { - const components = parseReference($); - const encoder = new TextEncoder(); - - // The path will always be present (although perhaps empty) on a - // successful parse. If it isn’t (and parsing failed), treat the - // entire input as the path. - components.path ??= `${$}`; - - // Escape disallowed codepoints in each component and compose an - // I·R·I from the result. - return composeReference( - Object.fromEntries( - Object.entries(components).map( - ([componentName, componentValue]) => [ - componentName, - componentValue == null ? undefined : [...function* () { - for (const character of componentValue) { - if ( - new RegExp(`${leiri·ucschar}|${leiri·iprivate}`, "u") - .test( - character, - ) && - !new RegExp( - `${ucschar}${ - componentName == "query" ? `|${iprivate}` : "" - }`, - "u", - ).test(character) - ) { - // This codepoint needs to be escaped. - for (const byte of encoder.encode(character)) { - yield `%${byte < 0x10 ? "0" : ""}${ - byte.toString(0x10).toUpperCase() - }`; - } - } else { - // This codepoint does not need escaping. - yield character; - } - } - }()].join(""), - ], - ), - ), - ); -}; - -/** - * Converts an (L·E·)I·R·I to the corresponding U·R·I by - * percent‐encoding unsupported characters. - * - * This does not punycode the authority. - */ -export const escapeForURI = ($) => - [...function* () { - const encoder = new TextEncoder(); - for (const character of `${$}`) { - if ( - new RegExp(`${leiri·ucschar}|${leiri·iprivate}`, "u").test( - character, - ) - ) { - // This codepoint needs to be escaped. - for (const byte of encoder.encode(character)) { - yield `%${byte < 0x10 ? "0" : ""}${ - byte.toString(0x10).toUpperCase() - }`; - } - } else { - // This codepoint doesn’t need escaping. - yield character; - } - } - }()].join(""); - -/** - * Merges a reference path with a base path. + * Returns the result of merging the provided reference path with the + * provided base path. * * See §5.2.3 of R·F·C 3986. */ export const mergePaths = (base, reference) => { - const baseStr = `${base}`; + const baseStr = `${base}` || "/"; return `${ - baseStr.substring(0, baseStr.lastIndexOf("/") + 1) + substring(baseStr, 0, getLastSubstringIndex(baseStr, "/") + 1) }${reference}`; }; -/** - * Returns the `scheme`, `authority`, `path`, `query`, and `fragment` - * of the provided (L·E·)I·R·I reference. - * - * `path` will always be defined for valid references, and will be - * undefined for values which are not valid L·E·I·R·Is. - */ -export const parseReference = ($) => { - const regExp = new RegExp( - String.raw - `^(?:(?${scheme}):(?://(?${leiri·iauthority})(?${leiri·ipath·abempty})|(?(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})))(?:\?(?${leiri·iquery}))?(?:#(?${leiri·ifragment}))?|(?://(?${leiri·iauthority})(?${leiri·ipath·abempty})|(?(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})))(?:\?(?${leiri·iquery}))?(?:#(?${leiri·ifragment}))?)$`, - "u", - ); - const { - absolute·scheme, - absolute·authority, - absolute·patha, - absolute·pathb, - absolute·query, - absolute·fragment, - relative·authority, - relative·patha, - relative·pathb, - relative·query, - relative·fragment, - } = regExp.exec($)?.groups ?? {}; +export const { + /** + * Returns the `scheme`, `authority`, `path`, `query`, and `fragment` + * of the provided (L·E·)I·R·I reference. + * + * `path` will always be defined for valid references, and will be + * undefined for values which are not valid L·E·I·R·Is. + */ + parseReference, +} = (() => { + const RE = RegExp; + const { prototype: rePrototype } = RE; + const { exec: reExec } = rePrototype; return { - scheme: absolute·scheme, - authority: absolute·authority ?? relative·authority, - path: absolute·patha ?? absolute·pathb ?? relative·patha ?? - relative·pathb, - query: absolute·query ?? relative·query, - fragment: absolute·fragment ?? relative·fragment, + parseReference: ($) => { + const re = new RE( + rawString`^(?:(?${scheme}):(?://(?${leiri·iauthority})(?${leiri·ipath·abempty})|(?(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})))(?:\?(?${leiri·iquery}))?(?:#(?${leiri·ifragment}))?|(?://(?${leiri·iauthority})(?${leiri·ipath·abempty})|(?(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})))(?:\?(?${leiri·iquery}))?(?:#(?${leiri·ifragment}))?)$`, + "u", + ); + const { + absolute·scheme, + absolute·authority, + absolute·patha, + absolute·pathb, + absolute·query, + absolute·fragment, + relative·authority, + relative·patha, + relative·pathb, + relative·query, + relative·fragment, + } = call(reExec, re, [$])?.groups ?? {}; + return { + scheme: absolute·scheme, + authority: absolute·authority ?? relative·authority, + path: absolute·patha ?? absolute·pathb ?? relative·patha ?? + relative·pathb, + query: absolute·query ?? relative·query, + fragment: absolute·fragment ?? relative·fragment, + }; + }, }; -}; - -/** - * Removes all dot segments ("." or "..") from the provided (L·E·)I·R·I. - * - * See §5.2.4 of R·F·C 3986. - */ -export const removeDotSegments = ($) => { - const input = `${$}`; - const output = []; - const { length } = input; - let index = 0; - while (index < length) { - if (input.startsWith("../", index)) { - // The input starts with a double leader; drop it. This can only - // occur at the beginning of the input. - index += 3; - } else if (input.startsWith("./", index)) { - // The input starts with a single leader; drop it. This can only - // occur at the beginning of the input. - index += 2; - } else if (input.startsWith("/./", index)) { - // The input starts with a slash, single leader, and another - // slash. Ignore it, and move the input to just before the second - // slash. - index += 2; - } else if (input.startsWith("/.", index) && index + 2 == length) { - // The input starts with a slash and single leader, and this - // exhausts the string. Push an empty segment and move the index - // to the end of the string. - output.push("/"); - index = length; - } else if (input.startsWith("/../", index)) { - // The input starts with a slash, double leader, and another - // slash. Drop a segment from the output, and move the input to - // just before the second slash. - index += 3; - output.splice(-1, 1); - } else if (input.startsWith("/..", index) && index + 3 == length) { - // The input starts with a slash and single leader, and this - // exhausts the string. Drop a segment from the output, push an - // empty segment, and move the index to the end of the string. - output.splice(-1, 1, "/"); - index = length; - } else if ( - input.startsWith(".", index) && index + 1 == length || - input.startsWith("..", index) && index + 2 == length - ) { - // The input starts with a single or double leader, and this - // exhausts the string. Do nothing (this can only occur at the - // beginning of input) and move the index to the end of the - // string. - index = length; - } else { - // The input does not start with a leader. Advance the index to - // the position before the next slash and push the segment - // between the old and new positions. - const nextIndex = input.indexOf("/", index + 1); - if (nextIndex == -1) { - // No slash remains; set index to the end of the string. - output.push(input.substring(index)); - index = length; - } else { - // There are further path segments. - output.push(input.substring(index, nextIndex)); - index = nextIndex; - } - } - } - return output.join(""); -}; +})(); /** * Resolves the provided reference relative to the provided base @@ -506,7 +646,7 @@ export const resolveReference = (R, Base = location ?? "") => { authority: Base·authority, path: R·path[0] == "/" ? removeDotSegments(R·path) - : removeDotSegments(mergePaths(Base·path || "/", R·path)), + : removeDotSegments(mergePaths(Base·path, R·path)), query: R·query, fragment: R·fragment, },