From: Lady Date: Sat, 21 May 2022 01:09:20 +0000 (-0700) Subject: L·E·I·R·I support X-Git-Tag: 0.1.0~11 X-Git-Url: https://git.ladys.computer/Pisces/commitdiff_plain/1c80eb9624a66d32800885321f37c4b05161d607?hp=edbef3a179c4404eb02a83860a76c1f11b9b63d2 L·E·I·R·I support --- diff --git a/iri.js b/iri.js index f1abe78..b9682d9 100644 --- a/iri.js +++ b/iri.js @@ -42,7 +42,6 @@ const path·noscheme = String.raw const path·absolute = String.raw `/(?:(?:${segment·nz})(?:/(?:${segment}))*)?`; const path·abempty = String.raw`(?:/(?:${segment}))*`; -//deno-lint-ignore no-unused-vars const path = String.raw `${path·abempty}|${path·absolute}|${path·noscheme}|${path·rootless}|${path·empty}`; const reg·name = String.raw @@ -85,7 +84,6 @@ const ipath·noscheme = String.raw const ipath·absolute = String.raw `/(?:(?:${isegment·nz})(?:/(?:${isegment}))*)?`; const ipath·abempty = String.raw`(?:/(?:${isegment}))*`; -//deno-lint-ignore no-unused-vars const ipath = String.raw `${ipath·abempty}|${ipath·absolute}|${ipath·noscheme}|${ipath·rootless}|${ipath·empty}`; const ireg·name = String.raw @@ -107,21 +105,88 @@ const IRI = String.raw `(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`; const IRI·reference = String.raw`(?:${IRI})|(?:${irelative·ref})`; +const leiri·iprivate = String.raw + `[\u{E000}-\u{F8FF}\u{E0000}-\u{E0FFF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`; +const leiri·ucschar = String.raw + `[ <>"{}|\\^${"`"}\u{0}-\u{1F}\u{7F}-\u{D7FF}\u{E000}-\u{FFFD}\u{10000}-\u{10FFFF}]`; +const leiri·iunreserved = String.raw + `[A-Za-z0-9\-\._~]|${leiri·ucschar}`; +const leiri·ipchar = String.raw + `${leiri·iunreserved}|${pct·encoded}|${sub·delims}|[:@]`; +const leiri·ifragment = String.raw`(?:${leiri·ipchar}|[/?])*`; +const leiri·iquery = String.raw + `(?:${leiri·ipchar}|${leiri·iprivate}|[/?])*`; +const leiri·isegment·nz·nc = String.raw + `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|@)+`; +const leiri·isegment·nz = String.raw`(?:${leiri·ipchar})+`; +const leiri·isegment = String.raw`(?:${leiri·ipchar})*`; +const leiri·ipath·empty = String.raw``; +const leiri·ipath·rootless = String.raw + `(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*`; +const leiri·ipath·noscheme = String.raw + `(?:${leiri·isegment·nz·nc})(?:/(?:${leiri·isegment}))*`; +const leiri·ipath·absolute = String.raw + `/(?:(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*)?`; +const leiri·ipath·abempty = String.raw`(?:/(?:${leiri·isegment}))*`; +const leiri·ipath = String.raw + `${leiri·ipath·abempty}|${leiri·ipath·absolute}|${leiri·ipath·noscheme}|${leiri·ipath·rootless}|${leiri·ipath·empty}`; +const leiri·ireg·name = String.raw + `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims})*`; +const leiri·ihost = String.raw + `${IP·literal}|${IPv4address}|${leiri·ireg·name}`; +const leiri·iuserinfo = String.raw + `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|:)*`; +const leiri·iauthority = String.raw + `(?:(?:${leiri·iuserinfo})@)?(?:${leiri·ihost})(?::(?:${port}))?`; +const leiri·irelative·part = String.raw + `//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})`; +const leiri·irelative·ref = String.raw + `(?:${leiri·irelative·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`; +const leiri·ihier·part = String.raw + `//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})`; +const absolute·LEIRI = String.raw + `(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?`; +const LEIRI = String.raw + `(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`; +const LEIRI·reference = String.raw + `(?:${LEIRI})|(?:${leiri·irelative·ref})`; + export const { isAbsoluteURI, // U·R·I with no fragment isURI, + isURIPath, isURIReference, + isURISuffix, // only authority, path, query, fragment isAbsoluteIRI, // I·R·I with no fragment isIRI, + isIRIPath, isIRIReference, + isIRISuffix, // only authority, path, query, fragment + isAbsoluteLEIRI, // L·E·I·R·I with no fragment + isLEIRI, + isLEIRIPath, + isLEIRIReference, + isLEIRISuffix, // only authority, path, query, fragment } = Object.fromEntries( Object.entries({ + isAbsoluteLEIRI: absolute·LEIRI, isAbsoluteIRI: absolute·IRI, isAbsoluteURI: absolute·URI, + isLEIRI: LEIRI, + isLEIRIPath: leiri·ipath, + isLEIRIReference: LEIRI·reference, + isLEIRISuffix: String.raw + `(?:${leiri·iauthority})(?:${leiri·ipath·abempty})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`, isIRI: IRI, + isIRIPath: ipath, isIRIReference: IRI·reference, + isIRISuffix: String.raw + `(?:${iauthority})(?:${ipath·abempty})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`, isURI: URI, + isURIPath: path, isURIReference: URI·reference, + isURISuffix: String.raw + `(?:${authority})(?:${path·abempty})(?:\?(?:${query}))?(?:#(?:${fragment}))?`, }).map(([key, value]) => { const regExp = new RegExp(`^(?:${value})$`, "u"); return [ @@ -143,7 +208,7 @@ export const { ); /** - * Recomposes an I·R·I reference from its component parts. + * Recomposes an (L·E·)I·R·I reference from its component parts. * * See §5.3 of R·F·C 3986. */ @@ -151,44 +216,115 @@ export const composeReference = ($) => { const result = []; const { scheme, authority, path, query, fragment } = $; if (scheme != null) { + // A scheme is present. result.push(scheme, ":"); } else { + // No scheme is present. /* do nothing */ } if (authority != null) { + // An authority is present. result.push("//", authority); } else { + // No authority is present. /* do nothing */ } result.push(path ?? ""); if (query != null) { + // A query is present. result.push("?", query); } else { + // No query is present. /* do nothing */ } if (fragment != null) { + // A fragment is present. result.push("#", fragment); } else { + // No fragment is present. /* do nothing */ } return result.join(""); }; /** - * Converts an I·R·I to the corresponding U·R·I by percent‐encoding + * Converts an L·E·I·R·I to the corresponding I·R·I by percent‐encoding * unsupported characters. * + * This function is somewhat complex because the I·R·I syntax allows + * private·use characters *only* in the query. + */ +export const escapeForIRI = ($) => { + const components = parseReference($); + const encoder = new TextEncoder(); + + // The path will always be present (although perhaps empty) on a + // successful parse. If it isn’t (and parsing failed), treat the + // entire input as the path. + components.path ??= `${$}`; + + // Escape disallowed codepoints in each component and compose an + // I·R·I from the result. + return composeReference( + Object.fromEntries( + Object.entries(components).map( + ([componentName, componentValue]) => [ + componentName, + componentValue == null ? undefined : [...function* () { + for (const character of componentValue) { + if ( + new RegExp(`${leiri·ucschar}|${leiri·iprivate}`, "u") + .test( + character, + ) && + !new RegExp( + `${ucschar}${ + componentName == "query" ? `|${iprivate}` : "" + }`, + "u", + ).test(character) + ) { + // This codepoint needs to be escaped. + for (const byte of encoder.encode(character)) { + yield `%${byte < 0x10 ? "0" : ""}${ + byte.toString(0x10).toUpperCase() + }`; + } + } else { + // This codepoint does not need escaping. + yield character; + } + } + }()].join(""), + ], + ), + ), + ); +}; + +/** + * Converts an (L·E·)I·R·I to the corresponding U·R·I by + * percent‐encoding unsupported characters. + * * This does not punycode the authority. */ -export const iri2uri = ($) => +export const escapeForURI = ($) => [...function* () { const encoder = new TextEncoder(); - for (const character of $) { - if (new RegExp(`${ucschar}|${iprivate}`, "u").test(character)) { + for (const character of `${$}`) { + if ( + new RegExp(`${leiri·ucschar}|${leiri·iprivate}`, "u").test( + character, + ) + ) { + // This codepoint needs to be escaped. for (const byte of encoder.encode(character)) { - yield `%${byte.toString(0x10).toUpperCase()}`; + yield `%${byte < 0x10 ? "0" : ""}${ + byte.toString(0x10).toUpperCase() + }`; } } else { + // This codepoint doesn’t need escaping. yield character; } } @@ -208,12 +344,15 @@ export const mergePaths = (base, reference) => { /** * Returns the `scheme`, `authority`, `path`, `query`, and `fragment` - * of the provided I·R·I reference. + * of the provided (L·E·)I·R·I reference. + * + * `path` will always be defined for valid references, and will be + * undefined for values which are not valid L·E·I·R·Is. */ export const parseReference = ($) => { const regExp = new RegExp( String.raw - `^(?:(?${scheme}):(?://(?${iauthority})(?${ipath·abempty})|(?(?:${ipath·absolute})|(?:${ipath·rootless})|(?:${ipath·empty})))(?:\?(?${iquery}))?(?:#(?${ifragment}))?|(?://(?${iauthority})(?${ipath·abempty})|(?(?:${ipath·absolute})|(?:${ipath·noscheme})|(?:${ipath·empty})))(?:\?(?${iquery}))?(?:#(?${ifragment}))?)$`, + `^(?:(?${scheme}):(?://(?${leiri·iauthority})(?${leiri·ipath·abempty})|(?(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})))(?:\?(?${leiri·iquery}))?(?:#(?${leiri·ifragment}))?|(?://(?${leiri·iauthority})(?${leiri·ipath·abempty})|(?(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})))(?:\?(?${leiri·iquery}))?(?:#(?${leiri·ifragment}))?)$`, "u", ); const { @@ -240,7 +379,7 @@ export const parseReference = ($) => { }; /** - * Removes all dot segments ("." or "..") from the provided I·R·I. + * Removes all dot segments ("." or "..") from the provided (L·E·)I·R·I. * * See §5.2.4 of R·F·C 3986. */ @@ -310,7 +449,8 @@ export const removeDotSegments = ($) => { }; /** - * Resolves the provided reference relative to the provided base I·R·I. + * Resolves the provided reference relative to the provided base + * (L·E·)I·R·I. * * See §5.2 of R·F·C 3986. */ @@ -322,10 +462,13 @@ export const resolveReference = (R, Base = location ?? "") => { query: Base·query, } = parseReference(Base); if (Base·scheme == null) { + // Base I·R·I’s must be valid I·R·I’s, meaning they must have a + // scheme. throw new TypeError( - `Piscēs: Base I·R·I did not have a scheme: ${Base}.`, + `Piscēs: Base did not have a scheme: ${Base}.`, ); } else { + // The provided Base I·R·I is valid. const { scheme: R·scheme, authority: R·authority, diff --git a/iri.test.js b/iri.test.js index 51749cd..a586629 100644 --- a/iri.test.js +++ b/iri.test.js @@ -9,24 +9,24 @@ import { composeReference, - iri2uri, + escapeForIRI, + escapeForURI, isAbsoluteIRI, + isAbsoluteLEIRI, isAbsoluteURI, isIRI, isIRIReference, + isLEIRI, + isLEIRIReference, isURI, isURIReference, parseReference, removeDotSegments, resolveReference, } from "./iri.js"; -import { - assert, - assertEquals, - assertStrictEquals, -} from "./dev-deps.js"; +import { assertEquals, assertStrictEquals } from "./dev-deps.js"; -const exampleURIs = { +const exampleURIReferences = { "ftp://ftp.is.co.za/rfc/rfc1808.txt": { scheme: "ftp", authority: "ftp.is.co.za", @@ -71,17 +71,13 @@ const exampleURIs = { query: "name=ferret", fragment: "nose", }, -}; - -const exampleURIReferences = { - ...exampleURIs, "./this:that": { path: "./this:that", }, }; -const exampleIRIs = { - ...exampleURIs, +const exampleIRIReferences = { + ...exampleURIReferences, "http://ヒキワリ.ナットウ.ニホン": { scheme: "http", authority: "ヒキワリ.ナットウ.ニホン", @@ -92,18 +88,40 @@ const exampleIRIs = { authority: "JP納豆.例.jp", path: "/dir1/引き割り.html", }, + "/dir1/引き割り.html": { + path: "/dir1/引き割り.html", + }, }; -const exampleIRIReferences = { - ...exampleURIReferences, - ...exampleIRIs, +const exampleLEIRIReferences = { + ...exampleIRIReferences, + "http://example.com/ foo /": { + scheme: "http", + authority: "example.com", + path: "/ foo /", + }, + "\0": { + path: "\0", + }, +}; + +const exampleReferences = { + ...exampleLEIRIReferences, + "\uD800": {}, + "\uFFFE": {}, + "\uFFFF": {}, }; Deno.test({ name: "Identifies U·R·Is.", fn: () => { - for (const uri of Object.keys(exampleURIs)) { - assert(isURI(uri)); + for ( + const [uri, { scheme }] of Object.entries(exampleReferences) + ) { + assertStrictEquals( + isURI(uri), + uri in exampleURIReferences && scheme != null, + ); } }, }); @@ -111,8 +129,16 @@ Deno.test({ Deno.test({ name: "Identifies absolute U·R·Is.", fn: () => { - for (const [uri, { fragment }] of Object.entries(exampleURIs)) { - assertStrictEquals(isAbsoluteURI(uri), fragment == null); + for ( + const [uri, { scheme, fragment }] of Object.entries( + exampleReferences, + ) + ) { + assertStrictEquals( + isAbsoluteURI(uri), + uri in exampleURIReferences && scheme != null && + fragment == null, + ); } }, }); @@ -120,8 +146,11 @@ Deno.test({ Deno.test({ name: "Identifies U·R·I references.", fn: () => { - for (const uri of Object.keys(exampleURIReferences)) { - assert(isURIReference(uri)); + for (const uri of Object.keys(exampleReferences)) { + assertStrictEquals( + isURIReference(uri), + uri in exampleURIReferences, + ); } }, }); @@ -129,8 +158,13 @@ Deno.test({ Deno.test({ name: "Identifies I·R·Is.", fn: () => { - for (const iri of Object.keys(exampleIRIs)) { - assert(isIRI(iri)); + for ( + const [iri, { scheme }] of Object.entries(exampleReferences) + ) { + assertStrictEquals( + isIRI(iri), + iri in exampleIRIReferences && scheme != null, + ); } }, }); @@ -138,8 +172,16 @@ Deno.test({ Deno.test({ name: "Identifies absolute I·R·Is.", fn: () => { - for (const [iri, { fragment }] of Object.entries(exampleIRIs)) { - assertStrictEquals(isAbsoluteIRI(iri), fragment == null); + for ( + const [iri, { scheme, fragment }] of Object.entries( + exampleReferences, + ) + ) { + assertStrictEquals( + isAbsoluteIRI(iri), + iri in exampleIRIReferences && scheme != null && + fragment == null, + ); } }, }); @@ -147,8 +189,54 @@ Deno.test({ Deno.test({ name: "Identifies I·R·I references.", fn: () => { - for (const iri of Object.keys(exampleIRIReferences)) { - assert(isIRIReference(iri)); + for (const iri of Object.keys(exampleReferences)) { + assertStrictEquals( + isIRIReference(iri), + iri in exampleIRIReferences, + ); + } + }, +}); + +Deno.test({ + name: "Identifies L·E·I·R·Is.", + fn: () => { + for ( + const [leiri, { scheme }] of Object.entries(exampleReferences) + ) { + assertStrictEquals( + isLEIRI(leiri), + leiri in exampleLEIRIReferences && scheme != null, + ); + } + }, +}); + +Deno.test({ + name: "Identifies absolute L·E·I·R·Is.", + fn: () => { + for ( + const [leiri, { scheme, fragment }] of Object.entries( + exampleReferences, + ) + ) { + assertStrictEquals( + isAbsoluteLEIRI(leiri), + leiri in exampleLEIRIReferences && scheme != null && + fragment == null, + ); + } + }, +}); + +Deno.test({ + name: "Identifies L·E·I·R·I references.", + fn: () => { + for (const leiri of Object.keys(exampleReferences)) { + assertStrictEquals( + isLEIRIReference(leiri), + leiri in exampleLEIRIReferences, + ); } }, }); @@ -156,7 +244,7 @@ Deno.test({ Deno.test({ name: "Correctly parses references.", fn: () => { - for (const [iri, value] of Object.entries(exampleIRIReferences)) { + for (const [iri, value] of Object.entries(exampleReferences)) { assertEquals(parseReference(iri), { scheme: undefined, authority: undefined, @@ -172,19 +260,43 @@ Deno.test({ Deno.test({ name: "Correctly composes references.", fn: () => { - for (const [iri, value] of Object.entries(exampleIRIReferences)) { + for ( + const [iri, value] of Object.entries(exampleLEIRIReferences) + ) { assertStrictEquals(composeReference(value), iri); } }, }); Deno.test({ - name: "Converts IRIs to URIs.", + name: "Converts (L·E·)I·R·Is to U·R·Is.", fn: () => { assertStrictEquals( - iri2uri("/dir1/引き割り.html"), + escapeForURI("/dir1/引き割り.html"), "/dir1/%E5%BC%95%E3%81%8D%E5%89%B2%E3%82%8A.html", ); + assertStrictEquals( + escapeForURI(" æ\0"), + "%20%C3%A6%00", + ); + assertStrictEquals( + escapeForURI("\u{F0000}?\u{F0000}#\u{F0000}"), + "%F3%B0%80%80?%F3%B0%80%80#%F3%B0%80%80", + ); + }, +}); + +Deno.test({ + name: "Converts L·E·I·R·Is to I·R·Is.", + fn: () => { + assertStrictEquals( + escapeForIRI(" æ\0"), + "%20æ%00", + ); + assertStrictEquals( + escapeForIRI("\u{F0000}?\u{F0000}#\u{F0000}"), + "%F3%B0%80%80?\u{F0000}#%F3%B0%80%80", + ); }, });