const path·absolute = String.raw
`/(?:(?:${segment·nz})(?:/(?:${segment}))*)?`;
const path·abempty = String.raw`(?:/(?:${segment}))*`;
-//deno-lint-ignore no-unused-vars
const path = String.raw
`${path·abempty}|${path·absolute}|${path·noscheme}|${path·rootless}|${path·empty}`;
const reg·name = String.raw
const ipath·absolute = String.raw
`/(?:(?:${isegment·nz})(?:/(?:${isegment}))*)?`;
const ipath·abempty = String.raw`(?:/(?:${isegment}))*`;
-//deno-lint-ignore no-unused-vars
const ipath = String.raw
`${ipath·abempty}|${ipath·absolute}|${ipath·noscheme}|${ipath·rootless}|${ipath·empty}`;
const ireg·name = String.raw
`(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`;
const IRI·reference = String.raw`(?:${IRI})|(?:${irelative·ref})`;
+const leiri·iprivate = String.raw // extra class that leiri·iquery admits
+ `[\u{E000}-\u{F8FF}\u{E0000}-\u{E0FFF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`;
+const leiri·ucschar = String.raw // the interpolation supplies a backtick
+ `[ <>"{}|\\^${"`"}\u{0}-\u{1F}\u{7F}-\u{D7FF}\u{E000}-\u{FFFD}\u{10000}-\u{10FFFF}]`;
+const leiri·iunreserved = String.raw // ASCII class or leiri·ucschar
+ `[A-Za-z0-9\-\._~]|${leiri·ucschar}`;
+const leiri·ipchar = String.raw // bare alternation for one character
+ `${leiri·iunreserved}|${pct·encoded}|${sub·delims}|[:@]`;
+const leiri·ifragment = String.raw`(?:${leiri·ipchar}|[/?])*`; // may be empty
+const leiri·iquery = String.raw // as the fragment, plus leiri·iprivate
+ `(?:${leiri·ipchar}|${leiri·iprivate}|[/?])*`;
+const leiri·isegment·nz·nc = String.raw // omits the “:” ipchar allows
+ `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|@)+`;
+const leiri·isegment·nz = String.raw`(?:${leiri·ipchar})+`; // one or more
+const leiri·isegment = String.raw`(?:${leiri·ipchar})*`; // zero or more
+const leiri·ipath·empty = String.raw``; // the empty pattern
+const leiri·ipath·rootless = String.raw // first segment must be nonempty
+ `(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*`;
+const leiri·ipath·noscheme = String.raw // first segment is the ·nc form
+ `(?:${leiri·isegment·nz·nc})(?:/(?:${leiri·isegment}))*`;
+const leiri·ipath·absolute = String.raw // leading “/”, the rest optional
+ `/(?:(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*)?`;
+const leiri·ipath·abempty = String.raw`(?:/(?:${leiri·isegment}))*`; // may be empty
+const leiri·ipath = String.raw // bare alternation of the five path forms
+ `${leiri·ipath·abempty}|${leiri·ipath·absolute}|${leiri·ipath·noscheme}|${leiri·ipath·rootless}|${leiri·ipath·empty}`;
+const leiri·ireg·name = String.raw // may be empty
+ `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims})*`;
+const leiri·ihost = String.raw // bare alternation; group it when embedding
+ `${IP·literal}|${IPv4address}|${leiri·ireg·name}`;
+const leiri·iuserinfo = String.raw // as ireg·name, also allowing “:”
+ `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|:)*`;
+const leiri·iauthority = String.raw // optional userinfo “@” and “:” port
+ `(?:(?:${leiri·iuserinfo})@)?(?:${leiri·ihost})(?::(?:${port}))?`;
+const leiri·irelative·part = String.raw // “//” authority form, or a path
+ `//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})`;
+const leiri·irelative·ref = String.raw // optional “?” query, “#” fragment
+ `(?:${leiri·irelative·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`;
+const leiri·ihier·part = String.raw // rootless replaces noscheme here
+ `//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})`;
+const absolute·LEIRI = String.raw // scheme required, no fragment part
+ `(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?`;
+const LEIRI = String.raw // scheme required, fragment optional
+ `(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`;
+const LEIRI·reference = String.raw // a LEIRI or a relative reference
+ `(?:${LEIRI})|(?:${leiri·irelative·ref})`;
+
export const {
isAbsoluteURI, // U·R·I with no fragment
isURI,
+ isURIPath,
isURIReference,
+ isURISuffix, // only authority, path, query, fragment
isAbsoluteIRI, // I·R·I with no fragment
isIRI,
+ isIRIPath,
isIRIReference,
+ isIRISuffix, // only authority, path, query, fragment
+ isAbsoluteLEIRI, // L·E·I·R·I with no fragment
+ isLEIRI,
+ isLEIRIPath,
+ isLEIRIReference,
+ isLEIRISuffix, // only authority, path, query, fragment
} = Object.fromEntries(
Object.entries({
+ isAbsoluteLEIRI: absolute·LEIRI,
isAbsoluteIRI: absolute·IRI,
isAbsoluteURI: absolute·URI,
+ isLEIRI: LEIRI,
+ isLEIRIPath: leiri·ipath,
+ isLEIRIReference: LEIRI·reference,
+ isLEIRISuffix: String.raw
+ `(?:${leiri·iauthority})(?:${leiri·ipath·abempty})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`,
isIRI: IRI,
+ isIRIPath: ipath,
isIRIReference: IRI·reference,
+ isIRISuffix: String.raw
+ `(?:${iauthority})(?:${ipath·abempty})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`,
isURI: URI,
+ isURIPath: path,
isURIReference: URI·reference,
+ isURISuffix: String.raw
+ `(?:${authority})(?:${path·abempty})(?:\?(?:${query}))?(?:#(?:${fragment}))?`,
}).map(([key, value]) => {
const regExp = new RegExp(`^(?:${value})$`, "u");
return [
);
/**
- * Recomposes an I·R·I reference from its component parts.
+ * Recomposes an (L·E·)I·R·I reference from its component parts.
*
* See §5.3 of R·F·C 3986.
*/
const result = [];
const { scheme, authority, path, query, fragment } = $;
if (scheme != null) {
+ // A scheme is present.
result.push(scheme, ":");
} else {
+ // No scheme is present.
/* do nothing */
}
if (authority != null) {
+ // An authority is present.
result.push("//", authority);
} else {
+ // No authority is present.
/* do nothing */
}
result.push(path ?? "");
if (query != null) {
+ // A query is present.
result.push("?", query);
} else {
+ // No query is present.
/* do nothing */
}
if (fragment != null) {
+ // A fragment is present.
result.push("#", fragment);
} else {
+ // No fragment is present.
/* do nothing */
}
return result.join("");
};
/**
- * Converts an I·R·I to the corresponding U·R·I by percent‐encoding
+ * Converts an L·E·I·R·I to the corresponding I·R·I by percent‐encoding
* unsupported characters.
*
+ * This function is somewhat complex because the I·R·I syntax allows
+ * private·use characters *only* in the query.
+ */
+export const escapeForIRI = ($) => {
+ const components = parseReference($);
+ const encoder = new TextEncoder();
+
+ // The path will always be present (although perhaps empty) on a
+ // successful parse. If it isn’t (and parsing failed), treat the
+ // entire input as the path.
+ components.path ??= `${$}`;
+
+ // Escape disallowed codepoints in each component and compose an
+ // I·R·I from the result.
+ return composeReference(
+ Object.fromEntries(
+ Object.entries(components).map(
+ ([componentName, componentValue]) => [
+ componentName,
+ componentValue == null ? undefined : [...function* () {
+ for (const character of componentValue) {
+ if (
+ new RegExp(`${leiri·ucschar}|${leiri·iprivate}`, "u")
+ .test(
+ character,
+ ) &&
+ !new RegExp(
+ `${ucschar}${
+ componentName == "query" ? `|${iprivate}` : ""
+ }`,
+ "u",
+ ).test(character)
+ ) {
+ // This codepoint needs to be escaped.
+ for (const byte of encoder.encode(character)) {
+ yield `%${byte < 0x10 ? "0" : ""}${
+ byte.toString(0x10).toUpperCase()
+ }`;
+ }
+ } else {
+ // This codepoint does not need escaping.
+ yield character;
+ }
+ }
+ }()].join(""),
+ ],
+ ),
+ ),
+ );
+};
+
+/**
+ * Converts an (L·E·)I·R·I to the corresponding U·R·I by
+ * percent‐encoding unsupported characters.
+ *
* This does not punycode the authority.
*/
-export const iri2uri = ($) =>
+export const escapeForURI = ($) =>
[...function* () {
const encoder = new TextEncoder();
- for (const character of $) {
- if (new RegExp(`${ucschar}|${iprivate}`, "u").test(character)) {
+ for (const character of `${$}`) {
+ if (
+ new RegExp(`${leiri·ucschar}|${leiri·iprivate}`, "u").test(
+ character,
+ )
+ ) {
+ // This codepoint needs to be escaped.
for (const byte of encoder.encode(character)) {
- yield `%${byte.toString(0x10).toUpperCase()}`;
+ yield `%${byte < 0x10 ? "0" : ""}${
+ byte.toString(0x10).toUpperCase()
+ }`;
}
} else {
+ // This codepoint doesn’t need escaping.
yield character;
}
}
/**
* Returns the `scheme`, `authority`, `path`, `query`, and `fragment`
- * of the provided I·R·I reference.
+ * of the provided (L·E·)I·R·I reference.
+ *
+ * `path` will always be defined for valid references, and will be
+ * undefined for values which are not valid L·E·I·R·I references.
*/
export const parseReference = ($) => {
const regExp = new RegExp(
String.raw
- `^(?:(?<absolute·scheme>${scheme}):(?://(?<absolute·authority>${iauthority})(?<absolute·patha>${ipath·abempty})|(?<absolute·pathb>(?:${ipath·absolute})|(?:${ipath·rootless})|(?:${ipath·empty})))(?:\?(?<absolute·query>${iquery}))?(?:#(?<absolute·fragment>${ifragment}))?|(?://(?<relative·authority>${iauthority})(?<relative·patha>${ipath·abempty})|(?<relative·pathb>(?:${ipath·absolute})|(?:${ipath·noscheme})|(?:${ipath·empty})))(?:\?(?<relative·query>${iquery}))?(?:#(?<relative·fragment>${ifragment}))?)$`,
+ `^(?:(?<absolute·scheme>${scheme}):(?://(?<absolute·authority>${leiri·iauthority})(?<absolute·patha>${leiri·ipath·abempty})|(?<absolute·pathb>(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})))(?:\?(?<absolute·query>${leiri·iquery}))?(?:#(?<absolute·fragment>${leiri·ifragment}))?|(?://(?<relative·authority>${leiri·iauthority})(?<relative·patha>${leiri·ipath·abempty})|(?<relative·pathb>(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})))(?:\?(?<relative·query>${leiri·iquery}))?(?:#(?<relative·fragment>${leiri·ifragment}))?)$`,
"u",
);
const {
};
/**
- * Removes all dot segments ("." or "..") from the provided I·R·I.
+ * Removes all dot segments ("." or "..") from the provided (L·E·)I·R·I.
*
* See §5.2.4 of R·F·C 3986.
*/
};
/**
- * Resolves the provided reference relative to the provided base I·R·I.
+ * Resolves the provided reference relative to the provided base
+ * (L·E·)I·R·I.
*
* See §5.2 of R·F·C 3986.
*/
query: Base·query,
} = parseReference(Base);
if (Base·scheme == null) {
+ // Base (L·E·)I·R·Is must be valid (L·E·)I·R·Is, meaning they must
+ // have a scheme.
throw new TypeError(
- `Piscēs: Base I·R·I did not have a scheme: ${Base}.`,
+ `Piscēs: Base did not have a scheme: ${Base}.`,
);
} else {
+ // The provided Base I·R·I is valid.
const {
scheme: R·scheme,
authority: R·authority,
import {
composeReference,
- iri2uri,
+ escapeForIRI,
+ escapeForURI,
isAbsoluteIRI,
+ isAbsoluteLEIRI,
isAbsoluteURI,
isIRI,
isIRIReference,
+ isLEIRI,
+ isLEIRIReference,
isURI,
isURIReference,
parseReference,
removeDotSegments,
resolveReference,
} from "./iri.js";
-import {
- assert,
- assertEquals,
- assertStrictEquals,
-} from "./dev-deps.js";
+import { assertEquals, assertStrictEquals } from "./dev-deps.js";
-const exampleURIs = {
+const exampleURIReferences = {
"ftp://ftp.is.co.za/rfc/rfc1808.txt": {
scheme: "ftp",
authority: "ftp.is.co.za",
query: "name=ferret",
fragment: "nose",
},
-};
-
-const exampleURIReferences = {
- ...exampleURIs,
"./this:that": {
path: "./this:that",
},
};
-const exampleIRIs = {
- ...exampleURIs,
+const exampleIRIReferences = {
+ ...exampleURIReferences,
"http://ヒキワリ.ナットウ.ニホン": {
scheme: "http",
authority: "ヒキワリ.ナットウ.ニホン",
authority: "JP納豆.例.jp",
path: "/dir1/引き割り.html",
},
+ "/dir1/引き割り.html": {
+ path: "/dir1/引き割り.html",
+ },
};
-const exampleIRIReferences = {
- ...exampleURIReferences,
- ...exampleIRIs,
+const exampleLEIRIReferences = { // maps each reference to its expected parts
+ ...exampleIRIReferences,
+ "http://example.com/ foo /": { // the I·R·I tests expect this to be rejected
+ scheme: "http",
+ authority: "example.com",
+ path: "/ foo /",
+ },
+ "\0": { // a lone NUL: no scheme, so only a relative reference
+ path: "\0",
+ },
+};
+
+const exampleReferences = { // every fixture, whether valid or not
+ ...exampleLEIRIReferences,
+ "\uD800": {}, // this and the two below: every test expects rejection
+ "\uFFFE": {},
+ "\uFFFF": {},
};
Deno.test({
name: "Identifies U·R·Is.",
fn: () => {
- for (const uri of Object.keys(exampleURIs)) {
- assert(isURI(uri));
+ for (
+ const [uri, { scheme }] of Object.entries(exampleReferences)
+ ) {
+ assertStrictEquals(
+ isURI(uri),
+ uri in exampleURIReferences && scheme != null,
+ );
}
},
});
Deno.test({
name: "Identifies absolute U·R·Is.",
fn: () => {
- for (const [uri, { fragment }] of Object.entries(exampleURIs)) {
- assertStrictEquals(isAbsoluteURI(uri), fragment == null);
+ for (
+ const [uri, { scheme, fragment }] of Object.entries(
+ exampleReferences,
+ )
+ ) {
+ assertStrictEquals(
+ isAbsoluteURI(uri),
+ uri in exampleURIReferences && scheme != null &&
+ fragment == null,
+ );
}
},
});
Deno.test({
name: "Identifies U·R·I references.",
fn: () => {
- for (const uri of Object.keys(exampleURIReferences)) {
- assert(isURIReference(uri));
+ for (const uri of Object.keys(exampleReferences)) {
+ assertStrictEquals(
+ isURIReference(uri),
+ uri in exampleURIReferences,
+ );
}
},
});
Deno.test({
name: "Identifies I·R·Is.",
fn: () => {
- for (const iri of Object.keys(exampleIRIs)) {
- assert(isIRI(iri));
+ for (
+ const [iri, { scheme }] of Object.entries(exampleReferences)
+ ) {
+ assertStrictEquals(
+ isIRI(iri),
+ iri in exampleIRIReferences && scheme != null,
+ );
}
},
});
Deno.test({
name: "Identifies absolute I·R·Is.",
fn: () => {
- for (const [iri, { fragment }] of Object.entries(exampleIRIs)) {
- assertStrictEquals(isAbsoluteIRI(iri), fragment == null);
+ for (
+ const [iri, { scheme, fragment }] of Object.entries(
+ exampleReferences,
+ )
+ ) {
+ assertStrictEquals(
+ isAbsoluteIRI(iri),
+ iri in exampleIRIReferences && scheme != null &&
+ fragment == null,
+ );
}
},
});
Deno.test({
name: "Identifies I·R·I references.",
fn: () => {
- for (const iri of Object.keys(exampleIRIReferences)) {
- assert(isIRIReference(iri));
+ for (const iri of Object.keys(exampleReferences)) {
+ assertStrictEquals(
+ isIRIReference(iri),
+ iri in exampleIRIReferences,
+ );
+ }
+ },
+});
+
+Deno.test({
+ name: "Identifies L·E·I·R·Is.",
+ fn: () => {
+ for (
+ const [leiri, { scheme }] of Object.entries(exampleReferences)
+ ) {
+ assertStrictEquals(
+ isLEIRI(leiri),
+ leiri in exampleLEIRIReferences && scheme != null,
+ );
+ }
+ },
+});
+
+Deno.test({
+ name: "Identifies absolute L·E·I·R·Is.",
+ fn: () => {
+ for (
+ const [leiri, { scheme, fragment }] of Object.entries(
+ exampleReferences,
+ )
+ ) {
+ assertStrictEquals(
+ isAbsoluteLEIRI(leiri),
+ leiri in exampleLEIRIReferences && scheme != null &&
+ fragment == null,
+ );
+ }
+ },
+});
+
+Deno.test({
+ name: "Identifies L·E·I·R·I references.",
+ fn: () => {
+ for (const leiri of Object.keys(exampleReferences)) {
+ assertStrictEquals(
+ isLEIRIReference(leiri),
+ leiri in exampleLEIRIReferences,
+ );
}
},
});
Deno.test({
name: "Correctly parses references.",
fn: () => {
- for (const [iri, value] of Object.entries(exampleIRIReferences)) {
+ for (const [iri, value] of Object.entries(exampleReferences)) {
assertEquals(parseReference(iri), {
scheme: undefined,
authority: undefined,
Deno.test({
name: "Correctly composes references.",
fn: () => {
- for (const [iri, value] of Object.entries(exampleIRIReferences)) {
+ for (
+ const [iri, value] of Object.entries(exampleLEIRIReferences)
+ ) {
assertStrictEquals(composeReference(value), iri);
}
},
});
Deno.test({
- name: "Converts IRIs to URIs.",
+ name: "Converts (L·E·)I·R·Is to U·R·Is.",
fn: () => {
assertStrictEquals(
- iri2uri("/dir1/引き割り.html"),
+ escapeForURI("/dir1/引き割り.html"),
"/dir1/%E5%BC%95%E3%81%8D%E5%89%B2%E3%82%8A.html",
);
+ assertStrictEquals(
+ escapeForURI(" æ\0"),
+ "%20%C3%A6%00",
+ );
+ assertStrictEquals(
+ escapeForURI("\u{F0000}?\u{F0000}#\u{F0000}"),
+ "%F3%B0%80%80?%F3%B0%80%80#%F3%B0%80%80",
+ );
+ },
+});
+
+Deno.test({
+ name: "Converts L·E·I·R·Is to I·R·Is.",
+ fn: () => {
+ assertStrictEquals(
+ escapeForIRI(" æ\0"),
+ "%20æ%00",
+ );
+ assertStrictEquals(
+ escapeForIRI("\u{F0000}?\u{F0000}#\u{F0000}"),
+ "%F3%B0%80%80?\u{F0000}#%F3%B0%80%80",
+ );
},
});