]> Lady’s Gitweb - Pisces/blob - iri.js
a3e454689346b998df91fb413316af89219ef033
[Pisces] / iri.js
1 // ♓🌟 Piscēs ∷ iri.js
2 // ====================================================================
3 //
4 // Copyright © 2020, 2022 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import { bind } from "./function.js";
11 import {
12 defineOwnProperties,
13 namedEntries,
14 objectFromEntries,
15 } from "./object.js";
16
// ---------------------------------------------------------------------
// U·R·I grammar (R·F·C 3986).
//
// Each constant holds regular expression *source text* for the A·B·N·F
// production of the same name. Productions which contain a top‐level
// `|` are not self‐grouping, so every interpolation below wraps them in
// `(?:…)`. All patterns are intended for use with the `u` flag.
// ---------------------------------------------------------------------

const sub·delims = String.raw`[!\$&'()*+,;=]`;
const gen·delims = String.raw`[:/?#\[\]@]`;
//deno-lint-ignore no-unused-vars
const reserved = String.raw`${gen·delims}|${sub·delims}`;
const unreserved = String.raw`[A-Za-z0-9\-\._~]`;
const pct·encoded = String.raw`%[0-9A-Fa-f][0-9A-Fa-f]`;
const dec·octet = String.raw
  `[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]`;
const IPv4address = String.raw
  `(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})`;
const h16 = String.raw`[0-9A-Fa-f]{1,4}`;
const ls32 = String.raw`(?:${h16}):(?:${h16})|${IPv4address}`;
const IPv6address = String.raw
  `(?:(?:${h16}):){6}(?:${ls32})|::(?:(?:${h16}):){5}(?:${ls32})|(?:${h16})?::(?:(?:${h16}):){4}(?:${ls32})|(?:(?:(?:${h16}):){0,1}(?:${h16}))?::(?:(?:${h16}):){3}(?:${ls32})|(?:(?:(?:${h16}):){0,2}(?:${h16}))?::(?:(?:${h16}):){2}(?:${ls32})|(?:(?:(?:${h16}):){0,3}(?:${h16}))?::(?:${h16}):(?:${ls32})|(?:(?:(?:${h16}):){0,4}(?:${h16}))?::(?:${ls32})|(?:(?:(?:${h16}):){0,5}(?:${h16}))?::(?:${h16})|(?:(?:(?:${h16}):){0,6}(?:${h16}))?::`;
// IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ); the
// trailing `+` is required so that more than one character may follow
// the dot.
const IPvFuture = String.raw
  `v[0-9A-Fa-f]{1,}\.(?:${unreserved}|${sub·delims}|:)+`;
const IP·literal = String.raw`\[(?:${IPv6address}|${IPvFuture})\]`;
const port = String.raw`[0-9]*`;
const scheme = String.raw`[A-Za-z][A-Za-z0-9+\-\.]*`;
const pchar = String.raw
  `${unreserved}|${pct·encoded}|${sub·delims}|[:@]`;
const fragment = String.raw`(?:${pchar}|[/?])*`;
const query = String.raw`(?:${pchar}|[/?])*`;
const segment·nz·nc = String.raw
  `(?:${unreserved}|${pct·encoded}|${sub·delims}|@)+`;
const segment·nz = String.raw`(?:${pchar})+`;
const segment = String.raw`(?:${pchar})*`;
const path·empty = String.raw``;
const path·rootless = String.raw
  `(?:${segment·nz})(?:/(?:${segment}))*`;
const path·noscheme = String.raw
  `(?:${segment·nz·nc})(?:/(?:${segment}))*`;
const path·absolute = String.raw
  `/(?:(?:${segment·nz})(?:/(?:${segment}))*)?`;
const path·abempty = String.raw`(?:/(?:${segment}))*`;
const path = String.raw
  `${path·abempty}|${path·absolute}|${path·noscheme}|${path·rootless}|${path·empty}`;
const reg·name = String.raw
  `(?:${unreserved}|${pct·encoded}|${sub·delims})*`;
const host = String.raw`${IP·literal}|${IPv4address}|${reg·name}`;
const userinfo = String.raw
  `(?:${unreserved}|${pct·encoded}|${sub·delims}|:)*`;
const authority = String.raw
  `(?:(?:${userinfo})@)?(?:${host})(?::(?:${port}))?`;
const relative·part = String.raw
  `//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·noscheme})|(?:${path·empty})`;
const relative·ref = String.raw
  `(?:${relative·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`;
const hier·part = String.raw
  `//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·rootless})|(?:${path·empty})`;
const absolute·URI = String.raw
  `(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?`;
const URI = String.raw
  `(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`;
const URI·reference = String.raw`(?:${URI})|(?:${relative·ref})`;

// ---------------------------------------------------------------------
// I·R·I grammar (R·F·C 3987).
//
// These mirror the U·R·I productions above, additionally admitting
// `ucschar` wherever `unreserved` is allowed and `iprivate` in queries.
// ---------------------------------------------------------------------

const iprivate = String.raw
  `[\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`;
// The final range begins at U+E1000, not U+E0000: R·F·C 3987 excludes
// U+E0000–U+E0FFF from `ucschar` (the L·E·I·R·I grammar below readmits
// them via its own `iprivate`).
const ucschar = String.raw
  `[\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]`;
const iunreserved = String.raw`[A-Za-z0-9\-\._~]|${ucschar}`;
const ipchar = String.raw
  `${iunreserved}|${pct·encoded}|${sub·delims}|[:@]`;
const ifragment = String.raw`(?:${ipchar}|[/?])*`;
const iquery = String.raw`(?:${ipchar}|${iprivate}|[/?])*`;
const isegment·nz·nc = String.raw
  `(?:${iunreserved}|${pct·encoded}|${sub·delims}|@)+`;
const isegment·nz = String.raw`(?:${ipchar})+`;
const isegment = String.raw`(?:${ipchar})*`;
const ipath·empty = String.raw``;
const ipath·rootless = String.raw
  `(?:${isegment·nz})(?:/(?:${isegment}))*`;
const ipath·noscheme = String.raw
  `(?:${isegment·nz·nc})(?:/(?:${isegment}))*`;
const ipath·absolute = String.raw
  `/(?:(?:${isegment·nz})(?:/(?:${isegment}))*)?`;
const ipath·abempty = String.raw`(?:/(?:${isegment}))*`;
const ipath = String.raw
  `${ipath·abempty}|${ipath·absolute}|${ipath·noscheme}|${ipath·rootless}|${ipath·empty}`;
const ireg·name = String.raw
  `(?:${iunreserved}|${pct·encoded}|${sub·delims})*`;
const ihost = String.raw`${IP·literal}|${IPv4address}|${ireg·name}`;
const iuserinfo = String.raw
  `(?:${iunreserved}|${pct·encoded}|${sub·delims}|:)*`;
const iauthority = String.raw
  `(?:(?:${iuserinfo})@)?(?:${ihost})(?::(?:${port}))?`;
const irelative·part = String.raw
  `//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·noscheme})|(?:${ipath·empty})`;
const irelative·ref = String.raw
  `(?:${irelative·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`;
const ihier·part = String.raw
  `//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·rootless})|(?:${ipath·empty})`;
const absolute·IRI = String.raw
  `(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?`;
const IRI = String.raw
  `(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`;
const IRI·reference = String.raw`(?:${IRI})|(?:${irelative·ref})`;

// ---------------------------------------------------------------------
// L·E·I·R·I grammar.
//
// Identical in shape to the I·R·I grammar, but with wider `ucschar` and
// `iprivate` character classes (including several A·S·C·I·I characters
// and the C0 controls).
// ---------------------------------------------------------------------

const leiri·iprivate = String.raw
  `[\u{E000}-\u{F8FF}\u{E0000}-\u{E0FFF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`;
const leiri·ucschar = String.raw
  `[ <>"{}|\\^${"`"}\u{0}-\u{1F}\u{7F}-\u{D7FF}\u{E000}-\u{FFFD}\u{10000}-\u{10FFFF}]`;
const leiri·iunreserved = String.raw
  `[A-Za-z0-9\-\._~]|${leiri·ucschar}`;
const leiri·ipchar = String.raw
  `${leiri·iunreserved}|${pct·encoded}|${sub·delims}|[:@]`;
const leiri·ifragment = String.raw`(?:${leiri·ipchar}|[/?])*`;
const leiri·iquery = String.raw
  `(?:${leiri·ipchar}|${leiri·iprivate}|[/?])*`;
const leiri·isegment·nz·nc = String.raw
  `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|@)+`;
const leiri·isegment·nz = String.raw`(?:${leiri·ipchar})+`;
const leiri·isegment = String.raw`(?:${leiri·ipchar})*`;
const leiri·ipath·empty = String.raw``;
const leiri·ipath·rootless = String.raw
  `(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*`;
const leiri·ipath·noscheme = String.raw
  `(?:${leiri·isegment·nz·nc})(?:/(?:${leiri·isegment}))*`;
const leiri·ipath·absolute = String.raw
  `/(?:(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*)?`;
const leiri·ipath·abempty = String.raw`(?:/(?:${leiri·isegment}))*`;
const leiri·ipath = String.raw
  `${leiri·ipath·abempty}|${leiri·ipath·absolute}|${leiri·ipath·noscheme}|${leiri·ipath·rootless}|${leiri·ipath·empty}`;
const leiri·ireg·name = String.raw
  `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims})*`;
const leiri·ihost = String.raw
  `${IP·literal}|${IPv4address}|${leiri·ireg·name}`;
const leiri·iuserinfo = String.raw
  `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|:)*`;
const leiri·iauthority = String.raw
  `(?:(?:${leiri·iuserinfo})@)?(?:${leiri·ihost})(?::(?:${port}))?`;
const leiri·irelative·part = String.raw
  `//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})`;
const leiri·irelative·ref = String.raw
  `(?:${leiri·irelative·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`;
const leiri·ihier·part = String.raw
  `//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})`;
const absolute·LEIRI = String.raw
  `(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?`;
const LEIRI = String.raw
  `(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`;
const LEIRI·reference = String.raw
  `(?:${LEIRI})|(?:${leiri·irelative·ref})`;
160
/**
 * Turns a `[name, production]` pair into a `[name, predicate]` pair.
 *
 * The predicate returns true only for string arguments which match the
 * whole of the given production (the pattern is anchored at both
 * ends). It is passed through `defineOwnProperties` so that it carries
 * the given `name` and exposes a `Symbol.match` accessor yielding the
 * underlying regular expression’s matcher, bound via `bind` (both are
 * project helpers; presumably the former behaves like
 * `Object.defineProperties` and returns its first argument — confirm
 * against `./object.js`).
 */
const grammarPredicateEntry = ([predicateName, production]) => {
  const anchored = new RegExp(`^(?:${production})$`, "u");
  const matchDescriptor = {
    configurable: true,
    enumerable: false,
    get: () => bind(anchored[Symbol.match], anchored, []),
    set: undefined,
  };
  return [
    predicateName,
    defineOwnProperties(
      (value) => typeof value == "string" && anchored.test(value),
      { name: { value: predicateName }, [Symbol.match]: matchDescriptor },
    ),
  ];
};

// “Suffix” productions: everything which may follow `scheme://`.
const URI·suffix = String.raw
  `(?:${authority})(?:${path·abempty})(?:\?(?:${query}))?(?:#(?:${fragment}))?`;
const IRI·suffix = String.raw
  `(?:${iauthority})(?:${ipath·abempty})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`;
const LEIRI·suffix = String.raw
  `(?:${leiri·iauthority})(?:${leiri·ipath·abempty})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`;

/**
 * String predicates for each of the supported grammars. Each returns
 * true if its argument is a string matching the named production in
 * full, and false otherwise.
 */
export const {
  isAbsoluteURI, // U·R·I with no fragment
  isURI,
  isURIPath,
  isURIReference,
  isURISuffix, // only authority, path, query, fragment
  isAbsoluteIRI, // I·R·I with no fragment
  isIRI,
  isIRIPath,
  isIRIReference,
  isIRISuffix, // only authority, path, query, fragment
  isAbsoluteLEIRI, // L·E·I·R·I with no fragment
  isLEIRI,
  isLEIRIPath,
  isLEIRIReference,
  isLEIRISuffix, // only authority, path, query, fragment
} = objectFromEntries(
  namedEntries({
    isAbsoluteLEIRI: absolute·LEIRI,
    isAbsoluteIRI: absolute·IRI,
    isAbsoluteURI: absolute·URI,
    isLEIRI: LEIRI,
    isLEIRIPath: leiri·ipath,
    isLEIRIReference: LEIRI·reference,
    isLEIRISuffix: LEIRI·suffix,
    isIRI: IRI,
    isIRIPath: ipath,
    isIRIReference: IRI·reference,
    isIRISuffix: IRI·suffix,
    isURI: URI,
    isURIPath: path,
    isURIReference: URI·reference,
    isURISuffix: URI·suffix,
  }).map((entry) => grammarPredicateEntry(entry)),
);
216
/**
 * Recomposes an (L·E·)I·R·I reference from its component parts.
 *
 * The argument is an object with optional `scheme`, `authority`,
 * `path`, `query`, and `fragment` properties. A component which is
 * null or undefined is omitted together with its delimiter; an empty
 * string is a *present* component and keeps its delimiter.
 *
 * See §5.3 of R·F·C 3986.
 */
export const composeReference = ($) => {
  const { scheme, authority, path, query, fragment } = $;
  return [
    // The scheme is followed by a colon.
    ...(scheme != null ? [scheme, ":"] : []),
    // The authority is preceded by two slashes.
    ...(authority != null ? ["//", authority] : []),
    // The path has no delimiter; a missing path is treated as empty.
    path ?? "",
    // The query is preceded by a question mark.
    ...(query != null ? ["?", query] : []),
    // The fragment is preceded by a number sign.
    ...(fragment != null ? ["#", fragment] : []),
  ].join("");
};
256
/**
 * Converts an L·E·I·R·I to the corresponding I·R·I by percent‐encoding
 * unsupported characters.
 *
 * The input is split into components with `parseReference`, each
 * component is escaped separately, and the result is reassembled with
 * `composeReference`. This is necessary because the I·R·I syntax allows
 * private·use characters *only* in the query.
 *
 * Characters are escaped as the uppercase percent‐encoding of their
 * U·T·F‐8 bytes. If the input cannot be parsed, the entire input is
 * treated as a path.
 */
export const escapeForIRI = ($) => {
  const components = parseReference($);
  const encoder = new TextEncoder();

  // Compile the character tests once per call, rather than once per
  // codepoint. None of these use the `g` or `y` flags, so they carry no
  // state between calls to `test`.
  const isLEIRICharacter = new RegExp(
    `${leiri·ucschar}|${leiri·iprivate}`,
    "u",
  );
  const isIRICharacter = new RegExp(ucschar, "u");
  const isIRIQueryCharacter = new RegExp(`${ucschar}|${iprivate}`, "u");

  // The path will always be present (although perhaps empty) on a
  // successful parse. If it isn’t (and parsing failed), treat the
  // entire input as the path.
  components.path ??= `${$}`;

  // Escapes every codepoint of the provided component value which is
  // only permitted by the L·E·I·R·I grammar.
  const escapeComponent = (componentName, componentValue) => {
    const isAllowed = componentName == "query"
      ? isIRIQueryCharacter
      : isIRICharacter;
    let escaped = "";
    for (const character of componentValue) {
      if (isLEIRICharacter.test(character) && !isAllowed.test(character)) {
        // This codepoint needs to be escaped.
        for (const byte of encoder.encode(character)) {
          escaped += `%${
            byte.toString(0x10).toUpperCase().padStart(2, "0")
          }`;
        }
      } else {
        // This codepoint does not need escaping.
        escaped += character;
      }
    }
    return escaped;
  };

  // Escape disallowed codepoints in each component and compose an
  // I·R·I from the result.
  return composeReference(
    objectFromEntries(
      namedEntries(components).map(
        ([componentName, componentValue]) => [
          componentName,
          componentValue == null
            ? undefined
            : escapeComponent(componentName, componentValue),
        ],
      ),
    ),
  );
};
311
/**
 * Converts an (L·E·)I·R·I to the corresponding U·R·I by
 * percent‐encoding unsupported characters.
 *
 * The argument is converted to a string, and every codepoint matched
 * by the L·E·I·R·I `ucschar` or `iprivate` classes is replaced by the
 * uppercase percent‐encoding of its U·T·F‐8 bytes. All other codepoints
 * are passed through unchanged.
 *
 * This does not punycode the authority.
 */
export const escapeForURI = ($) => {
  const encoder = new TextEncoder();

  // Compile the character test once per call, rather than once per
  // codepoint. It has no `g` or `y` flag, so `test` is stateless.
  const needsEscaping = new RegExp(
    `${leiri·ucschar}|${leiri·iprivate}`,
    "u",
  );

  let escaped = "";
  for (const character of `${$}`) {
    if (needsEscaping.test(character)) {
      // This codepoint needs to be escaped.
      for (const byte of encoder.encode(character)) {
        escaped += `%${
          byte.toString(0x10).toUpperCase().padStart(2, "0")
        }`;
      }
    } else {
      // This codepoint doesn’t need escaping.
      escaped += character;
    }
  }
  return escaped;
};
339
/**
 * Merges a reference path with a base path.
 *
 * Everything in the base path up to and including its final slash is
 * kept, and the reference path is appended to it. If the base path
 * contains no slash, the result is just the reference path.
 *
 * See §5.2.3 of R·F·C 3986.
 */
export const mergePaths = (base, reference) => {
  const basePath = `${base}`;
  // `lastIndexOf` yields -1 when there is no slash, making this zero.
  const keepLength = basePath.lastIndexOf("/") + 1;
  const directory = basePath.slice(0, keepLength);
  return `${directory}${reference}`;
};
351
/**
 * The regular expression used by `parseReference`.
 *
 * It matches either an absolute L·E·I·R·I or a relative L·E·I·R·I
 * reference, capturing each component in a named group prefixed with
 * `absolute·` or `relative·` accordingly. Paths are captured in one of
 * two groups depending on whether an authority precedes them.
 *
 * This is compiled once here instead of on every call. It has no `g`
 * or `y` flag, so `exec` does not carry state between calls.
 */
const referenceRegExp = new RegExp(
  String.raw
    `^(?:(?<absolute·scheme>${scheme}):(?://(?<absolute·authority>${leiri·iauthority})(?<absolute·patha>${leiri·ipath·abempty})|(?<absolute·pathb>(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})))(?:\?(?<absolute·query>${leiri·iquery}))?(?:#(?<absolute·fragment>${leiri·ifragment}))?|(?://(?<relative·authority>${leiri·iauthority})(?<relative·patha>${leiri·ipath·abempty})|(?<relative·pathb>(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})))(?:\?(?<relative·query>${leiri·iquery}))?(?:#(?<relative·fragment>${leiri·ifragment}))?)$`,
  "u",
);

/**
 * Returns the `scheme`, `authority`, `path`, `query`, and `fragment`
 * of the provided (L·E·)I·R·I reference.
 *
 * `path` will always be defined for valid references, and will be
 * undefined for values which are not valid L·E·I·R·Is. Any other
 * component which is not present in the reference is undefined.
 */
export const parseReference = ($) => {
  // If nothing matched, every component below is undefined.
  const {
    absolute·scheme,
    absolute·authority,
    absolute·patha,
    absolute·pathb,
    absolute·query,
    absolute·fragment,
    relative·authority,
    relative·patha,
    relative·pathb,
    relative·query,
    relative·fragment,
  } = referenceRegExp.exec($)?.groups ?? {};
  return {
    scheme: absolute·scheme,
    authority: absolute·authority ?? relative·authority,
    path: absolute·patha ?? absolute·pathb ?? relative·patha ??
      relative·pathb,
    query: absolute·query ?? relative·query,
    fragment: absolute·fragment ?? relative·fragment,
  };
};
387
/**
 * Removes all dot segments ("." or "..") from the provided (L·E·)I·R·I
 * path, returning the resulting path as a string.
 *
 * The input is scanned from left to right. Ordinary segments are
 * appended to an output list (each with its leading slash, if any); a
 * "." segment is skipped, and a ".." segment additionally discards the
 * most recently appended segment.
 *
 * See §5.2.4 of R·F·C 3986.
 */
export const removeDotSegments = ($) => {
  const path = `${$}`;
  const end = path.length;
  const segments = [];
  let cursor = 0;

  // Whether the unread portion of the path begins with the given text.
  const nextIs = (text) => path.startsWith(text, cursor);

  while (cursor < end) {
    const remaining = end - cursor;
    if (nextIs("../")) {
      // A double leader; drop it. This can only occur before any
      // segment has been read.
      cursor += 3;
      continue;
    }
    if (nextIs("./")) {
      // A single leader; drop it. This can only occur before any
      // segment has been read.
      cursor += 2;
      continue;
    }
    if (nextIs("/./")) {
      // A slash, single leader, and another slash. Skip ahead to just
      // before the second slash.
      cursor += 2;
      continue;
    }
    if (nextIs("/.") && remaining == 2) {
      // A slash and single leader which exhaust the input. Emit an
      // empty segment and finish.
      segments.push("/");
      cursor = end;
      continue;
    }
    if (nextIs("/../")) {
      // A slash, double leader, and another slash. Discard the last
      // emitted segment (if any) and skip ahead to just before the
      // second slash.
      cursor += 3;
      segments.pop();
      continue;
    }
    if (nextIs("/..") && remaining == 3) {
      // A slash and double leader which exhaust the input. Replace the
      // last emitted segment (if any) with an empty segment and
      // finish.
      segments.pop();
      segments.push("/");
      cursor = end;
      continue;
    }
    if (nextIs(".") && remaining == 1 || nextIs("..") && remaining == 2) {
      // A lone single or double leader which exhausts the input.
      // Nothing is emitted for it.
      cursor = end;
      continue;
    }

    // An ordinary segment. Emit everything up to (but not including)
    // the next slash, or the rest of the input if no slash remains.
    const nextSlash = path.indexOf("/", cursor + 1);
    const stop = nextSlash == -1 ? end : nextSlash;
    segments.push(path.substring(cursor, stop));
    cursor = stop;
  }
  return segments.join("");
};
457
/**
 * Resolves the provided reference relative to the provided base
 * (L·E·)I·R·I, returning the resulting target as a string.
 *
 * If no base is provided, `globalThis.location` is used when it is
 * defined. The base must have a scheme; otherwise, a `TypeError` is
 * thrown.
 *
 * See §5.2 of R·F·C 3986.
 */
export const resolveReference = (
  R,
  // Read `location` off of `globalThis` so that environments which do
  // not declare it fall through to the empty string (and the
  // `TypeError` below) instead of throwing a `ReferenceError`.
  Base = globalThis.location ?? "",
) => {
  const {
    scheme: Base·scheme,
    authority: Base·authority,
    path: Base·path,
    query: Base·query,
  } = parseReference(Base);
  if (Base·scheme == null) {
    // Base I·R·I’s must be valid I·R·I’s, meaning they must have a
    // scheme.
    throw new TypeError(
      `Piscēs: Base did not have a scheme: ${Base}.`,
    );
  } else {
    // The provided Base I·R·I is valid.
    const {
      scheme: R·scheme,
      authority: R·authority,
      path: R·path,
      query: R·query,
      fragment: R·fragment,
    } = parseReference(R);
    return composeReference(
      R·scheme != null
        // The reference is absolute; only its path needs cleaning.
        ? {
          scheme: R·scheme,
          authority: R·authority,
          path: removeDotSegments(R·path),
          query: R·query,
          fragment: R·fragment,
        }
        : R·authority != null
        // The reference has an authority; only the scheme is inherited.
        ? {
          scheme: Base·scheme,
          authority: R·authority,
          path: removeDotSegments(R·path),
          query: R·query,
          fragment: R·fragment,
        }
        : !R·path
        // The reference has no path; the base path is used as‐is, and
        // the base query is used if the reference has none.
        ? {
          scheme: Base·scheme,
          authority: Base·authority,
          path: Base·path,
          query: R·query ?? Base·query,
          fragment: R·fragment,
        }
        : {
          scheme: Base·scheme,
          authority: Base·authority,
          path: R·path[0] == "/"
            // The reference path is absolute.
            ? removeDotSegments(R·path)
            // The reference path is relative and must be merged. Per
            // §5.2.3, an empty base path is treated as "/" *only* when
            // the base has an authority; otherwise the reference path
            // simply replaces it (for example, `a:` + `b` is `a:b`).
            : removeDotSegments(
              mergePaths(
                Base·authority != null && !Base·path ? "/" : Base·path,
                R·path,
              ),
            ),
          query: R·query,
          fragment: R·fragment,
        },
    );
  }
};
This page took 0.118605 seconds and 3 git commands to generate.