]> Lady’s Gitweb - Pisces/blob - iri.js
b9682d96e91d0fc8754b66544da0a1b20166f455
[Pisces] / iri.js
1 // ♓🌟 Piscēs ∷ iri.js
2 // ====================================================================
3 //
4 // Copyright © 2020, 2022 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
// Regular expression *source fragments* for the character classes and
// I·P literals of the R·F·C 3986 grammar. Each constant is a string
// (built with `String.raw` so backslashes survive verbatim), not a
// RegExp. Fragments containing a bare top‐level `|` must be wrapped
// in `(?:…)` wherever they are embedded in a larger pattern.

// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," /
// ";" / "="
const sub·delims = String.raw`[!\$&'()*+,;=]`;
// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
const gen·delims = String.raw`[:/?#\[\]@]`;
//deno-lint-ignore no-unused-vars
const reserved = String.raw`${gen·delims}|${sub·delims}`;
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
const unreserved = String.raw`[A-Za-z0-9\-\._~]`;
// pct-encoded = "%" HEXDIG HEXDIG
const pct·encoded = String.raw`%[0-9A-Fa-f][0-9A-Fa-f]`;
// A decimal number from 0 through 255 with no leading zeroes.
const dec·octet = String.raw
  `[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]`;
// Four decimal octets separated by dots.
const IPv4address = String.raw
  `(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})`;
// One to four hexadecimal digits (a 16‐bit piece of an I·Pv6 address).
const h16 = String.raw`[0-9A-Fa-f]{1,4}`;
// The least‐significant 32 bits of an I·Pv6 address: either two h16
// pieces or an embedded I·Pv4 address.
const ls32 = String.raw`(?:${h16}):(?:${h16})|${IPv4address}`;
// The nine alternatives below are, in order, the nine alternatives of
// the I·Pv6address production (zero through seven pieces before the
// "::" elision, plus the form with no elision at all).
const IPv6address = String.raw
  `(?:(?:${h16}):){6}(?:${ls32})|::(?:(?:${h16}):){5}(?:${ls32})|(?:${h16})?::(?:(?:${h16}):){4}(?:${ls32})|(?:(?:(?:${h16}):){0,1}(?:${h16}))?::(?:(?:${h16}):){3}(?:${ls32})|(?:(?:(?:${h16}):){0,2}(?:${h16}))?::(?:(?:${h16}):){2}(?:${ls32})|(?:(?:(?:${h16}):){0,3}(?:${h16}))?::(?:${h16}):(?:${ls32})|(?:(?:(?:${h16}):){0,4}(?:${h16}))?::(?:${ls32})|(?:(?:(?:${h16}):){0,5}(?:${h16}))?::(?:${h16})|(?:(?:(?:${h16}):){0,6}(?:${h16}))?::`;
// IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
//
// The trailing `+` is required: the production allows one *or more*
// characters after the dot, not exactly one.
const IPvFuture = String.raw
  `v[0-9A-Fa-f]{1,}\.(?:${unreserved}|${sub·delims}|:)+`;
// IP-literal = "[" ( IPv6address / IPvFuture ) "]"
const IP·literal = String.raw`\[(?:${IPv6address}|${IPvFuture})\]`;
// The remaining productions of the U·R·I grammar, assembled from the
// character classes above. As before, every constant is a string of
// regular expression source. Several (`pchar`, `path`, `host`,
// `relative·part`, `hier·part`, `URI·reference`) are bare alternations
// and are therefore always interpolated inside a `(?:…)` group.

// Zero or more decimal digits.
const port = String.raw`[0-9]*`;
// A letter followed by any number of letters, digits, "+", "-", or ".".
const scheme = String.raw`[A-Za-z][A-Za-z0-9+\-\.]*`;
// A single path character: unreserved, percent‐encoded, sub‐delimiter,
// ":" or "@".
const pchar = String.raw
  `${unreserved}|${pct·encoded}|${sub·delims}|[:@]`;
// Fragments and queries share a pattern: any number of path characters,
// "/" or "?".
const fragment = String.raw`(?:${pchar}|[/?])*`;
const query = String.raw`(?:${pchar}|[/?])*`;
// A nonempty segment which contains no colon (":" is the only `pchar`
// alternative left out).
const segment·nz·nc = String.raw
  `(?:${unreserved}|${pct·encoded}|${sub·delims}|@)+`;
// A nonempty segment.
const segment·nz = String.raw`(?:${pchar})+`;
// A possibly‐empty segment.
const segment = String.raw`(?:${pchar})*`;
// The empty path; this matches only the empty string.
const path·empty = String.raw``;
// A path which begins with a nonempty segment (no leading slash).
const path·rootless = String.raw
  `(?:${segment·nz})(?:/(?:${segment}))*`;
// As `path·rootless`, but the first segment may not contain a colon.
const path·noscheme = String.raw
  `(?:${segment·nz·nc})(?:/(?:${segment}))*`;
// A path which begins with a single slash; a second slash cannot follow
// immediately because the first segment, if any, must be nonempty.
const path·absolute = String.raw
  `/(?:(?:${segment·nz})(?:/(?:${segment}))*)?`;
// A path which is empty or consists of slash‐prefixed segments.
const path·abempty = String.raw`(?:/(?:${segment}))*`;
// Any of the five kinds of path.
const path = String.raw
  `${path·abempty}|${path·absolute}|${path·noscheme}|${path·rootless}|${path·empty}`;
// A registered name; possibly empty.
const reg·name = String.raw
  `(?:${unreserved}|${pct·encoded}|${sub·delims})*`;
// A bracketed I·P literal, an I·Pv4 address, or a registered name (tried
// in that order).
const host = String.raw`${IP·literal}|${IPv4address}|${reg·name}`;
// As `reg·name`, but ":" is also permitted.
const userinfo = String.raw
  `(?:${unreserved}|${pct·encoded}|${sub·delims}|:)*`;
// Optional `userinfo` followed by "@", then the host, then an optional
// ":" and port.
const authority = String.raw
  `(?:(?:${userinfo})@)?(?:${host})(?::(?:${port}))?`;
// The part of a relative reference before any query or fragment:
// "//" authority path‐abempty, or an absolute, noscheme, or empty path.
const relative·part = String.raw
  `//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·noscheme})|(?:${path·empty})`;
// A relative part with an optional "?" query and an optional "#" fragment.
const relative·ref = String.raw
  `(?:${relative·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`;
// As `relative·part`, but with `path·rootless` in place of
// `path·noscheme`; this is what follows the scheme and colon.
const hier·part = String.raw
  `//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·rootless})|(?:${path·empty})`;
// scheme ":" hier‐part with an optional query and *no* fragment.
const absolute·URI = String.raw
  `(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?`;
// scheme ":" hier‐part with an optional query and an optional fragment.
const URI = String.raw
  `(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`;
// Either a full U·R·I or a relative reference.
const URI·reference = String.raw`(?:${URI})|(?:${relative·ref})`;
65
// Character classes which the I·R·I grammar (R·F·C 3987) adds to the
// U·R·I grammar. The `\u{…}` escapes are kept textually by `String.raw`
// and so are interpreted by the regular expression engine; the
// resulting patterns consequently require the "u" flag.

// iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
const iprivate = String.raw
  `[\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`;
// ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF / %x10000-1FFFD /
// … / %xD0000-DFFFD / %xE1000-EFFFD
//
// Note that the final range begins at U+E1000, not U+E0000: the
// codepoints U+E0000 through U+E0FFF are not permitted in I·R·I’s.
const ucschar = String.raw
  `[\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]`;
// The I·R·I grammar. Each production mirrors the U·R·I production of
// the same name (minus the leading "i"), substituting `iunreserved`
// for `unreserved`; `IP·literal`, `IPv4address`, `port`, `scheme`,
// `pct·encoded`, and `sub·delims` are reused unchanged. As elsewhere,
// these are strings of regular expression source, and the bare
// alternations are always interpolated inside `(?:…)`.

// The U·R·I unreserved characters plus `ucschar`.
const iunreserved = String.raw`[A-Za-z0-9\-\._~]|${ucschar}`;
const ipchar = String.raw
  `${iunreserved}|${pct·encoded}|${sub·delims}|[:@]`;
const ifragment = String.raw`(?:${ipchar}|[/?])*`;
// The query is the only production which additionally admits
// `iprivate` characters.
const iquery = String.raw`(?:${ipchar}|${iprivate}|[/?])*`;
// A nonempty segment with no colon.
const isegment·nz·nc = String.raw
  `(?:${iunreserved}|${pct·encoded}|${sub·delims}|@)+`;
const isegment·nz = String.raw`(?:${ipchar})+`;
const isegment = String.raw`(?:${ipchar})*`;
// The empty path; this matches only the empty string.
const ipath·empty = String.raw``;
const ipath·rootless = String.raw
  `(?:${isegment·nz})(?:/(?:${isegment}))*`;
const ipath·noscheme = String.raw
  `(?:${isegment·nz·nc})(?:/(?:${isegment}))*`;
const ipath·absolute = String.raw
  `/(?:(?:${isegment·nz})(?:/(?:${isegment}))*)?`;
const ipath·abempty = String.raw`(?:/(?:${isegment}))*`;
// Any of the five kinds of path.
const ipath = String.raw
  `${ipath·abempty}|${ipath·absolute}|${ipath·noscheme}|${ipath·rootless}|${ipath·empty}`;
const ireg·name = String.raw
  `(?:${iunreserved}|${pct·encoded}|${sub·delims})*`;
const ihost = String.raw`${IP·literal}|${IPv4address}|${ireg·name}`;
const iuserinfo = String.raw
  `(?:${iunreserved}|${pct·encoded}|${sub·delims}|:)*`;
// Optional userinfo and "@", then the host, then an optional ":" and
// port.
const iauthority = String.raw
  `(?:(?:${iuserinfo})@)?(?:${ihost})(?::(?:${port}))?`;
const irelative·part = String.raw
  `//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·noscheme})|(?:${ipath·empty})`;
const irelative·ref = String.raw
  `(?:${irelative·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`;
// As `irelative·part`, but with `ipath·rootless` in place of
// `ipath·noscheme`.
const ihier·part = String.raw
  `//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·rootless})|(?:${ipath·empty})`;
// An I·R·I with an optional query and *no* fragment.
const absolute·IRI = String.raw
  `(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?`;
// An I·R·I with an optional query and an optional fragment.
const IRI = String.raw
  `(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`;
// Either a full I·R·I or a relative reference.
const IRI·reference = String.raw`(?:${IRI})|(?:${irelative·ref})`;
107
// The L·E·I·R·I grammar. Its productions have the same shape as the
// I·R·I productions above, but are built from the wider
// `leiri·iprivate` and `leiri·ucschar` character classes defined here.
// `IP·literal`, `IPv4address`, `port`, `scheme`, `pct·encoded`, and
// `sub·delims` are reused unchanged. These are strings of regular
// expression source whose `\u{…}` escapes require the "u" flag.

// The I·R·I private·use ranges plus U+E0000 through U+E0FFF.
const leiri·iprivate = String.raw
  `[\u{E000}-\u{F8FF}\u{E0000}-\u{E0FFF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`;
// Space, `<`, `>`, `"`, `{`, `}`, `|`, `\`, `^`, and the backtick
// (interpolated, since it cannot appear literally inside a template),
// followed by U+0 through U+1F, U+7F through U+D7FF, U+E000 through
// U+FFFD, and U+10000 through U+10FFFF.
const leiri·ucschar = String.raw
  `[ <>"{}|\\^${"`"}\u{0}-\u{1F}\u{7F}-\u{D7FF}\u{E000}-\u{FFFD}\u{10000}-\u{10FFFF}]`;
const leiri·iunreserved = String.raw
  `[A-Za-z0-9\-\._~]|${leiri·ucschar}`;
const leiri·ipchar = String.raw
  `${leiri·iunreserved}|${pct·encoded}|${sub·delims}|[:@]`;
const leiri·ifragment = String.raw`(?:${leiri·ipchar}|[/?])*`;
// The query additionally admits `leiri·iprivate` characters.
const leiri·iquery = String.raw
  `(?:${leiri·ipchar}|${leiri·iprivate}|[/?])*`;
// A nonempty segment with no colon.
const leiri·isegment·nz·nc = String.raw
  `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|@)+`;
const leiri·isegment·nz = String.raw`(?:${leiri·ipchar})+`;
const leiri·isegment = String.raw`(?:${leiri·ipchar})*`;
// The empty path; this matches only the empty string.
const leiri·ipath·empty = String.raw``;
const leiri·ipath·rootless = String.raw
  `(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*`;
const leiri·ipath·noscheme = String.raw
  `(?:${leiri·isegment·nz·nc})(?:/(?:${leiri·isegment}))*`;
const leiri·ipath·absolute = String.raw
  `/(?:(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*)?`;
const leiri·ipath·abempty = String.raw`(?:/(?:${leiri·isegment}))*`;
// Any of the five kinds of path.
const leiri·ipath = String.raw
  `${leiri·ipath·abempty}|${leiri·ipath·absolute}|${leiri·ipath·noscheme}|${leiri·ipath·rootless}|${leiri·ipath·empty}`;
const leiri·ireg·name = String.raw
  `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims})*`;
const leiri·ihost = String.raw
  `${IP·literal}|${IPv4address}|${leiri·ireg·name}`;
const leiri·iuserinfo = String.raw
  `(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|:)*`;
// Optional userinfo and "@", then the host, then an optional ":" and
// port.
const leiri·iauthority = String.raw
  `(?:(?:${leiri·iuserinfo})@)?(?:${leiri·ihost})(?::(?:${port}))?`;
const leiri·irelative·part = String.raw
  `//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})`;
const leiri·irelative·ref = String.raw
  `(?:${leiri·irelative·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`;
// As `leiri·irelative·part`, but with `leiri·ipath·rootless` in place
// of `leiri·ipath·noscheme`.
const leiri·ihier·part = String.raw
  `//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})`;
// An L·E·I·R·I with an optional query and *no* fragment.
const absolute·LEIRI = String.raw
  `(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?`;
// An L·E·I·R·I with an optional query and an optional fragment.
const LEIRI = String.raw
  `(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`;
// Either a full L·E·I·R·I or a relative reference.
const LEIRI·reference = String.raw
  `(?:${LEIRI})|(?:${leiri·irelative·ref})`;
153
/**
 * Predicates for testing values against the U·R·I, I·R·I, and
 * L·E·I·R·I grammars.
 *
 * Each predicate returns true only when its argument is a string which
 * matches the corresponding pattern in its entirety. Each predicate
 * also has a `Symbol.match` getter which yields the matcher of its
 * underlying regular expression, bound to that expression.
 */
export const {
  isAbsoluteURI, // U·R·I with no fragment
  isURI,
  isURIPath,
  isURIReference,
  isURISuffix, // only authority, path, query, fragment
  isAbsoluteIRI, // I·R·I with no fragment
  isIRI,
  isIRIPath,
  isIRIReference,
  isIRISuffix, // only authority, path, query, fragment
  isAbsoluteLEIRI, // L·E·I·R·I with no fragment
  isLEIRI,
  isLEIRIPath,
  isLEIRIReference,
  isLEIRISuffix, // only authority, path, query, fragment
} = (() => {
  // Regular expression source for each predicate, keyed by the name
  // the predicate will be given.
  const sources = {
    isAbsoluteLEIRI: absolute·LEIRI,
    isAbsoluteIRI: absolute·IRI,
    isAbsoluteURI: absolute·URI,
    isLEIRI: LEIRI,
    isLEIRIPath: leiri·ipath,
    isLEIRIReference: LEIRI·reference,
    isLEIRISuffix: String.raw
      `(?:${leiri·iauthority})(?:${leiri·ipath·abempty})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`,
    isIRI: IRI,
    isIRIPath: ipath,
    isIRIReference: IRI·reference,
    isIRISuffix: String.raw
      `(?:${iauthority})(?:${ipath·abempty})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`,
    isURI: URI,
    isURIPath: path,
    isURIReference: URI·reference,
    isURISuffix: String.raw
      `(?:${authority})(?:${path·abempty})(?:\?(?:${query}))?(?:#(?:${fragment}))?`,
  };
  const predicates = {};
  for (const [predicateName, source] of Object.entries(sources)) {
    // Anchor the source at both ends so that only whole‐string matches
    // succeed. The "u" flag is needed for the `\u{…}` escapes.
    const pattern = new RegExp(`^(?:${source})$`, "u");
    const predicate = (value) =>
      typeof value == "string" && pattern.test(value);
    Object.defineProperty(predicate, "name", { value: predicateName });
    Object.defineProperty(predicate, Symbol.match, {
      configurable: true,
      enumerable: false,
      get: () => pattern[Symbol.match].bind(pattern),
      set: undefined,
    });
    predicates[predicateName] = predicate;
  }
  return predicates;
})();
209
/**
 * Builds an (L·E·)I·R·I reference string out of its components.
 *
 * The argument is an object with optional `scheme`, `authority`,
 * `path`, `query`, and `fragment` properties. A component which is
 * null or undefined is omitted together with its delimiter; a
 * component which is an empty string is kept (so an empty query still
 * produces a "?").
 *
 * See §5.3 of R·F·C 3986.
 */
export const composeReference = ($) => {
  const { scheme, authority, path, query, fragment } = $;

  // Each piece carries its own delimiter, or is empty when the
  // component is absent.
  const schemePiece = scheme != null ? `${scheme}:` : "";
  const authorityPiece = authority != null ? `//${authority}` : "";
  const pathPiece = `${path ?? ""}`;
  const queryPiece = query != null ? `?${query}` : "";
  const fragmentPiece = fragment != null ? `#${fragment}` : "";

  return schemePiece + authorityPiece + pathPiece + queryPiece +
    fragmentPiece;
};
249
/**
 * Converts an L·E·I·R·I to the corresponding I·R·I by percent‐encoding
 * unsupported characters.
 *
 * The input is first split into components with `parseReference`. If
 * that fails (no path is produced), the entire stringified input is
 * treated as the path. Each component is then escaped separately and
 * the result is recomposed with `composeReference`.
 *
 * This function is somewhat complex because the I·R·I syntax allows
 * private·use characters *only* in the query.
 */
export const escapeForIRI = ($) => {
  const components = parseReference($);
  const encoder = new TextEncoder();

  // These expressions are the same for every character, so they are
  // compiled once here rather than inside the loop. None has the "g"
  // or "y" flag, so `test` is stateless.
  const leiriExtra = new RegExp(
    `${leiri·ucschar}|${leiri·iprivate}`,
    "u",
  );
  const allowedAnywhere = new RegExp(ucschar, "u");
  const allowedInQuery = new RegExp(`${ucschar}|${iprivate}`, "u");

  // The path will always be present (although perhaps empty) on a
  // successful parse. If it isn’t (and parsing failed), treat the
  // entire input as the path.
  components.path ??= `${$}`;

  // Escape disallowed codepoints in each component and compose an
  // I·R·I from the result.
  return composeReference(
    Object.fromEntries(
      Object.entries(components).map(
        ([componentName, componentValue]) => {
          if (componentValue == null) {
            // This component is absent; keep it that way.
            return [componentName, undefined];
          } else {
            // This component is present; escape it.
            const allowed = componentName == "query"
              ? allowedInQuery
              : allowedAnywhere;
            const pieces = [];
            for (const character of componentValue) {
              if (
                leiriExtra.test(character) && !allowed.test(character)
              ) {
                // This codepoint needs to be escaped: emit one
                // two‐digit uppercase hexadecimal triplet per U·T·F‐8
                // byte.
                for (const byte of encoder.encode(character)) {
                  pieces.push(
                    `%${
                      byte.toString(0x10).toUpperCase().padStart(2, "0")
                    }`,
                  );
                }
              } else {
                // This codepoint does not need escaping.
                pieces.push(character);
              }
            }
            return [componentName, pieces.join("")];
          }
        },
      ),
    ),
  );
};
304
/**
 * Converts an (L·E·)I·R·I to the corresponding U·R·I by
 * percent‐encoding unsupported characters.
 *
 * The argument is converted to a string and processed one codepoint
 * at a time. Every codepoint matched by the L·E·I·R·I‐specific
 * character classes is replaced with the percent‐encoding of its
 * U·T·F‐8 bytes; everything else is passed through untouched.
 *
 * This does not punycode the authority.
 */
export const escapeForURI = ($) => {
  const encoder = new TextEncoder();

  // The expression is the same for every character, so it is compiled
  // once here rather than inside the loop. It has neither the "g" nor
  // the "y" flag, so `test` is stateless.
  const needsEscaping = new RegExp(
    `${leiri·ucschar}|${leiri·iprivate}`,
    "u",
  );

  const pieces = [];
  for (const character of `${$}`) {
    if (needsEscaping.test(character)) {
      // This codepoint needs to be escaped: emit one two‐digit
      // uppercase hexadecimal triplet per U·T·F‐8 byte.
      for (const byte of encoder.encode(character)) {
        pieces.push(
          `%${byte.toString(0x10).toUpperCase().padStart(2, "0")}`,
        );
      }
    } else {
      // This codepoint doesn’t need escaping.
      pieces.push(character);
    }
  }
  return pieces.join("");
};
332
/**
 * Merges a reference path with a base path.
 *
 * The result is everything in the (stringified) base path up to and
 * including its final slash, followed by the reference path. If the
 * base path contains no slash, the result is just the reference path.
 *
 * See §5.2.3 of R·F·C 3986.
 */
export const mergePaths = (base, reference) => {
  const basePath = `${base}`;
  // `lastIndexOf` gives -1 when there is no slash, making this 0 and
  // the retained prefix empty.
  const prefixLength = basePath.lastIndexOf("/") + 1;
  const prefix = basePath.slice(0, prefixLength);
  return `${prefix}${reference}`;
};
344
// The expression used by `parseReference`, compiled once when the
// module is evaluated rather than on every call. It is anchored at
// both ends and has two top‐level alternatives: one for references
// which begin with a scheme (capture groups named `absolute·…`) and
// one for relative references (`relative·…`). Within each, the path is
// captured as `…patha` when it follows an authority and as `…pathb`
// otherwise. The expression has neither the "g" nor the "y" flag, so
// `exec` is stateless and sharing it between calls is safe.
const referenceRegExp = new RegExp(
  String.raw
    `^(?:(?<absolute·scheme>${scheme}):(?://(?<absolute·authority>${leiri·iauthority})(?<absolute·patha>${leiri·ipath·abempty})|(?<absolute·pathb>(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})))(?:\?(?<absolute·query>${leiri·iquery}))?(?:#(?<absolute·fragment>${leiri·ifragment}))?|(?://(?<relative·authority>${leiri·iauthority})(?<relative·patha>${leiri·ipath·abempty})|(?<relative·pathb>(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})))(?:\?(?<relative·query>${leiri·iquery}))?(?:#(?<relative·fragment>${leiri·ifragment}))?)$`,
  "u",
);

/**
 * Returns the `scheme`, `authority`, `path`, `query`, and `fragment`
 * of the provided (L·E·)I·R·I reference.
 *
 * The argument is matched as a string. Components which are not
 * present in the reference are undefined in the result.
 *
 * `path` will always be defined for valid references, and will be
 * undefined for values which are not valid L·E·I·R·Is (in which case
 * every other component is undefined as well).
 */
export const parseReference = ($) => {
  const {
    absolute·scheme,
    absolute·authority,
    absolute·patha,
    absolute·pathb,
    absolute·query,
    absolute·fragment,
    relative·authority,
    relative·patha,
    relative·pathb,
    relative·query,
    relative·fragment,
  } = referenceRegExp.exec($)?.groups ?? {};

  // At most one of the two top‐level alternatives can have matched,
  // so for each component at most one of the candidate groups is
  // defined.
  return {
    scheme: absolute·scheme,
    authority: absolute·authority ?? relative·authority,
    path: absolute·patha ?? absolute·pathb ?? relative·patha ??
      relative·pathb,
    query: absolute·query ?? relative·query,
    fragment: absolute·fragment ?? relative·fragment,
  };
};
380
/**
 * Removes all dot segments ("." or "..") from the provided (L·E·)I·R·I
 * path.
 *
 * The argument is converted to a string and consumed from the front,
 * one rule at a time, while completed segments accumulate in an output
 * list which is joined at the end. Segments in the output keep their
 * leading slash (if they had one).
 *
 * See §5.2.4 of R·F·C 3986.
 */
export const removeDotSegments = ($) => {
  const segments = [];
  let remaining = `${$}`;
  while (remaining.length > 0) {
    if (remaining.startsWith("../")) {
      // A leading "../" is simply discarded. (This can only happen at
      // the start of the input; afterwards the remainder always begins
      // with a slash.)
      remaining = remaining.slice(3);
    } else if (remaining.startsWith("./")) {
      // Likewise for a leading "./".
      remaining = remaining.slice(2);
    } else if (remaining.startsWith("/./")) {
      // "/./" collapses to "/": resume from the second slash.
      remaining = remaining.slice(2);
    } else if (remaining == "/.") {
      // A final "/." becomes an empty trailing segment.
      segments.push("/");
      remaining = "";
    } else if (remaining.startsWith("/../")) {
      // "/../" collapses to "/" and cancels the previous output
      // segment (if there is one): resume from the second slash.
      remaining = remaining.slice(3);
      segments.pop();
    } else if (remaining == "/..") {
      // A final "/.." cancels the previous output segment (if there is
      // one) and leaves an empty trailing segment in its place.
      segments.pop();
      segments.push("/");
      remaining = "";
    } else if (remaining == "." || remaining == "..") {
      // Nothing but a dot segment is left; discard it.
      remaining = "";
    } else {
      // An ordinary segment: move everything up to (but not including)
      // the next slash after the first character to the output.
      const slashIndex = remaining.indexOf("/", 1);
      const segmentEnd = slashIndex == -1 ? remaining.length : slashIndex;
      segments.push(remaining.slice(0, segmentEnd));
      remaining = remaining.slice(segmentEnd);
    }
  }
  return segments.join("");
};
450
/**
 * Resolves the provided reference relative to the provided base
 * (L·E·)I·R·I.
 *
 * `R` is the reference to resolve. `Base` is the base to resolve it
 * against; if omitted, it defaults to `globalThis.location` (or the
 * empty string if that is nullish, which then fails the scheme check
 * below). Both are parsed with `parseReference`.
 *
 * Returns the resolved reference as a string. Throws a TypeError if
 * the base does not parse with a scheme.
 *
 * See §5.2 of R·F·C 3986.
 */
export const resolveReference = (
  R,
  // `globalThis.location` is used rather than a bare `location` so
  // that environments which do not declare the latter get the
  // TypeError below instead of a ReferenceError.
  Base = globalThis.location ?? "",
) => {
  const {
    scheme: Base·scheme,
    authority: Base·authority,
    path: Base·path,
    query: Base·query,
  } = parseReference(Base);
  if (Base·scheme == null) {
    // Base I·R·I’s must be valid I·R·I’s, meaning they must have a
    // scheme.
    throw new TypeError(
      `Piscēs: Base did not have a scheme: ${Base}.`,
    );
  } else {
    // The provided Base I·R·I is valid.
    const {
      scheme: R·scheme,
      authority: R·authority,
      path: R·path,
      query: R·query,
      fragment: R·fragment,
    } = parseReference(R);
    if (R·scheme != null) {
      // The reference has its own scheme; nothing is taken from the
      // base.
      return composeReference({
        scheme: R·scheme,
        authority: R·authority,
        path: removeDotSegments(R·path),
        query: R·query,
        fragment: R·fragment,
      });
    } else if (R·authority != null) {
      // The reference has its own authority; only the scheme is taken
      // from the base.
      return composeReference({
        scheme: Base·scheme,
        authority: R·authority,
        path: removeDotSegments(R·path),
        query: R·query,
        fragment: R·fragment,
      });
    } else if (!R·path) {
      // The reference has no path (or failed to parse); the base path
      // is used as‐is, and the base query is used if the reference has
      // none.
      return composeReference({
        scheme: Base·scheme,
        authority: Base·authority,
        path: Base·path,
        query: R·query ?? Base·query,
        fragment: R·fragment,
      });
    } else if (R·path[0] == "/") {
      // The reference path is absolute and replaces the base path.
      return composeReference({
        scheme: Base·scheme,
        authority: Base·authority,
        path: removeDotSegments(R·path),
        query: R·query,
        fragment: R·fragment,
      });
    } else {
      // The reference path is relative and must be merged with the
      // base path. Per §5.2.3, a slash is prepended only when the base
      // has an authority and an empty path; a base with an empty path
      // and *no* authority contributes nothing.
      const merged = Base·authority != null && !Base·path
        ? `/${R·path}`
        : mergePaths(Base·path, R·path);
      return composeReference({
        scheme: Base·scheme,
        authority: Base·authority,
        path: removeDotSegments(merged),
        query: R·query,
        fragment: R·fragment,
      });
    }
  }
};
This page took 0.101545 seconds and 3 git commands to generate.