2 // ====================================================================
4 // Copyright © 2020, 2022–2023 Lady [@ Lady’s Computer].
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
10 import { push
, splice
} from "./collection.js";
11 import { bind
, call
} from "./function.js";
12 import { objectCreate
} from "./object.js";
15 getFirstSubstringIndex
,
16 getLastSubstringIndex
,
23 import { ITERATOR
} from "./value.js";
// Character class holding the `sub-delims` punctuation characters.
const sub·delims = rawString`[!\$&'()*+,;=]`;

// Character class holding the `gen-delims` punctuation characters.
const gen·delims = rawString`[:/?#\[\]@]`;

// Either delimiter class. This is a bare alternation, so it must be
// wrapped in a group wherever it is interpolated. The lint directive
// below suppresses the unused‐variable warning for this binding.
//deno-lint-ignore no-unused-vars
const reserved = rawString`${gen·delims}|${sub·delims}`;

// Character class of ASCII letters, digits, hyphen, full stop,
// underscore, and tilde.
const unreserved = rawString`[A-Za-z0-9\-\._~]`;

// A percent sign followed by exactly two hexadecimal digits.
const pct·encoded = rawString`%[0-9A-Fa-f][0-9A-Fa-f]`;
32 rawString
`[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]`;
34 rawString
`(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})\.(?:${dec·octet})`;
// One to four hexadecimal digits.
const h16 = rawString`[0-9A-Fa-f]{1,4}`;

// Either two `h16` groups separated by a colon, or an `IPv4address`.
// This is a bare alternation; interpolate it inside a group.
const ls32 = rawString`(?:${h16}):(?:${h16})|${IPv4address}`;
38 rawString
`(?:(?:${h16}):){6}(?:${ls32})|::(?:(?:${h16}):){5}(?:${ls32})|(?:${h16})?::(?:(?:${h16}):){4}(?:${ls32})|(?:(?:(?:${h16}):){0,1}(?:${h16}))?::(?:(?:${h16}):){3}(?:${ls32})|(?:(?:(?:${h16}):){0,2}(?:${h16}))?::(?:(?:${h16}):){2}(?:${ls32})|(?:(?:(?:${h16}):){0,3}(?:${h16}))?::(?:${h16}):(?:${ls32})|(?:(?:(?:${h16}):){0,4}(?:${h16}))?::(?:${ls32})|(?:(?:(?:${h16}):){0,5}(?:${h16}))?::(?:${h16})|(?:(?:(?:${h16}):){0,6}(?:${h16}))?::`;
// A "v", one or more hexadecimal digits, a full stop, and then one or
// more characters drawn from `unreserved`, `sub·delims`, or ":".
// R·F·C 3986 §3.2.2 gives the tail as `1*( unreserved / sub-delims /
// ":" )`, so the final group has to be allowed to repeat.
rawString`v[0-9A-Fa-f]{1,}\.(?:${unreserved}|${sub·delims}|:)+`;
// An `IPv6address` or `IPvFuture` enclosed in square brackets.
const IP·literal = rawString`\[(?:${IPv6address}|${IPvFuture})\]`;

// Zero or more ASCII digits (so the empty string also matches).
const port = rawString`[0-9]*`;

// An ASCII letter followed by any number of ASCII letters, digits,
// plus signs, hyphens, or full stops.
const scheme = rawString`[A-Za-z][A-Za-z0-9+\-\.]*`;
45 rawString
`${unreserved}|${pct·encoded}|${sub·delims}|[:@]`;
// Any number of `pchar`s, slashes, or question marks.
const fragment = rawString`(?:${pchar}|[/?])*`;

// Same pattern text as `fragment`: any number of `pchar`s, slashes, or
// question marks.
const query = rawString`(?:${pchar}|[/?])*`;
49 rawString
`(?:${unreserved}|${pct·encoded}|${sub·delims}|@)+`;
// A nonempty run of `pchar`s.
const segment·nz = rawString`(?:${pchar})+`;

// A possibly‐empty run of `pchar`s.
const segment = rawString`(?:${pchar})*`;

// The empty pattern.
const path·empty = rawString``;

// A nonempty segment followed by any number of slash‐prefixed
// (possibly‐empty) segments.
const path·rootless =
  rawString`(?:${segment·nz})(?:/(?:${segment}))*`;
55 rawString
`(?:${segment·nz·nc})(?:/(?:${segment}))*`;
57 rawString
`/(?:(?:${segment·nz})(?:/(?:${segment}))*)?`;
// Any number of slash‐prefixed (possibly‐empty) segments; matches the
// empty string when there are none.
const path·abempty = rawString`(?:/(?:${segment}))*`;
60 rawString
`${path·abempty}|${path·absolute}|${path·noscheme}|${path·rootless}|${path·empty}`;
62 rawString
`(?:${unreserved}|${pct·encoded}|${sub·delims})*`;
// An `IP·literal`, an `IPv4address`, or a `reg·name`, tried in that
// order. This is a bare alternation; interpolate it inside a group.
const host =
  rawString`${IP·literal}|${IPv4address}|${reg·name}`;
65 rawString
`(?:${unreserved}|${pct·encoded}|${sub·delims}|:)*`;
67 rawString
`(?:(?:${userinfo})@)?(?:${host})(?::(?:${port}))?`;
69 rawString
`//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·noscheme})|(?:${path·empty})`;
71 rawString
`(?:${relative·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`;
73 rawString
`//(?:${authority})(?:${path·abempty})|(?:${path·absolute})|(?:${path·rootless})|(?:${path·empty})`;
75 rawString
`(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?`;
77 rawString
`(?:${scheme}):(?:${hier·part})(?:\?(?:${query}))?(?:#(?:${fragment}))?`;
// Either a `URI` or a `relative·ref`, each wrapped in its own
// non‐capturing group.
const URI·reference =
  rawString`(?:${URI})|(?:${relative·ref})`;
81 rawString
`[\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`;
// Character class of the non‐A·S·C·I·I code points permitted as
// `ucschar`. R·F·C 3987 §2.2 gives the final range as E1000–EFFFD;
// code points E0000–E0FFF are not part of `ucschar`, so the last range
// starts at E1000 rather than E0000.
rawString`[\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}]`;
// The A·S·C·I·I unreserved character class or a `ucschar`. This is a
// bare alternation; interpolate it inside a group.
const iunreserved =
  rawString`[A-Za-z0-9\-\._~]|${ucschar}`;
86 rawString
`${iunreserved}|${pct·encoded}|${sub·delims}|[:@]`;
// Any number of `ipchar`s, slashes, or question marks.
const ifragment = rawString`(?:${ipchar}|[/?])*`;

// Like `ifragment`, but `iprivate` characters are accepted as well.
const iquery =
  rawString`(?:${ipchar}|${iprivate}|[/?])*`;
// A nonempty run of `iunreserved`, `pct·encoded`, `sub·delims`, or "@"
// (the alternatives do not include a colon).
const isegment·nz·nc = rawString`(?:${iunreserved}|${pct·encoded}|${sub·delims}|@)+`;

// A nonempty run of `ipchar`s.
const isegment·nz = rawString`(?:${ipchar})+`;

// A possibly‐empty run of `ipchar`s.
const isegment = rawString`(?:${ipchar})*`;
// The empty pattern.
const ipath·empty = rawString``;

// A nonempty segment followed by any number of slash‐prefixed segments.
const ipath·rootless = rawString`(?:${isegment·nz})(?:/(?:${isegment}))*`;

// As `ipath·rootless`, but the first segment uses the colon‐free
// `isegment·nz·nc` pattern.
const ipath·noscheme = rawString`(?:${isegment·nz·nc})(?:/(?:${isegment}))*`;

// A slash, optionally followed by a rootless path.
const ipath·absolute = rawString`/(?:(?:${isegment·nz})(?:/(?:${isegment}))*)?`;

// Any number of slash‐prefixed (possibly‐empty) segments.
const ipath·abempty = rawString`(?:/(?:${isegment}))*`;
102 rawString
`${ipath·abempty}|${ipath·absolute}|${ipath·noscheme}|${ipath·rootless}|${ipath·empty}`;
104 rawString
`(?:${iunreserved}|${pct·encoded}|${sub·delims})*`;
// An `IP·literal`, an `IPv4address`, or an `ireg·name`, tried in that
// order. This is a bare alternation; interpolate it inside a group.
const ihost =
  rawString`${IP·literal}|${IPv4address}|${ireg·name}`;
107 rawString
`(?:${iunreserved}|${pct·encoded}|${sub·delims}|:)*`;
109 rawString
`(?:(?:${iuserinfo})@)?(?:${ihost})(?::(?:${port}))?`;
// Either "//" plus an authority and an abempty path, or else an
// absolute, noscheme, or empty path. This is a bare alternation;
// interpolate it inside a group.
const irelative·part = rawString`//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·noscheme})|(?:${ipath·empty})`;

// A relative part followed by an optional "?"‐prefixed query and an
// optional "#"‐prefixed fragment.
const irelative·ref = rawString`(?:${irelative·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`;
115 rawString
`//(?:${iauthority})(?:${ipath·abempty})|(?:${ipath·absolute})|(?:${ipath·rootless})|(?:${ipath·empty})`;
117 rawString
`(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?`;
119 rawString
`(?:${scheme}):(?:${ihier·part})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`;
// Either an `IRI` or an `irelative·ref`, each wrapped in its own
// non‐capturing group.
const IRI·reference =
  rawString`(?:${IRI})|(?:${irelative·ref})`;
// Character class of the code point ranges E000–F8FF, E0000–E0FFF,
// F0000–FFFFD, and 100000–10FFFD.
const leiri·iprivate = rawString`[\u{E000}-\u{F8FF}\u{E0000}-\u{E0FFF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]`;

// Character class of space, several A·S·C·I·I punctuation characters
// (including the backtick, which is interpolated because it cannot be
// written bare inside a template literal), and the ranges 0–1F,
// 7F–D7FF, E000–FFFD, and 10000–10FFFF.
const leiri·ucschar = rawString`[ <>"{}|\\^${"\x60"}\u{0}-\u{1F}\u{7F}-\u{D7FF}\u{E000}-\u{FFFD}\u{10000}-\u{10FFFF}]`;

// The A·S·C·I·I unreserved character class or a `leiri·ucschar`. This
// is a bare alternation; interpolate it inside a group.
const leiri·iunreserved = rawString`[A-Za-z0-9\-\._~]|${leiri·ucschar}`;
129 rawString
`${leiri·iunreserved}|${pct·encoded}|${sub·delims}|[:@]`;
// Any number of `leiri·ipchar`s, slashes, or question marks.
const leiri·ifragment =
  rawString`(?:${leiri·ipchar}|[/?])*`;
132 rawString
`(?:${leiri·ipchar}|${leiri·iprivate}|[/?])*`;
// A nonempty run of `leiri·iunreserved`, `pct·encoded`, `sub·delims`,
// or "@" (the alternatives do not include a colon).
const leiri·isegment·nz·nc = rawString`(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|@)+`;

// A nonempty run of `leiri·ipchar`s.
const leiri·isegment·nz = rawString`(?:${leiri·ipchar})+`;

// A possibly‐empty run of `leiri·ipchar`s.
const leiri·isegment = rawString`(?:${leiri·ipchar})*`;
// The empty pattern.
const leiri·ipath·empty = rawString``;

// A nonempty segment followed by any number of slash‐prefixed segments.
const leiri·ipath·rootless = rawString`(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*`;

// As `leiri·ipath·rootless`, but the first segment uses the colon‐free
// `leiri·isegment·nz·nc` pattern.
const leiri·ipath·noscheme = rawString`(?:${leiri·isegment·nz·nc})(?:/(?:${leiri·isegment}))*`;

// A slash, optionally followed by a rootless path.
const leiri·ipath·absolute = rawString`/(?:(?:${leiri·isegment·nz})(?:/(?:${leiri·isegment}))*)?`;

// Any number of slash‐prefixed (possibly‐empty) segments.
const leiri·ipath·abempty = rawString`(?:/(?:${leiri·isegment}))*`;
146 rawString
`${leiri·ipath·abempty}|${leiri·ipath·absolute}|${leiri·ipath·noscheme}|${leiri·ipath·rootless}|${leiri·ipath·empty}`;
// A possibly‐empty run of `leiri·iunreserved`, `pct·encoded`, or
// `sub·delims`.
const leiri·ireg·name = rawString`(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims})*`;
150 rawString
`${IP·literal}|${IPv4address}|${leiri·ireg·name}`;
// A possibly‐empty run of `leiri·iunreserved`, `pct·encoded`,
// `sub·delims`, or ":".
const leiri·iuserinfo = rawString`(?:${leiri·iunreserved}|${pct·encoded}|${sub·delims}|:)*`;

// An optional userinfo ending in "@", then a host, then an optional
// ":"‐prefixed port.
const leiri·iauthority = rawString`(?:(?:${leiri·iuserinfo})@)?(?:${leiri·ihost})(?::(?:${port}))?`;
// Either "//" plus an authority and an abempty path, or else an
// absolute, noscheme, or empty path. This is a bare alternation;
// interpolate it inside a group.
const leiri·irelative·part = rawString`//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})`;

// A relative part followed by an optional "?"‐prefixed query and an
// optional "#"‐prefixed fragment.
const leiri·irelative·ref = rawString`(?:${leiri·irelative·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`;

// As `leiri·irelative·part`, except that the third alternative is a
// rootless path instead of a noscheme path.
const leiri·ihier·part = rawString`//(?:${leiri·iauthority})(?:${leiri·ipath·abempty})|(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})`;
// A scheme, a colon, a hierarchical part, and an optional "?"‐prefixed
// query; the pattern has no fragment component.
const absolute·LEIRI = rawString`(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?`;
164 rawString
`(?:${scheme}):(?:${leiri·ihier·part})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`;
// Either a `LEIRI` or a `leiri·irelative·ref`, each wrapped in its own
// non‐capturing group.
const LEIRI·reference = rawString`(?:${LEIRI})|(?:${leiri·irelative·ref})`;
170 * Recomposes an (L·E·)I·R·I reference from its component parts.
172 * See §5.3 of R·F·C 3986.
177 * Converts an L·E·I·R·I to the corresponding I·R·I by
178 * percent‐encoding unsupported characters.
180 * This function is somewhat complex because the I·R·I syntax allows
181 * private·use characters *only* in the query.
186 * Converts an (L·E·)I·R·I to the corresponding U·R·I by
187 * percent‐encoding unsupported characters.
189 * This does not punycode the authority.
194 * Removes all dot segments ("." or "..") from the provided
197 * See §5.2.4 of R·F·C 3986.
201 const TE
= TextEncoder
;
202 const { toString
: numberToString
} = Number
.prototype;
203 const { encode
: teEncode
} = TE
.prototype;
205 const { [ITERATOR
]: arrayIterator
} = Array
.prototype;
207 next
: arrayIteratorNext
,
208 } = Object
.getPrototypeOf([][ITERATOR
]());
210 next
: generatorIteratorNext
,
211 } = Object
.getPrototypeOf(function* () {}.prototype);
212 const { [ITERATOR
]: stringIterator
} = String
.prototype;
214 next
: stringIteratorNext
,
215 } = Object
.getPrototypeOf(""[ITERATOR
]());
217 const iriCharacterIterablePrototype
= {
222 call(stringIterator
, this.source
, []),
228 const iriGeneratorIterablePrototype
= {
231 next
: bind(generatorIteratorNext
, this.generator(), []),
235 const iriSegmentIterablePrototype
= {
240 call(arrayIterator
, this.segments
, []),
248 composeReference
: ($) =>
251 iriGeneratorIterablePrototype
,
254 value
: function* () {
255 const { scheme
, authority
, path
, query
, fragment
} = $;
256 if (scheme
!= null) {
257 // A scheme is present.
261 // No scheme is present.
264 if (authority
!= null) {
265 // An authority is present.
269 // No authority is present.
274 // A query is present.
278 // No query is present.
281 if (fragment
!= null) {
282 // A fragment is present.
286 // No fragment is present.
295 escapeForIRI
: ($) => {
296 const components
= parseReference($);
298 // The path will always be present (although perhaps empty) on a
299 // successful parse. If it isn’t (and parsing failed), treat the
300 // entire input as the path.
301 components
.path
??= `${$}`;
303 // Escape disallowed codepoints in each component and compose an
304 // I·R·I from the result.
305 const reference
= objectCreate(null);
306 for (const componentName
in components
) {
307 const componentValue
= components
[componentName
];
308 reference
[componentName
] = componentValue
== null
312 iriGeneratorIterablePrototype
,
315 value
: function* () {
316 const encoder
= new TE();
318 const character
of objectCreate(
319 iriCharacterIterablePrototype
,
320 { source
: { value
: componentValue
} },
325 `${leiri·ucschar}|${leiri·iprivate}`,
329 componentName == "query"
335 // This codepoint needs to be escaped.
336 const encoded
= call(teEncode
, encoder
, [
341 index
< encoded
.length
;
344 const byte = encoded
[index
];
345 yield `%${byte < 0x10 ? "0" : ""}${
347 call(numberToString, byte, [0x10]),
352 // This codepoint does not need escaping.
363 return composeReference(reference
);
368 iriGeneratorIterablePrototype
,
371 value
: function* () {
372 const encoder
= new TE();
374 const character
of objectCreate(
375 iriCharacterIterablePrototype
,
376 { source
: { value
: `${$}` } },
381 `${leiri·ucschar}|${leiri·iprivate}`,
384 // This codepoint needs to be escaped.
385 const encoded
= call(teEncode
, encoder
, [
390 index
< encoded
.length
;
393 const byte = encoded
[index
];
394 yield `%${byte < 0x10 ? "0" : ""}${
396 call(numberToString, byte, [0x10]),
401 // This codepoint does not need escaping.
411 removeDotSegments
: ($) => {
412 const input
= `${$}`;
414 const { length
} = input
;
416 while (index
< length
) {
417 if (stringStartsWith(input
, "../", index
)) {
418 // The input starts with a double leader; drop it. This can
419 // only occur at the beginning of the input.
421 } else if (stringStartsWith(input
, "./", index
)) {
422 // The input starts with a single leader; drop it. This can
423 // only occur at the beginning of the input.
425 } else if (stringStartsWith(input
, "/./", index
)) {
426 // The input starts with a slash, single leader, and another
427 // slash. Ignore it, and move the input to just before the
431 stringStartsWith(input
, "/.", index
) && index
+ 2 == length
433 // The input starts with a slash and single leader, and this
434 // exhausts the string. Push an empty segment and move the
435 // index to the end of the string.
438 } else if (stringStartsWith(input
, "/../", index
)) {
439 // The input starts with a slash, double leader, and another
440 // slash. Drop a segment from the output, and move the input
441 // to just before the second slash.
443 splice(output
, -1, 1);
445 stringStartsWith(input
, "/..", index
) && index
+ 3 == length
// The input starts with a slash and double leader, and this
448 // exhausts the string. Drop a segment from the output, push
449 // an empty segment, and move the index to the end of the
451 splice(output
, -1, 1, "/");
454 stringStartsWith(input
, ".", index
) && index
+ 1 == length
||
455 stringStartsWith(input
, "..", index
) && index
+ 2 == length
457 // The input starts with a single or double leader, and this
458 // exhausts the string. Do nothing (this can only occur at
459 // the beginning of input) and move the index to the end of
463 // The input does not start with a leader. Advance the index
464 // to the position before the next slash and push the segment
465 // between the old and new positions.
466 const nextIndex
= getFirstSubstringIndex(
471 if (nextIndex
== -1) {
472 // No slash remains; set index to the end of the string.
473 push(output
, substring(input
, index
));
476 // There are further path segments.
477 push(output
, substring(input
, index
, nextIndex
));
484 iriSegmentIterablePrototype
,
485 { segments
: { value
: output
} },
494 isAbsoluteIRI
, // I·R·I with no fragment
495 isAbsoluteLEIRI
, // L·E·I·R·I with no fragment
496 isAbsoluteURI
, // U·R·I with no fragment
500 isIRISuffix
, // only authority, path, query, fragment
504 isLEIRISuffix
, // only authority, path, query, fragment
508 isURISuffix
, // only authority, path, query, fragment
509 } = Object
.fromEntries(
511 isAbsoluteIRI
: absolute
·IRI
,
512 isAbsoluteLEIRI
: absolute
·LEIRI
,
513 isAbsoluteURI
: absolute
·URI
,
516 isIRIReference
: IRI
·reference
,
518 rawString
`(?:${iauthority})(?:${ipath·abempty})(?:\?(?:${iquery}))?(?:#(?:${ifragment}))?`,
520 isLEIRIPath
: leiri
·ipath
,
521 isLEIRIReference
: LEIRI
·reference
,
523 rawString
`(?:${leiri·iauthority})(?:${leiri·ipath·abempty})(?:\?(?:${leiri·iquery}))?(?:#(?:${leiri·ifragment}))?`,
526 isURIReference
: URI
·reference
,
528 rawString
`(?:${authority})(?:${path·abempty})(?:\?(?:${query}))?(?:#(?:${fragment}))?`,
530 ([key
, value
]) => [key
, new Matcher(rawString
`^(?:${value})$`)],
535 * Returns the result of merging the provided reference path with the
536 * provided base path.
538 * See §5.2.3 of R·F·C 3986.
540 export const mergePaths
= (base
, reference
) => {
541 const baseStr
= `${base}` || "/";
543 substring(baseStr, 0, getLastSubstringIndex(baseStr, "/") + 1)
549 * Returns the `scheme`, `authority`, `path`, `query`, and `fragment`
550 * of the provided (L·E·)I·R·I reference.
552 * `path` will always be defined for valid references, and will be
553 * undefined for values which are not valid L·E·I·R·Is.
558 const { prototype: rePrototype
} = RE
;
559 const { exec
: reExec
} = rePrototype
;
561 parseReference
: ($) => {
563 rawString
`^(?:(?<absolute·scheme>${scheme}):(?://(?<absolute·authority>${leiri·iauthority})(?<absolute·patha>${leiri·ipath·abempty})|(?<absolute·pathb>(?:${leiri·ipath·absolute})|(?:${leiri·ipath·rootless})|(?:${leiri·ipath·empty})))(?:\?(?<absolute·query>${leiri·iquery}))?(?:#(?<absolute·fragment>${leiri·ifragment}))?|(?://(?<relative·authority>${leiri·iauthority})(?<relative·patha>${leiri·ipath·abempty})|(?<relative·pathb>(?:${leiri·ipath·absolute})|(?:${leiri·ipath·noscheme})|(?:${leiri·ipath·empty})))(?:\?(?<relative·query>${leiri·iquery}))?(?:#(?<relative·fragment>${leiri·ifragment}))?)$`,
578 } = call(reExec
, re
, [$])?.groups
?? {};
580 scheme
: absolute
·scheme
,
581 authority
: absolute
·authority
?? relative
·authority
,
582 path
: absolute
·patha
?? absolute
·pathb
?? relative
·patha
??
584 query
: absolute
·query
?? relative
·query
,
585 fragment
: absolute
·fragment
?? relative
·fragment
,
592 * Resolves the provided reference relative to the provided base
595 * See §5.2 of R·F·C 3986.
597 export const resolveReference
= (R
, Base
= location
?? "") => {
600 authority
: Base
·authority
,
603 } = parseReference(Base
);
604 if (Base
·scheme
== null) {
605 // Base I·R·I’s must be valid I·R·I’s, meaning they must have a
608 `Piscēs: Base did not have a scheme: ${Base}.`,
611 // The provided Base I·R·I is valid.
614 authority
: R
·authority
,
617 fragment
: R
·fragment
,
618 } = parseReference(R
);
619 return composeReference(
623 authority
: R
·authority
,
624 path
: removeDotSegments(R
·path
),
626 fragment
: R
·fragment
,
628 : R
·authority
!= null
631 authority
: R
·authority
,
632 path
: removeDotSegments(R
·path
),
634 fragment
: R
·fragment
,
639 authority
: Base
·authority
,
641 query
: R
·query
?? Base
·query
,
642 fragment
: R
·fragment
,
646 authority
: Base
·authority
,
647 path
: R
·path
[0] == "/"
648 ? removeDotSegments(R
·path
)
649 : removeDotSegments(mergePaths(Base
·path
, R
·path
)),
651 fragment
: R
·fragment
,