]> Lady’s Gitweb - Pisces/blob - string.js
Add ::flags override to Matcher
[Pisces] / string.js
// ♓🌟 Piscēs ∷ string.js
// ====================================================================
//
// Copyright © 2022–2023 Lady [@ Lady’s Computer].
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
import { bind, call, identity, makeCallable } from "./function.js";
import {
  defineOwnProperties,
  getOwnPropertyDescriptors,
  getPrototype,
  objectCreate,
  setPrototype,
} from "./object.js";
import { type } from "./value.js";
19
export const {
  /**
   * A RegExp·like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although other methods like `exec` coerce their arguments and may
   * still produce a match.
   */
  Matcher,
} = (() => {
  const RE = RegExp;
  const { prototype: rePrototype } = RE;
  const { exec: reExec, toString: reToString } = rePrototype;

  // The intrinsic RegExp accessors are captured once, up front, and
  // are always applied to the internal regular expression through
  // `call`, so Matchers never look these properties up on
  // `RegExp.prototype` again after this module has been evaluated.
  const getDotAll =
    Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get;
  const getFlags =
    Object.getOwnPropertyDescriptor(rePrototype, "flags").get;
  const getGlobal =
    Object.getOwnPropertyDescriptor(rePrototype, "global").get;
  const getHasIndices =
    Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get;
  const getIgnoreCase =
    Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get;
  const getMultiline =
    Object.getOwnPropertyDescriptor(rePrototype, "multiline").get;
  const getSource =
    Object.getOwnPropertyDescriptor(rePrototype, "source").get;
  const getSticky =
    Object.getOwnPropertyDescriptor(rePrototype, "sticky").get;
  const getUnicode =
    Object.getOwnPropertyDescriptor(rePrototype, "unicode").get;

  // ※ `identity` comes from ./function.js; this class presumably
  // relies on it returning its argument, so that `this` inside the
  // constructor is the arrow function passed to `super` (which is
  // what makes Matchers callable).
  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new Matcher from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     * If it is nullish, the name is derived from the string form of
     * the internal regular expression.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the Matcher
     * object itself. If the return value of this call is falsey, then
     * the match will be considered a failure.
     *
     * ☡ This constructor throws a TypeError if the provided source is
     * a regular expression without the unicode flag, or if the
     * provided constraint is neither null nor callable.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     */
    constructor(source, name = undefined, constraint = null) {
      super(
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $ &&
              (constraint === null || constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        // Determine whether the source is a RegExp by applying the
        // intrinsic `unicode` getter to it. Unlike executing the
        // source against a string, this never writes to the
        // `lastIndex` of a global or sticky RegExp owned by the
        // caller.
        //
        // The getter throws for anything which is not a RegExp,
        // except `RegExp.prototype` itself, for which it returns
        // undefined; both cases are treated as “not a RegExp”.
        let unicode;
        try {
          unicode = call(getUnicode, source, []);
        } catch {
          unicode = undefined;
        }
        if (unicode === undefined) {
          // The provided source is not a regular expression; use its
          // string representation as a (unicode) pattern.
          return new RE(`${source}`, "u");
        } else if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag. Copy
          // it so that later changes to the original do not affect
          // this Matcher.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            // Matching always starts from the beginning of the
            // string, so the exposed `lastIndex` is pinned to 0.
            lastIndex: {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            },
            name: {
              value: name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            },
          },
        );
      }
    }

    /** Gets whether the dotAll flag is present on this Matcher. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this Matcher on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * The provided value is converted to a string before matching.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /**
     * Gets the flags present on this Matcher.
     *
     * ※ This needs to be defined because the internal RegExp object
     * may have flags which are not yet recognized by ♓🌟 Piscēs.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether the global flag is present on this Matcher. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /** Gets whether the hasIndices flag is present on this Matcher. */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /** Gets whether the ignoreCase flag is present on this Matcher. */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /** Gets whether the multiline flag is present on this Matcher. */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this Matcher. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this Matcher. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this Matcher.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exported constructor extends RegExp (so that its prototype
  // inherits from `RegExp.prototype`) but simply defers to the
  // internal class for actual construction.
  const matcherConstructor = defineOwnProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: { value: "Matcher" },
      length: { value: 1 },
    },
  );

  // The prototype given to every Matcher: the exported constructor’s
  // prototype, with the members of the internal class copied onto it.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    { constructor: { value: matcherConstructor } },
  );

  return { Matcher: matcherConstructor };
})();
250
export const {
  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * lowercase.
   *
   * Only the letters `A` through `Z` in the string representation of
   * the provided value are changed.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * uppercase.
   *
   * Only the letters `a` through `z` in the string representation of
   * the provided value are changed.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = String.prototype;

  // The replacer functions are built once here rather than on every
  // call. Each is invoked by the replacement with the matched letter
  // as its first argument.
  const lowercaseMatch = makeCallable(stringToLowercase);
  const uppercaseMatch = makeCallable(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, lowercaseMatch),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, uppercaseMatch),
  };
})();
283
export const {
  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,

  /**
   * Returns the result of converting the provided value to a string of
   * scalar values by replacing (unpaired) surrogate values with
   * U+FFFD.
   */
  scalarValueString,
} = (() => {
  const {
    iterator: iteratorSymbol,
    toStringTag: toStringTagSymbol,
  } = Symbol;
  const { [iteratorSymbol]: arrayIterator } = Array.prototype;
  const arrayIteratorPrototype = Object.getPrototypeOf(
    [][iteratorSymbol](),
  );
  const { next: arrayIteratorNext } = arrayIteratorPrototype;
  const iteratorPrototype = Object.getPrototypeOf(
    arrayIteratorPrototype,
  );
  const { [iteratorSymbol]: stringIterator } = String.prototype;
  const stringIteratorPrototype = Object.getPrototypeOf(
    ""[iteratorSymbol](),
  );
  const { next: stringIteratorNext } = stringIteratorPrototype;

  /**
   * An iterator object for iterating over code values (either code
   * units or codepoints) in a string.
   *
   * ※ This class is not exposed, although its methods are (through
   * the prototypes of string code value iterator objects).
   */
  const StringCodeValueIterator = class extends identity {
    #allowSurrogates;
    #baseIterator;

    /**
     * Constructs a new string code value iterator from the provided
     * base iterator.
     *
     * If the provided base iterator is an array iterator, this is a
     * code unit iterator. If the provided iterator is a string
     * iterator and surrogates are allowed, this is a codepoint
     * iterator. If the provided iterator is a string iterator and
     * surrogates are not allowed, this is a scalar value iterator.
     */
    constructor(baseIterator, allowSurrogates = true) {
      // The resulting object inherits from the shared string code
      // value iterator prototype, not from this class’s prototype.
      super(objectCreate(stringCodeValueIteratorPrototype));
      this.#allowSurrogates = !!allowSurrogates;
      this.#baseIterator = baseIterator;
    }

    /** Provides the next code value in the iterator. */
    next() {
      const baseIterator = this.#baseIterator;
      switch (getPrototype(baseIterator)) {
        case arrayIteratorPrototype: {
          // The base iterator is iterating over U·C·S characters.
          const {
            value: ucsCharacter,
            done,
          } = call(arrayIteratorNext, baseIterator, []);
          return done
            ? { value: undefined, done: true }
            : { value: getCodeUnit(ucsCharacter, 0), done: false };
        }
        case stringIteratorPrototype: {
          // The base iterator is iterating over Unicode characters.
          const {
            value: character,
            done,
          } = call(stringIteratorNext, baseIterator, []);
          if (done) {
            // The base iterator has been exhausted.
            return { value: undefined, done: true };
          } else {
            // The base iterator provided a character; yield the
            // codepoint, or U+FFFD in place of a surrogate when
            // surrogates are not allowed.
            const codepoint = getCodepoint(character, 0);
            return {
              value: this.#allowSurrogates || codepoint <= 0xD7FF ||
                  codepoint >= 0xE000
                ? codepoint
                : 0xFFFD,
              done: false,
            };
          }
        }
        default: {
          // Should not be possible!
          throw new TypeError(
            "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
          );
        }
      }
    }
  };

  const {
    next: stringCodeValueIteratorNext,
  } = StringCodeValueIterator.prototype;
  const stringCodeValueIteratorPrototype = objectCreate(
    iteratorPrototype,
    {
      next: {
        configurable: true,
        enumerable: false,
        value: stringCodeValueIteratorNext,
        writable: true,
      },
      [toStringTagSymbol]: {
        configurable: true,
        enumerable: false,
        value: "String Code Value Iterator",
        writable: false,
      },
    },
  );

  return {
    codeUnits: ($) =>
      new StringCodeValueIterator(call(arrayIterator, `${$}`, [])),
    codepoints: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        true,
      ),
    scalarValues: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        false,
      ),
    scalarValueString: ($) => {
      // Step through the scalar values with the captured `next`
      // method (so a modified iterator prototype cannot interfere)
      // and append them one at a time. Passing every scalar value to
      // a single `String.fromCodePoint` call would exceed the
      // engine’s argument limit for long strings.
      const iterator = new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        false,
      );
      let result = "";
      for (
        let step = call(stringCodeValueIteratorNext, iterator, []);
        !step.done;
        step = call(stringCodeValueIteratorNext, iterator, [])
      ) {
        result += stringFromCodepoints(step.value);
      }
      return result;
    },
  };
})();
459
/**
 * Returns an iterator over the characters (strings which each hold a
 * single codepoint) in the string representation of the provided
 * value according to the algorithm of String::[Symbol.iterator].
 */
export const characters = makeCallable(
  String.prototype[Symbol.iterator],
);
468
/**
 * Returns the character at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::codePointAt.
 *
 * Returns undefined if there is no codepoint at the provided
 * position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  return codepoint == null
    ? undefined
    : stringFromCodepoints(codepoint);
};
480
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::charCodeAt.
 */
export const getCodeUnit = makeCallable(String.prototype.charCodeAt);
487
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::codePointAt.
 */
export const getCodepoint = makeCallable(String.prototype.codePointAt);
494
/**
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value according to the
 * algorithm of String::indexOf.
 */
export const getFirstSubstringIndex = makeCallable(
  String.prototype.indexOf,
);
503
/**
 * Returns the index of the last occurrence of the search string in the
 * string representation of the provided value according to the
 * algorithm of String::lastIndexOf.
 */
export const getLastSubstringIndex = makeCallable(
  String.prototype.lastIndexOf,
);
512
/**
 * Returns the result of joining the provided iterable.
 *
 * If no separator is provided, it defaults to ",".
 *
 * If a value is nullish, it will be stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = Array.prototype;
  // The iterable is first collected into a fresh array, which is then
  // joined using the captured Array::join.
  const join = ($, separator = ",") =>
    call(arrayJoin, [...$], [`${separator}`]);
  return join;
})();
526
export const {
  /**
   * Returns a string created from the raw value of the tagged template
   * literal.
   *
   * ※ This is an alias for String.raw.
   */
  raw: rawString,

  /**
   * Returns a string created from the provided code units.
   *
   * ※ This is an alias for String.fromCharCode.
   */
  fromCharCode: stringFromCodeUnits,

  /**
   * Returns a string created from the provided codepoints.
   *
   * ※ This is an alias for String.fromCodePoint.
   */
  fromCodePoint: stringFromCodepoints,
} = String;
550
/**
 * Returns the result of splitting the provided value on A·S·C·I·I
 * whitespace.
 *
 * If the string representation of the provided value is empty or
 * consists solely of A·S·C·I·I whitespace, the result is an empty
 * array (there are no tokens).
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);
  // Splitting the empty string would produce a single empty token,
  // so that case is handled separately.
  return collapsed === "" ? [] : stringSplit(collapsed, " ");
};
557
/**
 * Returns the result of splitting the provided value on commas,
 * trimming A·S·C·I·I whitespace from the resulting tokens.
 */
export const splitOnCommas = ($) =>
  stringSplit(
    stripLeadingAndTrailingASCIIWhitespace(
      // Drop the whitespace on either side of every comma first; the
      // outer strip then handles the start of the first token and the
      // end of the last one.
      stringReplaceAll(
        `${$}`,
        /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
        ",",
      ),
    ),
    ",",
  );
573
/**
 * Returns the result of catenating the string representations of the
 * provided values, returning a new string according to the algorithm
 * of String::concat.
 */
export const stringCatenate = makeCallable(String.prototype.concat);
580
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string according to the algorithm of
 * String::endsWith.
 */
export const stringEndsWith = makeCallable(String.prototype.endsWith);
587
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string according to the algorithm of
 * String::includes.
 */
export const stringIncludes = makeCallable(String.prototype.includes);
594
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of String::match.
 */
export const stringMatch = makeCallable(String.prototype.match);
601
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of String::matchAll.
 */
export const stringMatchAll = makeCallable(String.prototype.matchAll);
608
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of String::normalize.
 */
export const stringNormalize = makeCallable(
  String.prototype.normalize,
);
616
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of String::padEnd.
 */
export const stringPadEnd = makeCallable(String.prototype.padEnd);
623
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of String::padStart.
 */
export const stringPadStart = makeCallable(String.prototype.padStart);
630
/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times according to the
 * algorithm of String::repeat.
 */
export const stringRepeat = makeCallable(String.prototype.repeat);
637
/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of String::replace.
 */
export const stringReplace = makeCallable(String.prototype.replace);
644
/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of String::replaceAll.
 */
export const stringReplaceAll = makeCallable(
  String.prototype.replaceAll,
);
653
/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher and according to the
 * algorithm of String::search.
 */
export const stringSearch = makeCallable(String.prototype.search);
660
/**
 * Returns a slice of the string representation of the provided value
 * according to the algorithm of String::slice.
 */
export const stringSlice = makeCallable(String.prototype.slice);
666
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of String::split.
 */
export const stringSplit = makeCallable(String.prototype.split);
673
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string according to the algorithm of
 * String::startsWith.
 */
export const stringStartsWith = makeCallable(
  String.prototype.startsWith,
);
682
/**
 * Returns the `[[StringData]]` of the provided value.
 *
 * ☡ This function will throw if the provided object does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = makeCallable(String.prototype.valueOf);
690
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the provided value and collapsing other A·S·C·I·I
 * whitespace in the string representation of the provided value.
 */
export const stripAndCollapseASCIIWhitespace = ($) =>
  stripLeadingAndTrailingASCIIWhitespace(
    // Every run of whitespace first becomes a single space; whatever
    // then remains at either end is stripped.
    stringReplaceAll(
      `${$}`,
      /[\n\r\t\f ]+/gu,
      " ",
    ),
  );
704
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the string representation of the provided value.
 *
 * A·S·C·I·I whitespace is U+0009, U+000A, U+000C, U+000D, and U+0020.
 */
export const stripLeadingAndTrailingASCIIWhitespace = (() => {
  /** Returns whether the provided code unit is A·S·C·I·I whitespace. */
  const isASCIIWhitespace = (codeUnit) =>
    codeUnit === 0x9 || codeUnit === 0xA || codeUnit === 0xC ||
    codeUnit === 0xD || codeUnit === 0x20;

  // The string is scanned inward from both ends rather than matched
  // with a single regular expression: a lazy “everything” group
  // followed by trailing whitespace backtracks quadratically on long
  // interior runs of whitespace, whereas this scan is linear.
  return ($) => {
    const string = `${$}`;
    let start = 0;
    let end = string.length;
    while (
      start < end && isASCIIWhitespace(getCodeUnit(string, start))
    ) {
      // The code unit at `start` is leading whitespace.
      ++start;
    }
    while (
      end > start && isASCIIWhitespace(getCodeUnit(string, end - 1))
    ) {
      // The code unit before `end` is trailing whitespace.
      --end;
    }
    return stringSlice(string, start, end);
  };
})();
714
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of String::substring.
 */
export const substring = makeCallable(String.prototype.substring);
720
/**
 * Returns the result of converting the provided value to a string.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => `${$}`;
This page took 0.117195 seconds and 5 git commands to generate.