]> Lady’s Gitweb - Pisces/blob - string.js
Add support for constraints to Matcher
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import { bind, call, identity, makeCallable } from "./function.js";
11 import {
12 defineOwnProperties,
13 getPrototype,
14 objectCreate,
15 setPrototype,
16 } from "./object.js";
17
export const {
  /**
   * A RegExp·like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although other methods like `exec` coerce their arguments and may
   * still produce a match.
   */
  Matcher,
} = (() => {
  // Capture the RegExp builtins up front so that later changes to
  // `RegExp.prototype` do not affect Matcher behaviour.
  const RE = RegExp;
  const { prototype: rePrototype } = RE;
  const { exec: reExec, toString: reToString } = rePrototype;
  const getDotAll =
    Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get;
  const getGlobal =
    Object.getOwnPropertyDescriptor(rePrototype, "global").get;
  const getHasIndices =
    Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get;
  const getIgnoreCase =
    Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get;
  const getMultiline =
    Object.getOwnPropertyDescriptor(rePrototype, "multiline").get;
  const getSource =
    Object.getOwnPropertyDescriptor(rePrototype, "source").get;
  const getSticky =
    Object.getOwnPropertyDescriptor(rePrototype, "sticky").get;
  const getUnicode =
    Object.getOwnPropertyDescriptor(rePrototype, "unicode").get;

  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new Matcher from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with two
     * arguments whenever a match appears successful: first, the string
     * being matched, and second, the Matcher object itself. If the
     * return value of this call is falsey, then the match will be
     * considered a failure.
     *
     * ☡ This constructor throws a TypeError if the provided regular
     * expression lacks the unicode flag, or if the provided constraint
     * is neither null nor callable.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     */
    constructor(source, name = undefined, constraint = null) {
      super(
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            //
            // The result of the constraint is coerced to a boolean so
            // that calling a Matcher only ever produces true or false.
            regExp.lastIndex = 0;
            return call(reExec, regExp, [$])?.[0] === $ &&
              (constraint === null || !!constraint($, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        try {
          call(reExec, source, [""]); // throws if source not a RegExp
        } catch {
          // The provided source is not a regular expression; treat its
          // string representation as a pattern.
          return new RE(`${source}`, "u");
        }
        const unicode = call(getUnicode, source, []);
        if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            lastIndex: {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            },
            name: {
              value: name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            },
          },
        );
      }
    }

    /** Gets whether the dotAll flag is present on this Matcher. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this Matcher on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * The provided value is converted to a string before matching.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /** Gets whether the global flag is present on this Matcher. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /** Gets whether the hasIndices flag is present on this Matcher. */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /** Gets whether the ignoreCase flag is present on this Matcher. */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /** Gets whether the multiline flag is present on this Matcher. */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this Matcher. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this Matcher. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this Matcher.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // Matcher instances are given this prototype by the constructor; it
  // inherits from `RegExp.prototype`.
  const matcherPrototype = setPrototype(
    Matcher.prototype,
    rePrototype,
  );

  return { Matcher };
})();
218
export const {
  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * lowercase.
   *
   * Only the letters A–Z are affected; every other character in the
   * string representation of the provided value is left as‐is.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * uppercase.
   *
   * Only the letters a–z are affected; every other character in the
   * string representation of the provided value is left as‐is.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = String.prototype;

  // The replacer functions do not depend on the input, so they are
  // created once here rather than on every call.
  const lowercaseMatch = makeCallable(stringToLowercase);
  const uppercaseMatch = makeCallable(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, lowercaseMatch),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, uppercaseMatch),
  };
})();
251
export const {
  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,

  /**
   * Returns the result of converting the provided value to a string of
   * scalar values by replacing (unpaired) surrogate values with
   * U+FFFD.
   */
  scalarValueString,
} = (() => {
  const {
    iterator: iteratorSymbol,
    toStringTag: toStringTagSymbol,
  } = Symbol;
  const { [iteratorSymbol]: arrayIterator } = Array.prototype;
  const arrayIteratorPrototype = Object.getPrototypeOf(
    [][iteratorSymbol](),
  );
  const { next: arrayIteratorNext } = arrayIteratorPrototype;
  const iteratorPrototype = Object.getPrototypeOf(
    arrayIteratorPrototype,
  );
  const { [iteratorSymbol]: stringIterator } = String.prototype;
  const stringIteratorPrototype = Object.getPrototypeOf(
    ""[iteratorSymbol](),
  );
  const { next: stringIteratorNext } = stringIteratorPrototype;

  /**
   * An iterator object for iterating over code values (either code
   * units or codepoints) in a string.
   *
   * ※ This class is not exposed, although its methods are (through
   * the prototypes of string code value iterator objects).
   */
  const StringCodeValueIterator = class extends identity {
    #allowSurrogates;
    #baseIterator;

    /**
     * Constructs a new string code value iterator from the provided
     * base iterator.
     *
     * If the provided base iterator is an array iterator, this is a
     * code unit iterator. If the provided iterator is a string
     * iterator and surrogates are allowed, this is a codepoint
     * iterator. If the provided iterator is a string iterator and
     * surrogates are not allowed, this is a scalar value iterator.
     */
    constructor(baseIterator, allowSurrogates = true) {
      super(objectCreate(stringCodeValueIteratorPrototype));
      this.#allowSurrogates = !!allowSurrogates;
      this.#baseIterator = baseIterator;
    }

    /** Provides the next code value in the iterator. */
    next() {
      const baseIterator = this.#baseIterator;
      switch (getPrototype(baseIterator)) {
        case arrayIteratorPrototype: {
          // The base iterator is iterating over U·C·S characters.
          const {
            value: ucsCharacter,
            done,
          } = call(arrayIteratorNext, baseIterator, []);
          return done
            ? { value: undefined, done: true }
            : { value: getCodeUnit(ucsCharacter, 0), done: false };
        }
        case stringIteratorPrototype: {
          // The base iterator is iterating over Unicode characters.
          const {
            value: character,
            done,
          } = call(stringIteratorNext, baseIterator, []);
          if (done) {
            // The base iterator has been exhausted.
            return { value: undefined, done: true };
          } else {
            // The base iterator provided a character; yield the
            // codepoint, or U+FFFD if the codepoint is a surrogate and
            // surrogates are not allowed.
            const codepoint = getCodepoint(character, 0);
            return {
              value: this.#allowSurrogates || codepoint <= 0xD7FF ||
                  codepoint >= 0xE000
                ? codepoint
                : 0xFFFD,
              done: false,
            };
          }
        }
        default: {
          // Should not be possible!
          throw new TypeError(
            "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
          );
        }
      }
    }
  };

  const {
    next: stringCodeValueIteratorNext,
  } = StringCodeValueIterator.prototype;

  // The prototype actually given to string code value iterator
  // objects; it inherits from the prototype of the array iterator
  // prototype.
  const stringCodeValueIteratorPrototype = objectCreate(
    iteratorPrototype,
    {
      next: {
        configurable: true,
        enumerable: false,
        value: stringCodeValueIteratorNext,
        writable: true,
      },
      [toStringTagSymbol]: {
        configurable: true,
        enumerable: false,
        value: "String Code Value Iterator",
        writable: false,
      },
    },
  );

  // Objects inheriting from this prototype are iterable over the
  // scalar values of their `source` string. The iterator they return
  // has its own bound `next` method.
  const scalarValueIterablePrototype = {
    [iteratorSymbol]() {
      return {
        next: bind(
          stringCodeValueIteratorNext,
          new StringCodeValueIterator(
            call(stringIterator, this.source, []),
            false,
          ),
          [],
        ),
      };
    },
  };

  return {
    codeUnits: ($) =>
      new StringCodeValueIterator(call(arrayIterator, `${$}`, [])),
    codepoints: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        true,
      ),
    scalarValues: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        false,
      ),
    scalarValueString: ($) => {
      // Append one scalar value at a time instead of spreading every
      // scalar value into a single call, since engines limit how many
      // arguments a single call may receive and long strings would
      // otherwise throw a RangeError.
      let result = "";
      for (
        const scalarValue of objectCreate(
          scalarValueIterablePrototype,
          { source: { value: `${$}` } },
        )
      ) {
        result += stringFromCodepoints(scalarValue);
      }
      return result;
    },
  };
})();
427
/**
 * Returns an iterator over the string representation of the provided
 * value according to the algorithm of String::[Symbol.iterator].
 *
 * ※ Each value yielded is a string containing a single codepoint.
 */
export const characters = makeCallable(String.prototype[Symbol.iterator]);
436
/**
 * Returns the character (as a string) whose codepoint begins at the
 * provided position in the string representation of the provided
 * value, or undefined if there is no codepoint at that position.
 *
 * ※ The codepoint is found according to the algorithm of
 * String::codePointAt.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // No codepoint exists at the provided position.
    return undefined;
  } else {
    // A codepoint was found; convert it back into a string.
    return stringFromCodepoints(codepoint);
  }
};
448
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::charCodeAt.
 */
export const getCodeUnit = makeCallable(String.prototype.charCodeAt);
455
/**
 * Returns the codepoint which begins at the provided position in the
 * string representation of the provided value, following the
 * algorithm of String::codePointAt.
 */
export const getCodepoint = makeCallable(String.prototype.codePointAt);
462
/**
 * Returns the index at which the search string first occurs in the
 * string representation of the provided value, following the
 * algorithm of String::indexOf.
 */
export const getFirstSubstringIndex = makeCallable(String.prototype.indexOf);
471
/**
 * Returns the index at which the search string last occurs in the
 * string representation of the provided value, following the
 * algorithm of String::lastIndexOf.
 */
export const getLastSubstringIndex = makeCallable(String.prototype.lastIndexOf);
480
/**
 * Joins the values produced by the provided iterable into a single
 * string.
 *
 * The separator defaults to "," when not provided, and is converted
 * to a string before use.
 *
 * Nullish values are stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = Array.prototype;
  const join = ($, separator = ",") => {
    // Collect the iterable into an array first, then stringify the
    // separator, and finally defer to Array::join.
    const items = [...$];
    const glue = `${separator}`;
    return call(arrayJoin, items, [glue]);
  };
  return join;
})();
494
/**
 * Returns a string created from the raw value of the tagged template
 * literal.
 *
 * ※ This is an alias for String.raw.
 */
export const rawString = String.raw;

/**
 * Returns a string created from the provided code units.
 *
 * ※ This is an alias for String.fromCharCode.
 */
export const stringFromCodeUnits = String.fromCharCode;

/**
 * Returns a string created from the provided codepoints.
 *
 * ※ This is an alias for String.fromCodePoint.
 */
export const stringFromCodepoints = String.fromCodePoint;
518
/**
 * Splits the string representation of the provided value on runs of
 * A·S·C·I·I whitespace and returns the resulting tokens.
 *
 * ※ Leading and trailing A·S·C·I·I whitespace is stripped and inner
 * runs are collapsed to a single space before splitting.
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
525
/**
 * Returns the result of splitting the provided value on commas,
 * trimming A·S·C·I·I whitespace from the resulting tokens.
 *
 * ※ The string representation of the provided value is split first
 * and each token is stripped afterwards. This produces the same
 * tokens as removing whitespace around commas before splitting, but
 * avoids a regular expression which backtracks heavily on long runs
 * of whitespace that are not next to a comma.
 */
export const splitOnCommas = ($) => {
  const tokens = stringSplit(`${$}`, ",");
  for (let index = 0; index < tokens.length; ++index) {
    // Replace each token in place with its stripped form.
    tokens[index] = stripLeadingAndTrailingASCIIWhitespace(
      tokens[index],
    );
  }
  return tokens;
};
541
/**
 * Catenates the string representations of the provided values into a
 * new string, following the algorithm of String::concat.
 */
export const stringCatenate = makeCallable(String.prototype.concat);
548
/**
 * Returns whether the string representation of the provided value has
 * the provided search string at its end, following the algorithm of
 * String::endsWith.
 */
export const stringEndsWith = makeCallable(String.prototype.endsWith);
555
/**
 * Returns whether the provided search string occurs anywhere in the
 * string representation of the provided value, following the
 * algorithm of String::includes.
 */
export const stringIncludes = makeCallable(String.prototype.includes);
562
/**
 * Matches the string representation of the provided value against the
 * provided matcher and returns the result, following the algorithm of
 * String::match.
 */
export const stringMatch = makeCallable(String.prototype.match);
569
/**
 * Matches the string representation of the provided value against the
 * provided matcher and returns the result, following the algorithm of
 * String::matchAll.
 */
export const stringMatchAll = makeCallable(String.prototype.matchAll);
576
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of String::normalize.
 */
export const stringNormalize = makeCallable(
  String.prototype.normalize,
);
584
/**
 * Returns the string representation of the provided value with
 * padding added at its end until it reaches the desired length,
 * following the algorithm of String::padEnd.
 */
export const stringPadEnd = makeCallable(String.prototype.padEnd);
591
/**
 * Returns the string representation of the provided value with
 * padding added at its start until it reaches the desired length,
 * following the algorithm of String::padStart.
 */
export const stringPadStart = makeCallable(String.prototype.padStart);
598
/**
 * Returns the string representation of the provided value repeated
 * the provided number of times, following the algorithm of
 * String::repeat.
 */
export const stringRepeat = makeCallable(String.prototype.repeat);
605
/**
 * Returns the string representation of the provided value with what
 * the provided matcher matches replaced by the provided replacement,
 * following the algorithm of String::replace.
 */
export const stringReplace = makeCallable(String.prototype.replace);
612
/**
 * Returns the string representation of the provided value with every
 * match of the provided matcher replaced by the provided replacement,
 * following the algorithm of String::replaceAll.
 */
export const stringReplaceAll = makeCallable(String.prototype.replaceAll);
621
/**
 * Searches the string representation of the provided value using the
 * provided matcher and returns the result, following the algorithm of
 * String::search.
 */
export const stringSearch = makeCallable(String.prototype.search);
628
/**
 * Returns a slice of the string representation of the provided value,
 * following the algorithm of String::slice.
 */
export const stringSlice = makeCallable(String.prototype.slice);
634
/**
 * Splits the string representation of the provided value on the
 * provided separator and returns the result, following the algorithm
 * of String::split.
 */
export const stringSplit = makeCallable(String.prototype.split);
641
/**
 * Returns whether the string representation of the provided value has
 * the provided search string at its start, following the algorithm of
 * String::startsWith.
 */
export const stringStartsWith = makeCallable(String.prototype.startsWith);
650
/**
 * Returns the `[[StringData]]` of the provided value, following the
 * algorithm of String::valueOf.
 *
 * ☡ This function will throw if the provided object does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = makeCallable(String.prototype.valueOf);
658
/**
 * Returns the string representation of the provided value with each
 * run of A·S·C·I·I whitespace collapsed to a single space, and with
 * leading and trailing A·S·C·I·I whitespace removed.
 */
export const stripAndCollapseASCIIWhitespace = ($) => {
  // Collapse every run of whitespace to one space first; stripping
  // afterwards removes any space left at either end.
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");
  return stripLeadingAndTrailingASCIIWhitespace(collapsed);
};
672
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the string representation of the provided value.
 *
 * ※ A·S·C·I·I whitespace here means U+0009, U+000A, U+000C, U+000D,
 * and U+0020.
 *
 * ※ This scans inward from both ends of the string rather than using
 * a regular expression, so that long runs of whitespace in the middle
 * of the string do not cause excessive backtracking.
 */
export const stripLeadingAndTrailingASCIIWhitespace = (() => {
  const {
    charCodeAt: stringCharCodeAt,
    slice: stringPrototypeSlice,
  } = String.prototype;

  /** Returns whether the provided code unit is A·S·C·I·I whitespace. */
  const isASCIIWhitespace = (codeUnit) =>
    codeUnit === 0x9 || codeUnit === 0xA || codeUnit === 0xC ||
    codeUnit === 0xD || codeUnit === 0x20;

  return ($) => {
    const string = `${$}`;
    let start = 0;
    let end = string.length;
    while (
      start < end &&
      isASCIIWhitespace(call(stringCharCodeAt, string, [start]))
    ) {
      // Skip over a leading whitespace code unit.
      ++start;
    }
    while (
      end > start &&
      isASCIIWhitespace(call(stringCharCodeAt, string, [end - 1]))
    ) {
      // Skip over a trailing whitespace code unit.
      --end;
    }
    return call(stringPrototypeSlice, string, [start, end]);
  };
})();
682
/**
 * Returns a substring of the string representation of the provided
 * value, following the algorithm of String::substring.
 */
export const substring = makeCallable(String.prototype.substring);
688
/**
 * Converts the provided value to a string and returns the result.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => {
  return `${$}`;
};
This page took 0.108117 seconds and 5 git commands to generate.