]> Lady’s Gitweb - Pisces/blob - string.js
Add Matcher class for whole‐string matching
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import { bind, call, identity, makeCallable } from "./function.js";
11 import {
12 defineOwnProperties,
13 getPrototype,
14 objectCreate,
15 setPrototype,
16 } from "./object.js";
17
export const {
  /**
   * A callable, RegExp·like object which matches only whole strings.
   *
   * Calling a Matcher with a string gives true when the Matcher
   * matches that entire string and false when it does not. Calling a
   * Matcher with a nonstring always gives false; methods such as
   * `exec` stringify their argument first, and so may still report a
   * match for such values.
   */
  Matcher,
} = (() => {
  const RE = RegExp;
  const { prototype: rePrototype } = RE;
  const { exec: reExec, toString: reToString } = rePrototype;

  // Capture the builtin accessor functions up front so that Matcher
  // getters do not depend on later changes to `RegExp.prototype`.
  const builtinGetter = (property) =>
    Object.getOwnPropertyDescriptor(rePrototype, property).get;
  const getDotAll = builtinGetter("dotAll");
  const getGlobal = builtinGetter("global");
  const getHasIndices = builtinGetter("hasIndices");
  const getIgnoreCase = builtinGetter("ignoreCase");
  const getMultiline = builtinGetter("multiline");
  const getSource = builtinGetter("source");
  const getSticky = builtinGetter("sticky");
  const getUnicode = builtinGetter("unicode");

  /**
   * Produces the private regular expression for a Matcher from the
   * provided constructor source.
   *
   * Regular expression sources are copied and must have the unicode
   * flag; anything else is stringified and compiled with only the
   * unicode flag.
   */
  const compileSource = (source) => {
    let isRegExp = true;
    try {
      // The builtin `exec` throws when its this value is not a RegExp.
      call(reExec, source, [""]);
    } catch {
      isRegExp = false;
    }
    if (!isRegExp) {
      // Treat the source as the text of a regular expression.
      return new RE(`${source}`, "u");
    }
    if (call(getUnicode, source, [])) {
      // The regular expression has the unicode flag; copy it.
      return new RE(source);
    }
    throw new TypeError(
      `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
    );
  };

  const Matcher = class extends identity {
    #regExp;

    /**
     * Constructs a new Matcher from the provided source.
     *
     * A regular expression source must have the unicode flag set; its
     * other flags are carried over. Any other source is used as the
     * string source of a regular expression with the unicode flag
     * set.
     *
     * The optional second argument provides a name for the Matcher.
     *
     * ☡ A source regular expression which uses nongreedy quantifiers
     * may fail to match the whole string even though a whole‐string
     * match exists. Wrap the expression in `^(?:` and `)$` if shorter
     * possible matches should not cause nongreedy expressions to fail.
     */
    constructor(source, name = undefined) {
      // The function given to `super` serves as the callable Matcher.
      // NOTE(review): this relies on `identity` (from ./function.js)
      // returning its argument when constructed — confirm there.
      super(($) => {
        if (typeof $ === "string") {
          // Only the first match attempt, starting from index 0, is
          // considered, and it must cover the entire string.
          regExp.lastIndex = 0;
          const match = call(reExec, regExp, [$]);
          return match?.[0] === $;
        }
        // Nonstrings never match.
        return false;
      });
      const regExp = compileSource(source);
      this.#regExp = regExp;
      const matcher = setPrototype(this, matcherPrototype);
      const label = name != null
        ? `${name}`
        : `Matcher(${call(reToString, regExp, [])})`;
      return defineOwnProperties(matcher, {
        lastIndex: {
          configurable: false,
          enumerable: false,
          value: 0,
          writable: false,
        },
        name: { value: label },
      });
    }

    /** Whether this Matcher has the dotAll flag. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Runs this Matcher against the string representation of the
     * provided value, returning the match result, or null if there is
     * no match.
     *
     * A match only counts if the first attempt matches the entire
     * string.
     */
    exec($) {
      const regExp = this.#regExp;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      // Partial matches are treated the same as failures.
      return result?.[0] === string ? result : null;
    }

    /** Whether this Matcher has the global flag. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /** Whether this Matcher has the hasIndices flag. */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /** Whether this Matcher has the ignoreCase flag. */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /** Whether this Matcher has the multiline flag. */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** The regular expression source of this Matcher. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Whether this Matcher has the sticky flag. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Whether this Matcher has the unicode flag.
     *
     * ※ The constructor only ever builds unicode regular expressions,
     * so this is always true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // Matchers inherit any remaining behaviour from `RegExp.prototype`.
  const matcherPrototype = setPrototype(
    Matcher.prototype,
    rePrototype,
  );

  return { Matcher };
})();
193
export const {
  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * lowercase.
   *
   * Only the characters A–Z in the string representation of the value
   * are changed.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * uppercase.
   *
   * Only the characters a–z in the string representation of the value
   * are changed.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = String.prototype;

  // Build the replacer callbacks once, rather than on every call.
  // NOTE(review): presumes `makeCallable` (from ./function.js) yields
  // a function which takes the receiver as its first argument, so that
  // each replacer is applied to the matched substring — confirm there.
  const lowercaseMatch = makeCallable(stringToLowercase);
  const uppercaseMatch = makeCallable(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, lowercaseMatch),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, uppercaseMatch),
  };
})();
226
export const {
  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,

  /**
   * Returns the result of converting the provided value to a string of
   * scalar values by replacing (unpaired) surrogate values with
   * U+FFFD.
   */
  scalarValueString,
} = (() => {
  const {
    iterator: iteratorSymbol,
    toStringTag: toStringTagSymbol,
  } = Symbol;
  const { [iteratorSymbol]: arrayIterator } = Array.prototype;
  const arrayIteratorPrototype = Object.getPrototypeOf(
    [][iteratorSymbol](),
  );
  const { next: arrayIteratorNext } = arrayIteratorPrototype;
  const iteratorPrototype = Object.getPrototypeOf(
    arrayIteratorPrototype,
  );
  const { [iteratorSymbol]: stringIterator } = String.prototype;
  const stringIteratorPrototype = Object.getPrototypeOf(
    ""[iteratorSymbol](),
  );
  const { next: stringIteratorNext } = stringIteratorPrototype;

  /**
   * An iterator object for iterating over code values (either code
   * units or codepoints) in a string.
   *
   * ※ This constructor is not exposed.
   */
  const StringCodeValueIterator = class extends identity {
    #allowSurrogates;
    #baseIterator;

    /**
     * Constructs a new string code value iterator from the provided
     * base iterator.
     *
     * If the provided base iterator is an array iterator, this is a
     * code unit iterator. If the provided iterator is a string
     * iterator and surrogates are allowed, this is a codepoint
     * iterator. If the provided iterator is a string iterator and
     * surrogates are not allowed, this is a scalar value iterator.
     */
    constructor(baseIterator, allowSurrogates = true) {
      // NOTE(review): presumes `identity` returns its argument, so
      // that the private fields are installed on a fresh object which
      // inherits from the iterator prototype below — confirm in
      // ./function.js.
      super(objectCreate(stringCodeValueIteratorPrototype));
      this.#allowSurrogates = !!allowSurrogates;
      this.#baseIterator = baseIterator;
    }

    /** Provides the next code value in the iterator. */
    next() {
      const baseIterator = this.#baseIterator;
      switch (getPrototype(baseIterator)) {
        case arrayIteratorPrototype: {
          // The base iterator is iterating over U·C·S characters.
          const {
            value: ucsCharacter,
            done,
          } = call(arrayIteratorNext, baseIterator, []);
          return done
            ? { value: undefined, done: true }
            : { value: getCodeUnit(ucsCharacter, 0), done: false };
        }
        case stringIteratorPrototype: {
          // The base iterator is iterating over Unicode characters.
          const {
            value: character,
            done,
          } = call(stringIteratorNext, baseIterator, []);
          if (done) {
            // The base iterator has been exhausted.
            return { value: undefined, done: true };
          } else {
            // The base iterator provided a character; yield the
            // codepoint, substituting U+FFFD for surrogates when they
            // are not allowed.
            const codepoint = getCodepoint(character, 0);
            return {
              value: this.#allowSurrogates || codepoint <= 0xD7FF ||
                  codepoint >= 0xE000
                ? codepoint
                : 0xFFFD,
              done: false,
            };
          }
        }
        default: {
          // Should not be possible!
          throw new TypeError(
            "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
          );
        }
      }
    }
  };

  const {
    next: stringCodeValueIteratorNext,
  } = StringCodeValueIterator.prototype;
  const stringCodeValueIteratorPrototype = objectCreate(
    iteratorPrototype,
    {
      next: {
        configurable: true,
        enumerable: false,
        value: stringCodeValueIteratorNext,
        writable: true,
      },
      [toStringTagSymbol]: {
        configurable: true,
        enumerable: false,
        value: "String Code Value Iterator",
        writable: false,
      },
    },
  );

  return {
    // The value is stringified before it is handed to the array
    // iterator; otherwise a nonstring would be iterated by its own
    // indexed properties instead of by the code units of its string
    // representation.
    codeUnits: ($) =>
      new StringCodeValueIterator(call(arrayIterator, `${$}`, [])),
    codepoints: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, $, []),
        true,
      ),
    scalarValues: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, $, []),
        false,
      ),
    scalarValueString: ($) => {
      // Build the result one scalar value at a time. Passing every
      // scalar value to a single `stringFromCodepoints` call would
      // make the maximum string length depend on how many arguments
      // the engine permits in one call.
      const iterator = new StringCodeValueIterator(
        call(stringIterator, $, []),
        false,
      );
      let result = "";
      while (true) {
        const { value, done } = call(
          stringCodeValueIteratorNext,
          iterator,
          [],
        );
        if (done) {
          // Every scalar value has been appended.
          return result;
        }
        result += stringFromCodepoints(value);
      }
    },
  };
})();
401
/**
 * Returns an iterator over the characters (one per codepoint) in the
 * string representation of the provided value, following the
 * algorithm of String::[Symbol.iterator].
 */
export const characters = (() => {
  const { [Symbol.iterator]: stringIterator } = String.prototype;
  return makeCallable(stringIterator);
})();
410
/**
 * Returns the character at the provided position in the string
 * representation of the provided value, locating it according to the
 * algorithm of String::codePointAt.
 *
 * The result is undefined when there is no codepoint at the position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is nothing at the provided position.
    return undefined;
  }
  return stringFromCodepoints(codepoint);
};
422
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::charCodeAt.
 */
export const getCodeUnit = makeCallable(String.prototype.charCodeAt);
429
/**
 * Returns the codepoint found at the provided position in the string
 * representation of the provided value, following the algorithm of
 * String::codePointAt.
 */
export const getCodepoint = (() => {
  const { codePointAt } = String.prototype;
  return makeCallable(codePointAt);
})();
436
/**
 * Returns the index at which the search string first occurs in the
 * string representation of the provided value, following the
 * algorithm of String::indexOf.
 */
export const getFirstSubstringIndex = (() => {
  const { indexOf } = String.prototype;
  return makeCallable(indexOf);
})();
445
/**
 * Returns the index at which the search string last occurs in the
 * string representation of the provided value, following the
 * algorithm of String::lastIndexOf.
 */
export const getLastSubstringIndex = (() => {
  const { lastIndexOf } = String.prototype;
  return makeCallable(lastIndexOf);
})();
454
/**
 * Returns the string formed by joining the values of the provided
 * iterable.
 *
 * The separator is stringified, and is "," when not provided.
 *
 * Nullish values in the iterable are stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = Array.prototype;
  const join = ($, separator = ",") => {
    // Collect the iterable into an array so that the builtin join
    // algorithm can be applied to it.
    const values = [...$];
    const glue = `${separator}`;
    return call(arrayJoin, values, [glue]);
  };
  return join;
})();
468
/**
 * Returns a string created from the raw value of the tagged template
 * literal.
 *
 * ※ This is an alias for String.raw.
 */
export const rawString = String.raw;

/**
 * Returns a string created from the provided code units.
 *
 * ※ This is an alias for String.fromCharCode.
 */
export const stringFromCodeUnits = String.fromCharCode;

/**
 * Returns a string created from the provided codepoints.
 *
 * ※ This is an alias for String.fromCodePoint.
 */
export const stringFromCodepoints = String.fromCodePoint;
492
/**
 * Returns the tokens which result from splitting the provided value
 * on A·S·C·I·I whitespace.
 */
export const splitOnASCIIWhitespace = ($) => {
  // After stripping and collapsing, any remaining whitespace is a
  // single space between tokens.
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
499
/**
 * Returns the tokens which result from splitting the provided value
 * on commas, with A·S·C·I·I whitespace trimmed from each token.
 */
export const splitOnCommas = ($) => {
  // Drop the whitespace on either side of each comma…
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );
  // …then drop it from the very start and end, and split.
  const trimmed = stripLeadingAndTrailingASCIIWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
515
/**
 * Returns a new string formed by catenating the string
 * representations of the provided values, following the algorithm of
 * String::concat.
 */
export const stringCatenate = (() => {
  const { concat } = String.prototype;
  return makeCallable(concat);
})();
522
/**
 * Returns whether the provided search string occurs at the end of the
 * string representation of the provided value, following the
 * algorithm of String::endsWith.
 */
export const stringEndsWith = (() => {
  const { endsWith } = String.prototype;
  return makeCallable(endsWith);
})();
529
/**
 * Returns whether the provided search string occurs anywhere in the
 * string representation of the provided value, following the
 * algorithm of String::includes.
 */
export const stringIncludes = (() => {
  const { includes } = String.prototype;
  return makeCallable(includes);
})();
536
/**
 * Returns the result of matching the string representation of the
 * provided value against the provided matcher, following the
 * algorithm of String::match.
 */
export const stringMatch = (() => {
  const { match } = String.prototype;
  return makeCallable(match);
})();
543
/**
 * Returns the result of matching the string representation of the
 * provided value against the provided matcher, following the
 * algorithm of String::matchAll.
 */
export const stringMatchAll = (() => {
  const { matchAll } = String.prototype;
  return makeCallable(matchAll);
})();
550
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of String::normalize.
 */
export const stringNormalize = makeCallable(
  String.prototype.normalize,
);
558
/**
 * Returns the string representation of the provided value, padded at
 * its end until it reaches the desired length, following the
 * algorithm of String::padEnd.
 */
export const stringPadEnd = (() => {
  const { padEnd } = String.prototype;
  return makeCallable(padEnd);
})();
565
/**
 * Returns the string representation of the provided value, padded at
 * its start until it reaches the desired length, following the
 * algorithm of String::padStart.
 */
export const stringPadStart = (() => {
  const { padStart } = String.prototype;
  return makeCallable(padStart);
})();
572
/**
 * Returns the string representation of the provided value repeated
 * the provided number of times, following the algorithm of
 * String::repeat.
 */
export const stringRepeat = (() => {
  const { repeat } = String.prototype;
  return makeCallable(repeat);
})();
579
/**
 * Returns the result of using the provided matcher to replace part of
 * the string representation of the provided value with the provided
 * replacement, following the algorithm of String::replace.
 */
export const stringReplace = (() => {
  const { replace } = String.prototype;
  return makeCallable(replace);
})();
586
/**
 * Returns the result of using the provided matcher to replace parts
 * of the string representation of the provided value with the
 * provided replacement, following the algorithm of
 * String::replaceAll.
 */
export const stringReplaceAll = (() => {
  const { replaceAll } = String.prototype;
  return makeCallable(replaceAll);
})();
595
/**
 * Returns the result of searching the string representation of the
 * provided value with the provided matcher, following the algorithm
 * of String::search.
 */
export const stringSearch = (() => {
  const { search } = String.prototype;
  return makeCallable(search);
})();
602
/**
 * Returns a slice taken from the string representation of the
 * provided value, following the algorithm of String::slice.
 */
export const stringSlice = (() => {
  const { slice } = String.prototype;
  return makeCallable(slice);
})();
608
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator, following the algorithm
 * of String::split.
 */
export const stringSplit = (() => {
  const { split } = String.prototype;
  return makeCallable(split);
})();
615
/**
 * Returns whether the provided search string occurs at the start of
 * the string representation of the provided value, following the
 * algorithm of String::startsWith.
 */
export const stringStartsWith = (() => {
  const { startsWith } = String.prototype;
  return makeCallable(startsWith);
})();
624
/**
 * Returns the `[[StringData]]` of the provided value.
 *
 * ☡ This function throws when the provided value does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = (() => {
  const { valueOf } = String.prototype;
  return makeCallable(valueOf);
})();
632
/**
 * Returns the string representation of the provided value with
 * leading and trailing A·S·C·I·I whitespace stripped and every other
 * run of A·S·C·I·I whitespace collapsed into a single space.
 */
export const stripAndCollapseASCIIWhitespace = ($) => {
  // Collapse every run of whitespace first; the runs at either end
  // are then stripped.
  const collapsed = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]+/gu,
    " ",
  );
  return stripLeadingAndTrailingASCIIWhitespace(collapsed);
};
646
/**
 * Returns the string representation of the provided value with any
 * leading and trailing A·S·C·I·I whitespace removed.
 */
export const stripLeadingAndTrailingASCIIWhitespace = (() => {
  const { exec: reExec } = RegExp.prototype;
  return ($) => {
    // The capture group lazily takes everything between the leading
    // and trailing whitespace runs. The pattern can match any string,
    // so the result is read without a null check.
    const stripper = /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u;
    const match = call(reExec, stripper, [$]);
    return match[1];
  };
})();
656
/**
 * Returns a substring taken from the string representation of the
 * provided value, following the algorithm of String::substring.
 */
export const substring = (() => {
  const { substring } = String.prototype;
  return makeCallable(substring);
})();
This page took 0.101388 seconds and 5 git commands to generate.