]> Lady’s Gitweb - Pisces/blob - string.js
e697a1fcad38f51cfba5e0fe8ae6d2dc3d34d7e1
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import { bind, call, identity, makeCallable } from "./function.js";
11 import {
12 defineOwnProperties,
13 getOwnPropertyDescriptors,
14 getPrototype,
15 objectCreate,
16 setPrototype,
17 } from "./object.js";
18 import { ITERATOR, TO_STRING_TAG } from "./value.js";
19
// Capture the intrinsics used throughout this module once, at load
// time, so that later code does not need to look them up again.
const RE = RegExp;
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

// The original `RegExp::exec`, for use with `call`.
const reExec = rePrototype.exec;
26
export const {
  /**
   * A `RegExp`like object which only matches entire strings, and which
   * may carry an additional constraint.
   *
   * Matchers are callable: calling one with a string returns true if
   * the string is matched and false otherwise. Calling one with a
   * nonstring always returns false; `::exec`, on the other hand,
   * coerces its argument to a string and may therefore still match.
   */
  Matcher,
} = (() => {
  const { toString: reToString } = rePrototype;

  /** Returns the getter function for the named `RegExp.prototype` accessor. */
  const nativeGetter = (key) =>
    Object.getOwnPropertyDescriptor(rePrototype, key).get;

  const getDotAll = nativeGetter("dotAll");
  const getFlags = nativeGetter("flags");
  const getGlobal = nativeGetter("global");
  const getHasIndices = nativeGetter("hasIndices");
  const getIgnoreCase = nativeGetter("ignoreCase");
  const getMultiline = nativeGetter("multiline");
  const getSource = nativeGetter("source");
  const getSticky = nativeGetter("sticky");
  const getUnicode = nativeGetter("unicode");

  /**
   * Produces the internal regular expression for a `Matcher` source.
   *
   * A value on which `RegExp::exec` can be called is treated as a
   * regular expression and copied (it must have the unicode flag).
   * Anything else is stringified and compiled with the unicode flag.
   */
  const compileSource = (source) => {
    let isRegExp = true;
    try {
      call(reExec, source, [""]); // throws if source not a RegExp
    } catch {
      isRegExp = false;
    }
    if (!isRegExp) {
      // The source is not a regular expression; compile its string
      // representation.
      return new RE(`${source}`, "u");
    }
    if (call(getUnicode, source, [])) {
      // The source is a regular expression with the unicode flag.
      return new RE(source);
    }
    // The source is a regular expression without the unicode flag.
    throw new TypeError(
      `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
    );
  };

  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new `Matcher`.
     *
     * `source` is either a regular expression with the unicode flag
     * set (its other flags are preserved) or a value whose string
     * representation is used as the source of a unicode regular
     * expression.
     *
     * `name`, if not nullish, is stringified and used as the name of
     * the matcher; otherwise a name is derived from the regular
     * expression.
     *
     * `constraint`, if not null, must be callable. It is called with
     * the matched string, the match result, and the `Matcher` itself
     * whenever a match otherwise succeeds; a falsey return value
     * turns the match into a failure.
     *
     * ☡ Nongreedy quantifiers may stop short of the end of the string
     * even when a whole‐string match exists. Wrap the expression in
     * `^(?:` and `)$` to avoid this.
     *
     * ☡ Throws a `TypeError` if the source is a regular expression
     * without the unicode flag, or if the constraint is neither null
     * nor callable.
     */
    constructor(source, name = undefined, constraint = null) {
      super(($) => {
        if (typeof $ === "string") {
          // Only the first match attempt, starting from index 0, is
          // considered; it must cover the whole string and satisfy
          // the constraint (if any).
          regExp.lastIndex = 0;
          const result = call(reExec, regExp, [$]);
          return result?.[0] === $ &&
            (constraint === null || constraint($, result, this));
        }
        // Nonstrings never match when a `Matcher` is called directly.
        return false;
      });
      const regExp = this.#regExp = compileSource(source);
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      }
      this.#constraint = constraint;
      const displayName = name == null
        ? `Matcher(${call(reToString, regExp, [])})`
        : `${name}`;
      return defineOwnProperties(
        setPrototype(this, matcherPrototype),
        {
          lastIndex: {
            configurable: false,
            enumerable: false,
            value: 0,
            writable: false,
          },
          name: { value: displayName },
        },
      );
    }

    /** Whether the dot‐all flag is present on this `Matcher`. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Runs this `Matcher` against the string representation of the
     * provided value, returning the match result on success and null
     * on failure.
     *
     * A match only counts if the first attempt covers the entire
     * string and the constraint (if any) returns a truthy value.
     *
     * ☡ The returned result is the very object which was handed to
     * the constraint function, which may have modified it.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      const matched = result?.[0] === string &&
        (constraint === null || constraint(string, result, this));
      return matched ? result : null;
    }

    /**
     * The flags present on this `Matcher`.
     *
     * ※ Defined explicitly because the internal `RegExp` object may
     * have flags which are not (yet) known to this module.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Whether the global flag is present on this `Matcher`. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /** Whether the has‐indices flag is present on this `Matcher`. */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /** Whether the ignore‐case flag is present on this `Matcher`. */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /** Whether the multiline flag is present on this `Matcher`. */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** The regular expression source of this `Matcher`. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Whether the sticky flag is present on this `Matcher`. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Whether the unicode flag is present on this `Matcher`.
     *
     * ※ The constructor only ever creates unicode regular expressions.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exposed constructor inherits from `RegExp` but delegates all
  // actual construction to the internal class above.
  const matcherConstructor = defineOwnProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: { value: "Matcher" },
      length: { value: 1 },
    },
  );

  // Copy the internal class’s members onto the exposed prototype,
  // then point `constructor` back at the exposed constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    { constructor: { value: matcherConstructor } },
  );

  return { Matcher: matcherConstructor };
})();
261
export const {
  /**
   * Converts the string representation of the provided value to
   * A·S·C·I·I lowercase: only the letters `A` through `Z` are changed.
   */
  asciiLowercase,

  /**
   * Converts the string representation of the provided value to
   * A·S·C·I·I uppercase: only the letters `a` through `z` are changed.
   */
  asciiUppercase,
} = (() => {
  const { toLowerCase, toUpperCase } = stringPrototype;

  const asciiLowercase = ($) => {
    const string = `${$}`;
    const uppercaseLetters = /[A-Z]/gu;
    // The replacer receives each matched letter as its first argument.
    return stringReplaceAll(
      string,
      uppercaseLetters,
      makeCallable(toLowerCase),
    );
  };

  const asciiUppercase = ($) => {
    const string = `${$}`;
    const lowercaseLetters = /[a-z]/gu;
    // The replacer receives each matched letter as its first argument.
    return stringReplaceAll(
      string,
      lowercaseLetters,
      makeCallable(toUpperCase),
    );
  };

  return { asciiLowercase, asciiUppercase };
})();
294
export const {
  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values (that is,
   * surrogates) are replaced with U+FFFD.
   */
  scalarValues,

  /**
   * Returns the result of converting the provided value to a string of
   * scalar values by replacing (unpaired) surrogate values with
   * U+FFFD.
   */
  scalarValueString,
} = (() => {
  const { [ITERATOR]: arrayIterator } = arrayPrototype;
  const arrayIteratorPrototype = Object.getPrototypeOf(
    [][ITERATOR](),
  );
  const { next: arrayIteratorNext } = arrayIteratorPrototype;
  const iteratorPrototype = Object.getPrototypeOf(
    arrayIteratorPrototype,
  );
  const { [ITERATOR]: stringIterator } = stringPrototype;
  const stringIteratorPrototype = Object.getPrototypeOf(
    ""[ITERATOR](),
  );
  const { next: stringIteratorNext } = stringIteratorPrototype;

  /**
   * An iterator object for iterating over code values (either code
   * units or codepoints) in a string.
   *
   * ※ This class is not exposed, although its methods are (through
   * the prototypes of string code value iterator objects).
   */
  const StringCodeValueIterator = class extends identity {
    #allowSurrogates;
    #baseIterator;

    /**
     * Constructs a new string code value iterator from the provided
     * base iterator.
     *
     * If the provided base iterator is an array iterator, this is a
     * code unit iterator. If the provided iterator is a string
     * iterator and surrogates are allowed, this is a codepoint
     * iterator. If the provided iterator is a string iterator and
     * surrogates are not allowed, this is a scalar value iterator.
     */
    constructor(baseIterator, allowSurrogates = true) {
      // The instance is a fresh object inheriting from the exposed
      // iterator prototype, not from this class’s own prototype.
      super(objectCreate(stringCodeValueIteratorPrototype));
      this.#allowSurrogates = !!allowSurrogates;
      this.#baseIterator = baseIterator;
    }

    /** Provides the next code value in the iterator. */
    next() {
      const baseIterator = this.#baseIterator;
      switch (getPrototype(baseIterator)) {
        case arrayIteratorPrototype: {
          // The base iterator is iterating over U·C·S characters.
          const {
            value: ucsCharacter,
            done,
          } = call(arrayIteratorNext, baseIterator, []);
          return done
            ? { value: undefined, done: true }
            : { value: getCodeUnit(ucsCharacter, 0), done: false };
        }
        case stringIteratorPrototype: {
          // The base iterator is iterating over Unicode characters.
          const {
            value: character,
            done,
          } = call(stringIteratorNext, baseIterator, []);
          if (done) {
            // The base iterator has been exhausted.
            return { value: undefined, done: true };
          } else {
            // The base iterator provided a character; yield the
            // codepoint, substituting U+FFFD for surrogates when
            // surrogates are not allowed.
            const codepoint = getCodepoint(character, 0);
            return {
              value: this.#allowSurrogates || codepoint <= 0xD7FF ||
                  codepoint >= 0xE000
                ? codepoint
                : 0xFFFD,
              done: false,
            };
          }
        }
        default: {
          // Should not be possible!
          throw new TypeError(
            "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
          );
        }
      }
    }
  };

  const {
    next: stringCodeValueIteratorNext,
  } = StringCodeValueIterator.prototype;
  const stringCodeValueIteratorPrototype = objectCreate(
    iteratorPrototype,
    {
      next: {
        configurable: true,
        enumerable: false,
        value: stringCodeValueIteratorNext,
        writable: true,
      },
      [TO_STRING_TAG]: {
        configurable: true,
        enumerable: false,
        value: "String Code Value Iterator",
        writable: false,
      },
    },
  );

  // Prototype for throwaway iterables over the scalar values of their
  // `source` property. The iterator which is handed out only has a
  // bound `next`, so iteration does not depend on any property lookup
  // on the underlying iterator object.
  const scalarValueIterablePrototype = {
    [ITERATOR]() {
      return {
        next: bind(
          stringCodeValueIteratorNext,
          new StringCodeValueIterator(
            call(stringIterator, this.source, []),
            false,
          ),
          [],
        ),
      };
    },
  };

  return {
    codeUnits: ($) =>
      new StringCodeValueIterator(call(arrayIterator, `${$}`, [])),
    codepoints: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        true,
      ),
    scalarValues: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        false,
      ),
    scalarValueString: ($) => {
      // Append one scalar value at a time. Spreading every scalar
      // value into a single `String.fromCodePoint` call would exceed
      // the engine’s argument limit for long strings and throw.
      let result = "";
      for (
        const scalarValue of objectCreate(
          scalarValueIterablePrototype,
          { source: { value: `${$}` } },
        )
      ) {
        result += stringFromCodepoints(scalarValue);
      }
      return result;
    },
  };
})();
466
/**
 * Produces an iterator over the codepoints (as strings) in the string
 * representation of the provided value, following the algorithm of
 * `String::[Symbol.iterator]`.
 */
export const characters = (() => {
  const { [ITERATOR]: stringIterator } = stringPrototype;
  return makeCallable(stringIterator);
})();
475
/**
 * Returns, as a string, the character found at the provided position
 * in the string representation of the provided value. The codepoint is
 * located according to the algorithm of `String::codePointAt`.
 *
 * Returns undefined if there is no codepoint at that position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return undefined;
  } else {
    // Convert the codepoint back into a string.
    return stringFromCodepoints(codepoint);
  }
};
487
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value, following the algorithm of
 * `String::charCodeAt`.
 */
export const getCodeUnit = (() => {
  const { charCodeAt } = stringPrototype;
  return makeCallable(charCodeAt);
})();
494
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value, following the algorithm of
 * `String::codePointAt`.
 */
export const getCodepoint = (() => {
  const { codePointAt } = stringPrototype;
  return makeCallable(codePointAt);
})();
501
/**
 * Returns the index at which the search string first occurs in the
 * string representation of the provided value, following the algorithm
 * of `String::indexOf`.
 */
export const getFirstSubstringIndex = (() => {
  const { indexOf } = stringPrototype;
  return makeCallable(indexOf);
})();
510
/**
 * Returns the index at which the search string last occurs in the
 * string representation of the provided value, following the algorithm
 * of `String::lastIndexOf`.
 */
export const getLastSubstringIndex = (() => {
  const { lastIndexOf } = stringPrototype;
  return makeCallable(lastIndexOf);
})();
519
/**
 * Joins the values of the provided iterable into a single string.
 *
 * The separator defaults to "," and is converted to a string.
 *
 * Nullish values are stringified as the empty string.
 */
export const join = (() => {
  const arrayJoin = arrayPrototype.join;
  const join = ($, separator = ",") => {
    // Collect the iterable into an array first, then stringify the
    // separator, then delegate to `Array::join`.
    const items = [...$];
    const glue = `${separator}`;
    return call(arrayJoin, items, [glue]);
  };
  return join;
})();
533
/**
 * Returns a string created from the raw value of the tagged template
 * literal.
 *
 * ※ This is an alias for `String.raw`.
 */
export const rawString = String.raw;

/**
 * Returns a string created from the provided code units.
 *
 * ※ This is an alias for `String.fromCharCode`.
 */
export const stringFromCodeUnits = String.fromCharCode;

/**
 * Returns a string created from the provided codepoints.
 *
 * ※ This is an alias for `String.fromCodePoint`.
 */
export const stringFromCodepoints = String.fromCodePoint;
557
/**
 * Splits the string representation of the provided value on A·S·C·I·I
 * whitespace.
 *
 * Leading and trailing whitespace is stripped and interior whitespace
 * is collapsed before the result is split on single spaces.
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
564
/**
 * Splits the string representation of the provided value on commas,
 * with A·S·C·I·I whitespace trimmed from each resulting token.
 */
export const splitOnCommas = ($) => {
  // Drop whitespace on either side of each comma…
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );
  // …then drop whitespace at the very start and end, and split.
  const trimmed = stripLeadingAndTrailingASCIIWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
580
/**
 * Catenates the string representations of the provided values into a
 * new string, following the algorithm of `String::concat`.
 */
export const stringCatenate = (() => {
  const { concat } = stringPrototype;
  return makeCallable(concat);
})();
587
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string, following the algorithm of
 * `String::endsWith`.
 */
export const stringEndsWith = (() => {
  const { endsWith } = stringPrototype;
  return makeCallable(endsWith);
})();
594
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string, following the algorithm of
 * `String::includes`.
 */
export const stringIncludes = (() => {
  const { includes } = stringPrototype;
  return makeCallable(includes);
})();
601
/**
 * Matches the string representation of the provided value against the
 * provided matcher and returns the result, following the algorithm of
 * `String::match`.
 */
export const stringMatch = (() => {
  const { match } = stringPrototype;
  return makeCallable(match);
})();
608
/**
 * Matches the string representation of the provided value against the
 * provided matcher and returns the result, following the algorithm of
 * `String::matchAll`.
 */
export const stringMatchAll = (() => {
  const { matchAll } = stringPrototype;
  return makeCallable(matchAll);
})();
615
/**
 * Returns the normalized form of the string representation of the
 * provided value, following the algorithm of `String::normalize`.
 */
export const stringNormalize = (() => {
  const { normalize } = stringPrototype;
  return makeCallable(normalize);
})();
623
/**
 * Pads the end of the string representation of the provided value
 * until it reaches the desired length, following the algorithm of
 * `String::padEnd`.
 */
export const stringPadEnd = (() => {
  const { padEnd } = stringPrototype;
  return makeCallable(padEnd);
})();
630
/**
 * Pads the start of the string representation of the provided value
 * until it reaches the desired length, following the algorithm of
 * `String::padStart`.
 */
export const stringPadStart = (() => {
  const { padStart } = stringPrototype;
  return makeCallable(padStart);
})();
637
/**
 * Repeats the string representation of the provided value the provided
 * number of times, following the algorithm of `String::repeat`.
 */
export const stringRepeat = (() => {
  const { repeat } = stringPrototype;
  return makeCallable(repeat);
})();
644
/**
 * Performs a replacement in the string representation of the provided
 * value, using the provided matcher and replacement and following the
 * algorithm of `String::replace`.
 */
export const stringReplace = (() => {
  const { replace } = stringPrototype;
  return makeCallable(replace);
})();
651
/**
 * Performs a replacement in the string representation of the provided
 * value, using the provided matcher and replacement and following the
 * algorithm of `String::replaceAll`.
 */
export const stringReplaceAll = (() => {
  const { replaceAll } = stringPrototype;
  return makeCallable(replaceAll);
})();
660
/**
 * Searches the string representation of the provided value using the
 * provided matcher, following the algorithm of `String::search`.
 */
export const stringSearch = (() => {
  const { search } = stringPrototype;
  return makeCallable(search);
})();
667
/**
 * Returns a slice of the string representation of the provided value,
 * following the algorithm of `String::slice`.
 */
export const stringSlice = (() => {
  const { slice } = stringPrototype;
  return makeCallable(slice);
})();
673
/**
 * Splits the string representation of the provided value on the
 * provided separator, following the algorithm of `String::split`.
 */
export const stringSplit = (() => {
  const { split } = stringPrototype;
  return makeCallable(split);
})();
680
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string, following the algorithm of
 * `String::startsWith`.
 */
export const stringStartsWith = (() => {
  const { startsWith } = stringPrototype;
  return makeCallable(startsWith);
})();
689
/**
 * Returns the `[[StringData]]` of the provided value, by way of
 * `String::valueOf`.
 *
 * ☡ This function throws if the provided value does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = (() => {
  const { valueOf } = stringPrototype;
  return makeCallable(valueOf);
})();
697
/**
 * Strips leading and trailing A·S·C·I·I whitespace from the string
 * representation of the provided value, and collapses every other run
 * of A·S·C·I·I whitespace into a single space.
 */
export const stripAndCollapseASCIIWhitespace = ($) => {
  // Collapse every run of whitespace to one space first, so that at
  // most one space remains at either end to be stripped.
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");
  return stripLeadingAndTrailingASCIIWhitespace(collapsed);
};
711
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace (line feed, carriage return, tab, form feed, and space)
 * from the string representation of the provided value.
 *
 * ※ This scans inward from both ends of the string, so it runs in
 * time linear in the length of the string. A single regular expression
 * with a lazy middle group backtracks quadratically on long runs of
 * whitespace in the interior of the string.
 *
 * ☡ This function throws for values which cannot be converted to a
 * string (for example, symbols).
 */
export const stripLeadingAndTrailingASCIIWhitespace = ($) => {
  const string = `${$}`;
  const isWhitespaceAt = (index) => {
    switch (string[index]) {
      case "\n":
      case "\r":
      case "\t":
      case "\f":
      case " ":
        return true;
      default:
        return false;
    }
  };
  let start = 0;
  let end = string.length;
  while (start < end && isWhitespaceAt(start)) {
    // Skip over leading whitespace.
    ++start;
  }
  while (end > start && isWhitespaceAt(end - 1)) {
    // Skip over trailing whitespace.
    --end;
  }
  return stringSlice(string, start, end);
};
718
/**
 * Returns a substring of the string representation of the provided
 * value, following the algorithm of `String::substring`.
 */
export const substring = (() => {
  const { substring } = stringPrototype;
  return makeCallable(substring);
})();
724
/**
 * Converts the provided value to a string, using the same conversion
 * as a template literal.
 *
 * ☡ This function throws for symbols and for other values which have
 * no string representation.
 */
export const toString = ($) => {
  return `${$}`;
};
This page took 0.144926 seconds and 3 git commands to generate.