]> Lady’s Gitweb - Pisces/blob - string.js
Move index string functions back into value.js
[Pisces] / string.js
1 // SPDX-FileCopyrightText: 2022, 2023, 2025 Lady <https://www.ladys.computer/about/#lady>
2 // SPDX-License-Identifier: MPL-2.0
3 /**
4 * ⁌ ♓🧩 Piscēs ∷ string.js
5 *
6 * Copyright © 2022–2023, 2025 Lady [@ Ladys Computer].
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
11 */
12
13 import {
14 bind,
15 call,
16 completesNormally,
17 createArrowFunction,
18 createCallableFunction,
19 identity,
20 } from "./function.js";
21 import {
22 arrayIteratorFunction,
23 stringIteratorFunction,
24 } from "./iterable.js";
25 import {
26 defineOwnDataProperty,
27 defineOwnProperties,
28 getOwnPropertyDescriptors,
29 objectCreate,
30 setPropertyValues,
31 setPrototype,
32 } from "./object.js";
33 import { sameValue, toLength, UNDEFINED } from "./value.js";
34
// The library name, used as a prefix in thrown error messages.
const PISCĒS = "♓🧩 Piscēs";

// Intrinsics captured once at module evaluation so that later code
// does not have to look them up on the global objects again.
const RE = RegExp;
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

// The original `RegExp::exec´ method.
const reExec = rePrototype.exec;
43
export const {
  /**
   * A `RegExp´‐like object which only matches entire strings, and
   * which may have an additional constraint specified.
   *
   * Matchers are callable objects. Calling one with a string returns
   * true if the string is matched and false otherwise. Calling one
   * with a nonstring always returns false, altho other methods like
   * `::exec´ coerce their arguments to strings first and so may still
   * produce a match.
   */
  Matcher,
} = (() => {
  // Capture the original `RegExp::toString´ method and the getters for
  // each of the flag‐related accessors on `RegExp.prototype´, so that
  // they can be invoked on the private internal regular expression.
  const { toString: reToString } = rePrototype;
  const getDotAll =
    Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get;
  const getFlags =
    Object.getOwnPropertyDescriptor(rePrototype, "flags").get;
  const getGlobal =
    Object.getOwnPropertyDescriptor(rePrototype, "global").get;
  const getHasIndices =
    Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get;
  const getIgnoreCase =
    Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get;
  const getMultiline =
    Object.getOwnPropertyDescriptor(rePrototype, "multiline").get;
  const getSource =
    Object.getOwnPropertyDescriptor(rePrototype, "source").get;
  const getSticky =
    Object.getOwnPropertyDescriptor(rePrototype, "sticky").get;
  const getUnicode =
    Object.getOwnPropertyDescriptor(rePrototype, "unicode").get;
  const getUnicodeSets =
    Object.getOwnPropertyDescriptor(rePrototype, "unicodeSets").get;

  /**
   * The internal implementation of `Matcher´.
   *
   * ※ This class extends the identity function so that the callable
   * matcher function passed to `super´ becomes `this´, enabling the
   * addition of private fields to that function.
   *
   * ※ This class is not exposed; the exported constructor is a wrapper
   * defined below which delegates to it.
   */
  const Matcher = class extends identity {
    // The constraint function, or null if there is none.
    #constraint;

    // The internal regular expression used for matching.
    #regExp;

    /**
     * Constructs a new `Matcher´ from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have either the unicode flag or the unicode sets flag set; a
     * `TypeError´ is thrown otherwise. Any other source is stringified
     * and interpreted as the source of a regular expression with the
     * unicode flag set.
     *
     * Flags are taken from the provided regular expression object, if
     * one was provided.
     *
     * A name for the matcher may be provided as the second argument.
     * If it is nullish, the name is `Matcher(…)´ wrapping the string
     * form of the internal regular expression.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the
     * `Matcher´ object itself. If the return value of this call is
     * falsey, then the match will be considered a failure. A
     * `TypeError´ is thrown if the constraint is neither null nor a
     * function.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:´ and `)$´ if you do not want nongreedy
     * regular expressions to fail when shorter matches are possible.
     */
    constructor(source, name = UNDEFINED, constraint = null) {
      super(
        // This arrow function becomes the matcher object itself. It
        // closes over `regExp´, which is declared below; that is fine
        // because the function cannot be called until construction
        // has finished.
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string.
            //
            // Set the `.lastIndex´ of the regular expression to 0, and
            // see if the first attempt at a match successfully matches
            // the whole string and passes the provided constraint (if
            // present).
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $
              && (constraint === null || constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        // A trial `exec´ call on the empty string is used to detect
        // whether the source is a regular expression.
        //
        // NOTE(review): For a global or sticky source, this trial call
        // presumably writes to `source.lastIndex´ — confirm whether
        // that side effect on the caller’s object matters.
        if (completesNormally(() => call(reExec, source, [""]))) {
          // The provided source is a `RegExp´.
          if (
            !call(getUnicode, source, [])
            && !call(getUnicodeSets, source, [])
          ) {
            // The provided regular expression does not have a unicode
            // flag or unicode sets flag.
            throw new TypeError(
              `${PISCĒS}: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
            );
          } else {
            // The provided regular expression has a unicode flag or
            // unicode sets flag.
            //
            // Make a private copy so that later changes to the
            // provided object do not affect this matcher.
            return new RE(source);
          }
        } else {
          // The provided source is not a `RegExp´.
          //
          // Create one using it as the source string.
          return new RE(`${source}`, "u");
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        // The constraint was provided but is not callable.
        throw new TypeError(
          `${PISCĒS}: Cannot construct Matcher: Constraint is not callable.`,
        );
      } else {
        // The constraint is null or callable.
        //
        // Store it, switch the prototype of the callable over to the
        // matcher prototype, and define the own properties: a fixed
        // `lastIndex´ of 0 and the matcher name.
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            lastIndex: setPropertyValues(objectCreate(null), {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            }),
            name: defineOwnDataProperty(
              objectCreate(null),
              "value",
              name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            ),
          },
        );
      }
    }

    /** Gets whether the dot‐all flag is present on this `Matcher´. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this `Matcher´ on the string representation of the
     * provided value and returns the match result if there is a match,
     * or null otherwise.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      // Always begin matching from the start of the string.
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string
        && (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /**
     * Gets the flags present on this `Matcher´.
     *
     * ※ This needs to be defined because the internal `RegExp´ object
     * may have flags which are not yet recognized by ♓🧩 Piscēs.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether the global flag is present on this `Matcher´. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /**
     * Gets whether the has‐indices flag is present on this `Matcher´.
     */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /**
     * Gets whether the ignore‐case flag is present on this `Matcher´.
     */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /**
     * Gets whether the multiline flag is present on this `Matcher´.
     */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this `Matcher´. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this `Matcher´. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this `Matcher´.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }

    /**
     * Gets whether the unicode sets flag is present on this `Matcher´.
     */
    get unicodeSets() {
      return call(getUnicodeSets, this.#regExp, []);
    }
  };

  // The exposed constructor. It extends `RegExp´ but never calls
  // `super´; it returns an instance of the internal class instead. Its
  // `name´ and `length´ are overridden to be `"Matcher"´ and 1.
  const matcherConstructor = Object.defineProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: defineOwnDataProperty(
        Object.create(null),
        "value",
        "Matcher",
      ),
      length: defineOwnDataProperty(Object.create(null), "value", 1),
    },
  );

  // The exposed prototype: the prototype of the exposed constructor,
  // with the members of the internal class copied onto it and its
  // `constructor´ pointed at the exposed constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    {
      constructor: defineOwnDataProperty(
        Object.create(null),
        "value",
        matcherConstructor,
      ),
    },
  );

  return { Matcher: matcherConstructor };
})();
314
export const {
  /**
   * Returns the result of converting the string representation of the
   * provided value to A·S·C·I·I lowercase.
   *
   * Only the characters `A´ thru `Z´ are changed.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the string representation of the
   * provided value to A·S·C·I·I uppercase.
   *
   * Only the characters `a´ thru `z´ are changed.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = stringPrototype;

  // Build the replacer functions once, at module evaluation, rather
  // than on every call. Each receives the matched substring as its
  // first argument and applies the corresponding case conversion to
  // it.
  const lowercaseMatch = createCallableFunction(stringToLowercase);
  const uppercaseMatch = createCallableFunction(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, lowercaseMatch),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, uppercaseMatch),
  };
})();
347
export const {
  /**
   * Returns an iterator over the characters (one per codepoint) in the
   * string representation of the provided value, following the
   * algorithm of `String::[Symbol.iterator]´.
   */
  characters,

  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values (that is,
   * surrogates) are replaced with U+FFFD.
   */
  scalarValues,
} = (() => {
  /** Yields the provided character unchanged. */
  function* generateCharacters(character) {
    yield character;
  }

  /** Yields the first code unit of the provided value. */
  function* generateCodeUnits(ucsCharacter) {
    yield getCodeUnit(ucsCharacter, 0);
  }

  /**
   * Yields the first codepoint of the provided character.
   *
   * The this value must be an object; unless its `allowSurrogates´
   * property is truthy, surrogate codepoints are yielded as U+FFFD.
   */
  function* generateCodepoints(character) {
    const allowSurrogates = this.allowSurrogates;
    const codepoint = getCodepoint(character, 0);
    if (allowSurrogates || codepoint <= 0xD7FF || codepoint >= 0xE000) {
      // Surrogates are allowed, or the codepoint is not a surrogate.
      yield codepoint;
    } else {
      // The codepoint is a surrogate and surrogates are not allowed.
      yield 0xFFFD;
    }
  }

  const charactersIterator = stringIteratorFunction(
    generateCharacters,
    "String Character Iterator",
  );
  const codeUnitsIterator = arrayIteratorFunction(
    generateCodeUnits,
    "String Code Unit Iterator",
  );
  const codepointsIterator = stringIteratorFunction(
    bind(generateCodepoints, { allowSurrogates: true }, []),
    "String Codepoint Iterator",
  );
  const scalarValuesIterator = stringIteratorFunction(
    bind(generateCodepoints, { allowSurrogates: false }, []),
    "String Scalar Value Iterator",
  );

  // Each exported function stringifies its argument before handing it
  // to the corresponding iterator function.
  return {
    characters: ($) => {
      const string = `${$}`;
      return charactersIterator(string);
    },
    codeUnits: ($) => {
      const string = `${$}`;
      return codeUnitsIterator(string);
    },
    codepoints: ($) => {
      const string = `${$}`;
      return codepointsIterator(string);
    },
    scalarValues: ($) => {
      const string = `${$}`;
      return scalarValuesIterator(string);
    },
  };
})();
415
/**
 * Returns the character at the provided position in the string
 * representation of the provided value, where the character is found
 * according to the algorithm of `String::codePointAt´.
 *
 * Returns undefined if there is no codepoint at that position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return UNDEFINED;
  } else {
    // There is a codepoint; convert it back into a string.
    return stringFromCodepoints(codepoint);
  }
};
427
export const {
  /**
   * Returns the code unit at the provided position in the string
   * representation of the provided value according to the algorithm of
   * `String::charCodeAt´, except that out‐of‐bounds values return
   * undefined in place of nan.
   */
  getCodeUnit,

  /**
   * Returns a string created from the provided code units.
   *
   * ※ This is effectively an alias for `String.fromCharCode´, but
   * with the same error behaviour as `String.fromCodePoint´.
   *
   * ※ Each argument is converted to a number exactly once; the
   * converted value is both validated and used to build the string.
   *
   * ☡ This function throws a `RangeError´ if provided with an argument
   * which is not an integral number from 0 to FFFF₁₆ inclusive.
   */
  stringFromCodeUnits,

  /**
   * Returns the result of catenating the string representations of the
   * provided values, returning a new string according to the algorithm
   * of `String::concat´.
   *
   * ※ If no arguments are given, this function returns the empty
   * string. This is different behaviour than if an explicit undefined
   * first argument is given, in which case the resulting string will
   * begin with `"undefined"´.
   */
  stringCatenate,
} = (() => {
  const { fromCharCode } = String;
  const { charCodeAt, concat } = String.prototype;
  const {
    isInteger: isIntegralNumber,
    isNaN: isNan,
  } = Number;

  return {
    getCodeUnit: ($, n) => {
      // `String::charCodeAt´ produces nan when out of bounds; map that
      // to undefined.
      const codeUnit = call(charCodeAt, $, [n]);
      return isNan(codeUnit) ? UNDEFINED : codeUnit;
    },
    stringCatenate: Object.defineProperties(
      // Catenate onto the empty string so that every argument,
      // including the first, is stringified by `String::concat´.
      (...args) => call(concat, "", args),
      { name: { value: "stringCatenate" }, length: { value: 2 } },
    ),
    stringFromCodeUnits: Object.defineProperties(
      (...codeUnits) => {
        for (let index = 0; index < codeUnits.length; ++index) {
          // Iterate over each provided code unit and throw if it is
          // out of range.
          const nextCU = +codeUnits[index];
          if (
            !isIntegralNumber(nextCU) || nextCU < 0 || nextCU > 0xFFFF
          ) {
            // The code unit is not an integral number between 0 and
            // 0xFFFF; this is an error.
            throw new RangeError(
              `${PISCĒS}: Code unit out of range: ${nextCU}.`,
            );
          } else {
            // The code unit is acceptable.
            //
            // Store the converted number so that the value which was
            // validated is exactly the value which gets used; passing
            // the original along would convert it a second time, and
            // an object with a stateful `valueOf´ could then produce
            // a different, unvalidated number.
            codeUnits[index] = nextCU;
          }
        }
        return call(fromCharCode, UNDEFINED, codeUnits);
      },
      { name: { value: "stringFromCodeUnits" }, length: { value: 1 } },
    ),
  };
})();
501
/**
 * Returns the codepoint found at the provided position in the string
 * representation of the provided value, following the algorithm of
 * `String::codePointAt´.
 */
export const getCodepoint = (() => {
  const { codePointAt } = stringPrototype;
  return createCallableFunction(codePointAt, { name: "getCodepoint" });
})();
511
/**
 * Returns the index at which the search string first occurs in the
 * string representation of the provided value, following the
 * algorithm of `String::indexOf´.
 */
export const getFirstSubstringIndex = (() => {
  const { indexOf } = stringPrototype;
  return createCallableFunction(indexOf, {
    name: "getFirstSubstringIndex",
  });
})();
521
/**
 * Returns the index at which the search string last occurs in the
 * string representation of the provided value, following the
 * algorithm of `String::lastIndexOf´.
 */
export const getLastSubstringIndex = (() => {
  const { lastIndexOf } = stringPrototype;
  return createCallableFunction(lastIndexOf, {
    name: "getLastSubstringIndex",
  });
})();
531
/**
 * Returns the string which results from joining the values of the
 * provided iterable.
 *
 * The separator is stringified before use. If it is undefined, `","´
 * is used instead.
 *
 * The iterable is first collected into an array, which is then joined
 * with the original `Array::join´ method.
 */
export const join = (() => {
  const { join: arrayJoin } = arrayPrototype;
  const join = ($, separator) => {
    // Collect the values before stringifying the separator.
    const values = [...$];
    const glue = separator === UNDEFINED ? "," : `${separator}`;
    return call(arrayJoin, values, [glue]);
  };
  return join;
})();
549
/**
 * Returns a string built from the raw value of a tagged template
 * literal.
 *
 * ※ This is effectively an alias for `String.raw´.
 */
export const rawString = (() => {
  const { raw } = String;
  return createArrowFunction(raw, { name: "rawString" });
})();
559
/**
 * Returns a string built from the provided codepoints.
 *
 * ※ This is effectively an alias for `String.fromCodePoint´.
 *
 * ☡ This function throws an error if provided with an argument which
 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
 */
export const stringFromCodepoints = (() => {
  const { fromCodePoint } = String;
  return createArrowFunction(fromCodePoint, {
    name: "stringFromCodepoints",
  });
})();
572
/**
 * Returns the result of splitting the provided value on A·S·C·I·I
 * whitespace.
 *
 * The value is first stripped and collapsed, and the result is then
 * split on single spaces.
 */
export const splitOnAsciiWhitespace = ($) => {
  const collapsed = stripAndCollapseAsciiWhitespace($);
  return stringSplit(collapsed, " ");
};
579
/**
 * Returns the result of splitting the string representation of the
 * provided value on commas, with A·S·C·I·I whitespace trimmed from
 * the resulting tokens.
 */
export const splitOnCommas = ($) => {
  // Remove any whitespace which surrounds a comma.
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );

  // Remove whitespace from the very start and end, then split.
  const trimmed = stripLeadingAndTrailingAsciiWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
595
/**
 * Returns whether the provided search string occurs at the end of the
 * string representation of the provided value, following the
 * algorithm of `String::endsWith´.
 */
export const stringEndsWith = (() => {
  const { endsWith } = stringPrototype;
  return createCallableFunction(endsWith, { name: "stringEndsWith" });
})();
605
/**
 * Returns whether the provided search string occurs anywhere within
 * the string representation of the provided value, following the
 * algorithm of `String::includes´.
 */
export const stringIncludes = (() => {
  const { includes } = stringPrototype;
  return createCallableFunction(includes, { name: "stringIncludes" });
})();
615
/**
 * Returns the result of matching the provided matcher against the
 * string representation of the provided value, following the
 * algorithm of `String::match´.
 */
export const stringMatch = (() => {
  const { match } = stringPrototype;
  return createCallableFunction(match, { name: "stringMatch" });
})();
625
/**
 * Returns the result of matching the provided matcher against the
 * string representation of the provided value, following the
 * algorithm of `String::matchAll´.
 */
export const stringMatchAll = (() => {
  const { matchAll } = stringPrototype;
  return createCallableFunction(matchAll, { name: "stringMatchAll" });
})();
635
/**
 * Returns the normalized form of the string representation of the
 * provided value, following the algorithm of `String::normalize´.
 */
export const stringNormalize = (() => {
  const { normalize } = stringPrototype;
  return createCallableFunction(normalize, {
    name: "stringNormalize",
  });
})();
644
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it reaches the desired length,
 * following the algorithm of `String::padEnd´.
 */
export const stringPadEnd = (() => {
  const { padEnd } = stringPrototype;
  return createCallableFunction(padEnd, { name: "stringPadEnd" });
})();
654
/**
 * Returns the result of padding the start of the string
 * representation of the provided value until it reaches the desired
 * length, following the algorithm of `String::padStart´.
 */
export const stringPadStart = (() => {
  const { padStart } = stringPrototype;
  return createCallableFunction(padStart, { name: "stringPadStart" });
})();
664
/**
 * Returns the string representation of the provided value repeated
 * the provided number of times, following the algorithm of
 * `String::repeat´.
 */
export const stringRepeat = (() => {
  const { repeat } = stringPrototype;
  return createCallableFunction(repeat, { name: "stringRepeat" });
})();
674
/**
 * Returns the result of replacing, within the string representation
 * of the provided value, what the provided matcher matches with the
 * provided replacement, following the algorithm of
 * `String::replace´.
 */
export const stringReplace = (() => {
  const { replace } = stringPrototype;
  return createCallableFunction(replace, { name: "stringReplace" });
})();
684
/**
 * Returns the result of replacing, within the string representation
 * of the provided value, everything the provided matcher matches with
 * the provided replacement, following the algorithm of
 * `String::replaceAll´.
 */
export const stringReplaceAll = (() => {
  const { replaceAll } = stringPrototype;
  return createCallableFunction(replaceAll, {
    name: "stringReplaceAll",
  });
})();
694
/**
 * Returns the result of searching the string representation of the
 * provided value with the provided matcher, following the algorithm
 * of `String::search´.
 */
export const stringSearch = (() => {
  const { search } = stringPrototype;
  return createCallableFunction(search, { name: "stringSearch" });
})();
704
/**
 * Returns a slice of the string representation of the provided value,
 * following the algorithm of `String::slice´.
 */
export const stringSlice = (() => {
  const { slice } = stringPrototype;
  return createCallableFunction(slice, { name: "stringSlice" });
})();
713
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator, following the algorithm
 * of `String::split´.
 */
export const stringSplit = (() => {
  const { split } = stringPrototype;
  return createCallableFunction(split, { name: "stringSplit" });
})();
723
/**
 * Returns whether the provided search string occurs at the start of
 * the string representation of the provided value, following the
 * algorithm of `String::startsWith´.
 */
export const stringStartsWith = (() => {
  const { startsWith } = stringPrototype;
  return createCallableFunction(startsWith, {
    name: "stringStartsWith",
  });
})();
733
/**
 * Returns the value of the provided string.
 *
 * ※ This is effectively an alias for `String::valueOf´.
 *
 * ☡ This function throws if the provided argument is not a string and
 * does not have a `[[StringData]]´ slot.
 */
export const stringValue = (() => {
  const { valueOf } = stringPrototype;
  return createCallableFunction(valueOf, { name: "stringValue" });
})();
746
/**
 * Returns the string representation of the provided value with every
 * run of A·S·C·I·I whitespace collapsed to a single space and with
 * leading and trailing A·S·C·I·I whitespace removed.
 */
export const stripAndCollapseAsciiWhitespace = ($) => {
  // Collapse first; stripping then only has to deal with at most one
  // space at either end.
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");
  return stripLeadingAndTrailingAsciiWhitespace(collapsed);
};
760
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the string representation of the provided value.
 *
 * A·S·C·I·I whitespace is any of U+000A, U+000D, U+0009, U+000C, or
 * U+0020.
 *
 * ※ This scans inward from both ends of the string, so it runs in
 * time linear in the length of the string. A single regular expression
 * with a lazy middle group can instead take quadratic time on strings
 * which contain long interior runs of whitespace.
 *
 * ☡ This function throws for symbols and other values which cannot be
 * converted to a string.
 */
export const stripLeadingAndTrailingAsciiWhitespace = ($) => {
  const string = `${$}`;
  const isAsciiWhitespace = (character) =>
    character === "\n" || character === "\r" || character === "\t"
    || character === "\f" || character === " ";

  // Advance past leading whitespace.
  let start = 0;
  let end = string.length;
  while (start < end && isAsciiWhitespace(string[start])) {
    ++start;
  }

  // Retreat past trailing whitespace, never crossing `start´.
  while (end > start && isAsciiWhitespace(string[end - 1])) {
    --end;
  }

  return stringSlice(string, start, end);
};
767
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of `String::substring´.
 *
 * ※ The name is given explicitly so that this definition matches the
 * other `String´ method wrappers in this module.
 */
export const substring = createCallableFunction(
  stringPrototype.substring,
  { name: "substring" },
);
775
/**
 * Returns the result of converting the provided value to a string of
 * scalar values, by replacing (unpaired) surrogate values with
 * U+FFFD.
 *
 * ※ This wraps `String::toWellFormed´.
 */
export const toScalarValueString = (() => {
  const { toWellFormed } = stringPrototype;
  return createCallableFunction(toWellFormed, {
    name: "toScalarValueString",
  });
})();
785
/**
 * Returns the string representation of the provided value.
 *
 * ☡ This function throws for symbols and for other values which have
 * no string representation.
 */
export const toString = ($) => {
  const string = `${$}`;
  return string;
};
This page took 0.106478 seconds and 5 git commands to generate.