1 // SPDX-FileCopyrightText: 2022, 2023, 2025 Lady <https://www.ladys.computer/about/#lady>
2 // SPDX-License-Identifier: MPL-2.0
4 * ⁌ ♓🧩 Piscēs ∷ string.js
6 * Copyright © 2022–2023, 2025 Lady [@ Ladys Computer].
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
18 createCallableFunction
,
20 } from "./function.js";
22 arrayIteratorFunction
,
23 stringIteratorFunction
,
24 } from "./iterable.js";
26 defineOwnDataProperty
,
28 getOwnPropertyDescriptors
,
33 import { sameValue
, toLength
, UNDEFINED
} from "./value.js";
// The library name; used in this file as the prefix of thrown error
// messages.
const PISCĒS = "♓🧩 Piscēs";
// Keep local references to the prototype objects which the rest of
// this module reads methods from.
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

// The `exec´ method of the regular expression prototype, for use with
// `call´.
const reExec = rePrototype.exec;
46 * A `RegExp´‐like object which only matches entire strings, and may
47 * have additional constraints specified.
49 * Matchers are callable objects and will return true if they are
50 * called with a string that they match, and false otherwise.
51 * Matchers will always return false if called with nonstrings, altho
52 * other methods like `::exec´ coerce their arguments and may still
// The `toString´ method of the regular expression prototype, for use
// with `call´.
const reToString = rePrototype.toString;
59 Object
.getOwnPropertyDescriptor(rePrototype
, "dotAll").get;
61 Object
.getOwnPropertyDescriptor(rePrototype
, "flags").get;
63 Object
.getOwnPropertyDescriptor(rePrototype
, "global").get;
65 Object
.getOwnPropertyDescriptor(rePrototype
, "hasIndices").get;
67 Object
.getOwnPropertyDescriptor(rePrototype
, "ignoreCase").get;
69 Object
.getOwnPropertyDescriptor(rePrototype
, "multiline").get;
71 Object
.getOwnPropertyDescriptor(rePrototype
, "source").get;
73 Object
.getOwnPropertyDescriptor(rePrototype
, "sticky").get;
75 Object
.getOwnPropertyDescriptor(rePrototype
, "unicode").get;
// The getter function of the `unicodeSets´ accessor property on the
// regular expression prototype, for use with `call´.
const { get: getUnicodeSets } = Object.getOwnPropertyDescriptor(
  rePrototype,
  "unicodeSets",
);
80 * The internal implementation of `Matcher´.
82 * ※ This class extends the identity function to enable the addition
83 * of private fields to the callable matcher function it constructs.
85 * ※ This class is not exposed.
87 const Matcher
= class extends identity
{
92 * Constructs a new `Matcher´ from the provided source.
94 * If the provided source is a regular expression, then it must
95 * have either the unicode flag set or the unicode sets flag set.
96 * Otherwise, it is interpreted as the string source of a regular
97 * expression with the unicode flag set.
99 * Other flags are taken from the provided regular expression
100 * object, if any are present.
102 * A name for the matcher may be provided as the second argument.
104 * A callable constraint on acceptable inputs may be provided as a
105 * third argument. If provided, it will be called with three
106 * arguments whenever a match appears successful: first, the string
107 * being matched, second, the match result, and third, the
108 * `Matcher´ object itself. If the return value of this call is
109 * falsey, then the match will be considered a failure.
111 * ☡ If the provided source regular expression uses nongreedy
112 * quantifiers, it may not match the whole string even if a match
113 * with the whole string is possible. Surround the regular
114 * expression with `^(?:´ and `)$´ if you don¦t want nongreedy
115 * regular expressions to fail when shorter matches are possible.
117 constructor(source
, name
= UNDEFINED
, constraint
= null) {
120 if (typeof $ !== "string") {
121 // The provided value is not a string.
124 // The provided value is a string.
126 // Set the `.lastIndex´ of the regular expression to 0, and
127 // see if the first attempt at a match successfully matches
128 // the whole string and passes the provided constraint (if
130 regExp
.lastIndex
= 0;
131 const result
= call(reExec
, regExp
, [$]);
132 return result
?.[0] === $
133 && (constraint
=== null || constraint($, result
, this));
137 const regExp
= this.#regExp
= (() => {
138 if (completesNormally(() => call(reExec
, source
, [""]))) {
139 // The provided source is a `RegExp´.
141 !call(getUnicode
, source
, [])
142 && !call(getUnicodeSets
, source
, [])
144 // The provided regular expression does not have a unicode
145 // flag or unicode sets flag.
147 `${PISCĒS}: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
150 // The provided regular expression has a unicode flag or
151 // unicode sets flag.
152 return new RE(source
);
155 // The provided source is not a `RegExp´.
157 // Create one using it as the source string.
158 return new RE(`${source}`, "u");
161 if (constraint
!== null && typeof constraint
!== "function") {
163 `${PISCĒS}: Cannot construct Matcher: Constraint is not callable.`,
166 this.#constraint
= constraint
;
167 return defineOwnProperties(
168 setPrototype(this, matcherPrototype
),
170 lastIndex: setPropertyValues(objectCreate(null), {
176 name: defineOwnDataProperty(
181 : `Matcher(${call(reToString, regExp, [])})`,
188 /** Gets whether the dot‐all flag is present on this `Matcher´. */
190 return call(getDotAll
, this.#regExp
, []);
194 * Executes this `Matcher´ on the provided value and returns the
195 * result if there is a match, or null otherwise.
197 * Matchers only match if they can match the entire value on the
200 * ☡ The match result returned by this method will be the same as
201 * that passed to the constraint function—and may have been
202 * modified by said function prior to being returned.
205 const regExp
= this.#regExp
;
206 const constraint
= this.#constraint
;
207 const string
= `${$}`;
208 regExp
.lastIndex
= 0;
209 const result
= call(reExec
, regExp
, [string
]);
211 result
?.[0] === string
212 && (constraint
=== null || constraint(string
, result
, this))
214 // The entire string was matched and the constraint, if
215 // present, returned a truthy value.
218 // The entire string was not matched or the constraint returned
225 * Gets the flags present on this `Matcher´.
227 * ※ This needs to be defined because the internal `RegExp´ object
228 * may have flags which are not yet recognized by ♓🧩 Piscēs.
231 return call(getFlags
, this.#regExp
, []);
234 /** Gets whether the global flag is present on this `Matcher´. */
236 return call(getGlobal
, this.#regExp
, []);
240 * Gets whether the has‐indices flag is present on this `Matcher´.
243 return call(getHasIndices
, this.#regExp
, []);
247 * Gets whether the ignore‐case flag is present on this `Matcher´.
250 return call(getIgnoreCase
, this.#regExp
, []);
254 * Gets whether the multiline flag is present on this `Matcher´.
257 return call(getMultiline
, this.#regExp
, []);
260 /** Gets the regular expression source for this `Matcher´. */
262 return call(getSource
, this.#regExp
, []);
265 /** Gets whether the sticky flag is present on this `Matcher´. */
267 return call(getSticky
, this.#regExp
, []);
271 * Gets whether the unicode flag is present on this `Matcher´.
274 return call(getUnicode
, this.#regExp
, []);
278 * Gets whether the unicode sets flag is present on this `Matcher´.
281 return call(getUnicodeSets
, this.#regExp
, []);
285 const matcherConstructor
= Object
.defineProperties(
286 class extends RegExp
{
287 constructor(...args
) {
288 return new Matcher(...args
);
292 name: defineOwnDataProperty(
297 length: defineOwnDataProperty(Object
.create(null), "value", 1),
300 const matcherPrototype
= defineOwnProperties(
301 matcherConstructor
.prototype,
302 getOwnPropertyDescriptors(Matcher
.prototype),
304 constructor: defineOwnDataProperty(
312 return { Matcher: matcherConstructor
};
317 * Returns the result of converting the provided value to A·S·C·I·I
323 * Returns the result of converting the provided value to A·S·C·I·I
329 toLowerCase: stringToLowercase
,
330 toUpperCase: stringToUppercase
,
333 asciiLowercase: ($) =>
337 createCallableFunction(stringToLowercase
),
339 asciiUppercase: ($) =>
343 createCallableFunction(stringToUppercase
),
349 * Returns −0 if the provided argument is `"-0"´; returns a number
350 * representing the index if the provided argument is a canonical
351 * numeric index string; otherwise, returns undefined.
353 * There is no clamping of the numeric index, but note that numbers
 * above 2^53 − 1 are neither safe nor valid integer indices.
356 export const canonicalNumericIndexString
= ($) => {
357 if (typeof $ !== "string") {
359 } else if ($ === "-0") {
363 return $ === `${n}` ? n : UNDEFINED
;
369 * Returns an iterator over the codepoints in the string representation
370 * of the provided value according to the algorithm of
371 * `String::[Symbol.iterator]´.
376 * Returns an iterator over the code units in the string
377 * representation of the provided value.
382 * Returns an iterator over the codepoints in the string
383 * representation of the provided value.
388 * Returns an iterator over the scalar values in the string
389 * representation of the provided value.
391 * Codepoints which are not valid Unicode scalar values are replaced
396 const generateCharacters
= function* (character
) {
399 const generateCodeUnits
= function* (ucsCharacter
) {
400 yield getCodeUnit(ucsCharacter
, 0);
402 const generateCodepoints
= function* (character
) {
403 const { allowSurrogates
} = this;
404 const codepoint
= getCodepoint(character
, 0);
405 yield allowSurrogates
|| codepoint
<= 0xD7FF || codepoint
>= 0xE000
410 const charactersIterator
= stringIteratorFunction(
412 "String Character Iterator",
414 const codeUnitsIterator
= arrayIteratorFunction(
416 "String Code Unit Iterator",
418 const codepointsIterator
= stringIteratorFunction(
419 bind(generateCodepoints
, { allowSurrogates: true }, []),
420 "String Codepoint Iterator",
422 const scalarValuesIterator
= stringIteratorFunction(
423 bind(generateCodepoints
, { allowSurrogates: false }, []),
424 "String Scalar Value Iterator",
428 characters: ($) => charactersIterator(`${$}`),
429 codeUnits: ($) => codeUnitsIterator(`${$}`),
430 codepoints: ($) => codepointsIterator(`${$}`),
431 scalarValues: ($) => scalarValuesIterator(`${$}`),
436 * Returns the character at the provided position in the string
437 * representation of the provided value according to the algorithm of
438 * `String::codePointAt´.
440 export const getCharacter
= ($, pos
) => {
441 const codepoint
= getCodepoint($, pos
);
442 return codepoint
== null
444 : stringFromCodepoints(codepoint
);
449 * Returns the code unit at the provided position in the string
450 * representation of the provided value according to the algorithm of
451 * `String::charAt´, except that out‐of‐bounds values return
452 * undefined in place of nan.
457 * Returns a string created from the provided code units.
459 * ※ This is effectively an alias for `String.fromCharCode´, but
460 * with the same error behaviour as `String.fromCodePoint´.
462 * ☡ This function throws an error if provided with an argument which
463 * is not an integral number from 0 to FFFF₁₆ inclusive.
468 * Returns the result of catenating the string representations of the
469 * provided values, returning a new string according to the algorithm
470 * of `String::concat´.
472 * ※ If no arguments are given, this function returns the empty
473 * string. This is different behaviour than if an explicit undefined
474 * first argument is given, in which case the resulting string will
475 * begin with `"undefined"´.
479 const { fromCharCode
} = String
;
480 const { charCodeAt
, concat
} = String
.prototype;
482 isInteger: isIntegralNumber
,
487 getCodeUnit: ($, n
) => {
488 const codeUnit
= call(charCodeAt
, $, [n
]);
489 return isNan(codeUnit
) ? UNDEFINED : codeUnit
;
491 stringCatenate: Object
.defineProperties(
492 (...args
) => call(concat
, "", args
),
493 { name: { value: "stringCatenate" }, length: { value: 2 } },
495 stringFromCodeUnits: Object
.defineProperties(
497 for (let index
= 0; index
< codeUnits
.length
; ++index
) {
498 // Iterate over each provided code unit and throw if it is
500 const nextCU
= +codeUnits
[index
];
502 !isIntegralNumber(nextCU
) || nextCU
< 0 || nextCU
> 0xFFFF
504 // The code unit is not an integral number between 0 and
505 // 0xFFFF; this is an error.
506 throw new RangeError(
507 `${PISCĒS}: Code unit out of range: ${nextCU}.`,
510 // The code unit is acceptable.
514 return call(fromCharCode
, UNDEFINED
, codeUnits
);
516 { name: { value: "stringFromCodeUnits" }, length: { value: 1 } },
522 * Returns the codepoint at the provided position in the string
523 * representation of the provided value according to the algorithm of
524 * `String::codePointAt´.
526 export const getCodepoint
= createCallableFunction(
527 stringPrototype
.codePointAt
,
528 { name: "getCodepoint" },
532 * Returns the index of the first occurrence of the search string in
533 * the string representation of the provided value according to the
534 * algorithm of `String::indexOf´.
536 export const getFirstSubstringIndex
= createCallableFunction(
537 stringPrototype
.indexOf
,
538 { name: "getFirstSubstringIndex" },
542 * Returns the index of the last occurrence of the search string in the
543 * string representation of the provided value according to the
544 * algorithm of `String::lastIndexOf´.
546 export const getLastSubstringIndex
= createCallableFunction(
547 stringPrototype
.lastIndexOf
,
548 { name: "getLastSubstringIndex" },
551 /** Returns whether the provided value is an array index. */
552 export const isArrayIndexString
= ($) => {
553 const value
= canonicalNumericIndexString($);
554 if (value
!== UNDEFINED
) {
555 // The provided value is a canonical numeric index string.
557 // Return whether it is in range for array indices.
558 return sameValue(value
, 0)
559 || value
=== toLength(value
) && value
> 0 && value
< -1 >>> 0;
561 // The provided value is not a canonical numeric index string.
566 /** Returns whether the provided value is an integer index string. */
567 export const isIntegerIndexString
= ($) => {
568 const value
= canonicalNumericIndexString($);
569 if (value
!== UNDEFINED
) {
570 // The provided value is a canonical numeric index string.
572 // Return whether it is in range for integer indices.
573 return sameValue(value
, 0)
574 || value
=== toLength(value
) && value
> 0;
576 // The provided value is not a canonical numeric index string.
582 * Returns the result of joining the provided iterable.
584 * If no separator is provided, it defaults to `","´.
586 * If a value is nullish, it will be stringified as the empty string.
588 export const join
= (() => {
589 const { join: arrayJoin
} = arrayPrototype
;
590 const join
= ($, separator
) =>
594 [separator
=== UNDEFINED
? "," : `${separator}`],
600 * Returns a string created from the raw value of the tagged template
603 * ※ This is effectively an alias for `String.raw´.
605 export const rawString
= createArrowFunction(String
.raw
, {
610 * Returns a string created from the provided codepoints.
612 * ※ This is effectively an alias for `String.fromCodePoint´.
614 * ☡ This function throws an error if provided with an argument which
615 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
617 export const stringFromCodepoints
= createArrowFunction(
618 String
.fromCodePoint
,
619 { name: "stringFromCodepoints" },
623 * Returns the result of splitting the provided value on Ascii
export const splitOnAsciiWhitespace = ($) => {
  // Strip and collapse the whitespace first, then split the result on
  // single spaces.
  const collapsed = stripAndCollapseAsciiWhitespace($);
  return stringSplit(collapsed, " ");
};
630 * Returns the result of splitting the provided value on commas,
631 * trimming Ascii whitespace from the resulting tokens.
633 export const splitOnCommas
= ($) =>
635 stripLeadingAndTrailingAsciiWhitespace(
638 /[\n\r\t\f ]*,[\n\r\t\f ]*/gu
,
646 * Returns whether the string representation of the provided value ends
647 * with the provided search string according to the algorithm of
648 * `String::endsWith´.
650 export const stringEndsWith
= createCallableFunction(
651 stringPrototype
.endsWith
,
652 { name: "stringEndsWith" },
656 * Returns whether the string representation of the provided value
657 * contains the provided search string according to the algorithm of
658 * `String::includes´.
660 export const stringIncludes
= createCallableFunction(
661 stringPrototype
.includes
,
662 { name: "stringIncludes" },
666 * Returns the result of matching the string representation of the
667 * provided value with the provided matcher according to the algorithm
668 * of `String::match´.
670 export const stringMatch
= createCallableFunction(
671 stringPrototype
.match
,
672 { name: "stringMatch" },
676 * Returns the result of matching the string representation of the
677 * provided value with the provided matcher according to the algorithm
678 * of `String::matchAll´.
680 export const stringMatchAll
= createCallableFunction(
681 stringPrototype
.matchAll
,
682 { name: "stringMatchAll" },
686 * Returns the normalized form of the string representation of the
687 * provided value according to the algorithm of `String::normalize´.
689 export const stringNormalize
= createCallableFunction(
690 stringPrototype
.normalize
,
691 { name: "stringNormalize" },
695 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length
697 * according to the algorithm of `String::padEnd´.
699 export const stringPadEnd
= createCallableFunction(
700 stringPrototype
.padEnd
,
701 { name: "stringPadEnd" },
705 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length
707 * according to the algorithm of `String::padStart´.
709 export const stringPadStart
= createCallableFunction(
710 stringPrototype
.padStart
,
711 { name: "stringPadStart" },
715 * Returns the result of repeating the string representation of the
716 * provided value the provided number of times according to the
717 * algorithm of `String::repeat´.
719 export const stringRepeat
= createCallableFunction(
720 stringPrototype
.repeat
,
721 { name: "stringRepeat" },
725 * Returns the result of replacing the string representation of the
726 * provided value with the provided replacement, using the provided
727 * matcher and according to the algorithm of `String::replace´.
729 export const stringReplace
= createCallableFunction(
730 stringPrototype
.replace
,
731 { name: "stringReplace" },
735 * Returns the result of replacing the string representation of the
736 * provided value with the provided replacement, using the provided
737 * matcher and according to the algorithm of `String::replaceAll´.
739 export const stringReplaceAll
= createCallableFunction(
740 stringPrototype
.replaceAll
,
741 { name: "stringReplaceAll" },
745 * Returns the result of searching the string representation of the
746 * provided value using the provided matcher and according to the
747 * algorithm of `String::search´.
749 export const stringSearch
= createCallableFunction(
750 stringPrototype
.search
,
751 { name: "stringSearch" },
755 * Returns a slice of the string representation of the provided value
756 * according to the algorithm of `String::slice´.
758 export const stringSlice
= createCallableFunction(
759 stringPrototype
.slice
,
760 { name: "stringSlice" },
 * Returns the result of splitting the string representation of the
765 * provided value on the provided separator according to the algorithm
766 * of `String::split´.
768 export const stringSplit
= createCallableFunction(
769 stringPrototype
.split
,
770 { name: "stringSplit" },
774 * Returns whether the string representation of the provided value
775 * starts with the provided search string according to the algorithm of
776 * `String::startsWith´.
778 export const stringStartsWith
= createCallableFunction(
779 stringPrototype
.startsWith
,
780 { name: "stringStartsWith" },
784 * Returns the value of the provided string.
 * ※ This is effectively an alias for `String::valueOf´.
788 * ☡ This function throws if the provided argument is not a string and
789 * does not have a `[[StringData]]´ slot.
791 export const stringValue
= createCallableFunction(
792 stringPrototype
.valueOf
,
793 { name: "stringValue" },
797 * Returns the result of stripping leading and trailing Ascii
798 * whitespace from the provided value and collapsing other Ascii
799 * whitespace in the string representation of the provided value.
801 export const stripAndCollapseAsciiWhitespace
= ($) =>
802 stripLeadingAndTrailingAsciiWhitespace(
811 * Returns the result of stripping leading and trailing Ascii
812 * whitespace from the string representation of the provided value.
export const stripLeadingAndTrailingAsciiWhitespace = ($) => {
  // Every part of this pattern may match the empty string, so the
  // match always succeeds for a string input. The lazy capture group
  // holds whatever lies between the leading and trailing whitespace
  // runs.
  const match = call(
    reExec,
    /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u,
    [$],
  );
  return match[1];
};
818 * Returns a substring of the string representation of the provided
819 * value according to the algorithm of `String::substring´.
821 export const substring
= createCallableFunction(
822 stringPrototype
.substring
,
826 * Returns the result of converting the provided value to a string of
827 * scalar values by replacing (unpaired) surrogate values with
830 export const toScalarValueString
= createCallableFunction(
831 String
.prototype.toWellFormed
,
832 { name: "toScalarValueString" },
836 * Returns the result of converting the provided value to a string.
838 * ☡ This method throws for symbols and other objects without a string
export const toString = ($) => {
  // Template‐literal interpolation applies the language’s ordinary
  // string conversion, which throws a `TypeError´ for symbols.
  return `${$}`;
};