1 // SPDX-FileCopyrightText: 2022, 2023, 2025 Lady <https://www.ladys.computer/about/#lady>
2 // SPDX-License-Identifier: MPL-2.0
4 * ⁌ ♓🧩 Piscēs ∷ string.js
6 * Copyright © 2022–2023, 2025 Lady [@ Ladys Computer].
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
18 createCallableFunction
,
20 } from "./function.js";
22 arrayIteratorFunction
,
23 stringIteratorFunction
,
24 } from "./iterable.js";
26 defineOwnDataProperty
,
28 getOwnPropertyDescriptors
,
33 import { sameValue
, toLength
, UNDEFINED
} from "./value.js";
/**
 * The name of this library.
 *
 * ※ Used in this file as the prefix of thrown error messages.
 */
const PISCĒS = "♓🧩 Piscēs";
// Capture the prototypes (and the `exec´ method of the regular
// expression prototype) once, so that the rest of this file reads them
// from these bindings instead of looking them up again.
//
// NOTE(review): `RE´ is defined outside the lines visible here;
// presumably it is the `RegExp´ constructor — confirm.
const { prototype: rePrototype } = RE;
const { prototype: arrayPrototype } = Array;
const { prototype: stringPrototype } = String;

const { exec: reExec } = rePrototype;
46 * A `RegExp´‐like object which only matches entire strings, and may
47 * have additional constraints specified.
49 * Matchers are callable objects and will return true if they are
50 * called with a string that they match, and false otherwise.
51 * Matchers will always return false if called with nonstrings, altho
52 * other methods like `::exec´ coerce their arguments and may still
// The `toString´ method of `rePrototype´, captured so that it can be
// invoked with `call´ (it is used below to build the default matcher
// name).
const { toString: reToString } = rePrototype;
59 Object
.getOwnPropertyDescriptor(rePrototype
, "dotAll").get;
61 Object
.getOwnPropertyDescriptor(rePrototype
, "flags").get;
63 Object
.getOwnPropertyDescriptor(rePrototype
, "global").get;
65 Object
.getOwnPropertyDescriptor(rePrototype
, "hasIndices").get;
67 Object
.getOwnPropertyDescriptor(rePrototype
, "ignoreCase").get;
69 Object
.getOwnPropertyDescriptor(rePrototype
, "multiline").get;
71 Object
.getOwnPropertyDescriptor(rePrototype
, "source").get;
73 Object
.getOwnPropertyDescriptor(rePrototype
, "sticky").get;
75 Object
.getOwnPropertyDescriptor(rePrototype
, "unicode").get;
// The getter of the `unicodeSets´ accessor on `rePrototype´, captured
// so that it can be invoked on the internal regular expression with
// `call´.
//
// ☡ This throws at evaluation time if `rePrototype´ has no own
// `unicodeSets´ property, as there is then no descriptor to read.
const getUnicodeSets =
  Object.getOwnPropertyDescriptor(rePrototype, "unicodeSets").get;
80 * The internal implementation of `Matcher´.
82 * ※ This class extends the identity function to enable the addition
83 * of private fields to the callable matcher function it constructs.
85 * ※ This class is not exposed.
87 const Matcher
= class extends identity
{
92 * Constructs a new `Matcher´ from the provided source.
94 * If the provided source is a regular expression, then it must
95 * have either the unicode flag set or the unicode sets flag set.
96 * Otherwise, it is interpreted as the string source of a regular
97 * expression with the unicode flag set.
99 * Other flags are taken from the provided regular expression
100 * object, if any are present.
102 * A name for the matcher may be provided as the second argument.
104 * A callable constraint on acceptable inputs may be provided as a
105 * third argument. If provided, it will be called with three
106 * arguments whenever a match appears successful: first, the string
107 * being matched, second, the match result, and third, the
108 * `Matcher´ object itself. If the return value of this call is
109 * falsey, then the match will be considered a failure.
111 * ☡ If the provided source regular expression uses nongreedy
112 * quantifiers, it may not match the whole string even if a match
113 * with the whole string is possible. Surround the regular
114 * expression with `^(?:´ and `)$´ if you don¦t want nongreedy
115 * regular expressions to fail when shorter matches are possible.
117 constructor(source
, name
= UNDEFINED
, constraint
= null) {
120 if (typeof $ !== "string") {
121 // The provided value is not a string.
124 // The provided value is a string.
126 // Set the `.lastIndex´ of the regular expression to 0, and
127 // see if the first attempt at a match successfully matches
128 // the whole string and passes the provided constraint (if
130 regExp
.lastIndex
= 0;
131 const result
= call(reExec
, regExp
, [$]);
132 return result
?.[0] === $
133 && (constraint
=== null || constraint($, result
, this));
137 const regExp
= this.#regExp
= (() => {
138 if (completesNormally(() => call(reExec
, source
, [""]))) {
139 // The provided source is a `RegExp´.
141 !call(getUnicode
, source
, [])
142 && !call(getUnicodeSets
, source
, [])
144 // The provided regular expression does not have a unicode
145 // flag or unicode sets flag.
147 `${PISCĒS}: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
150 // The provided regular expression has a unicode flag or
151 // unicode sets flag.
152 return new RE(source
);
155 // The provided source is not a `RegExp´.
157 // Create one using it as the source string.
158 return new RE(`${source}`, "u");
161 if (constraint
!== null && typeof constraint
!== "function") {
163 `${PISCĒS}: Cannot construct Matcher: Constraint is not callable.`,
166 this.#constraint
= constraint
;
167 return defineOwnProperties(
168 setPrototype(this, matcherPrototype
),
170 lastIndex: setPropertyValues(objectCreate(null), {
176 name: defineOwnDataProperty(
181 : `Matcher(${call(reToString, regExp, [])})`,
188 /** Gets whether the dot‐all flag is present on this `Matcher´. */
190 return call(getDotAll
, this.#regExp
, []);
194 * Executes this `Matcher´ on the provided value and returns the
195 * result if there is a match, or null otherwise.
197 * Matchers only match if they can match the entire value on the
200 * ☡ The match result returned by this method will be the same as
201 * that passed to the constraint function—and may have been
202 * modified by said function prior to being returned.
205 const regExp
= this.#regExp
;
206 const constraint
= this.#constraint
;
207 const string
= `${$}`;
208 regExp
.lastIndex
= 0;
209 const result
= call(reExec
, regExp
, [string
]);
211 result
?.[0] === string
212 && (constraint
=== null || constraint(string
, result
, this))
214 // The entire string was matched and the constraint, if
215 // present, returned a truthy value.
218 // The entire string was not matched or the constraint returned
225 * Gets the flags present on this `Matcher´.
227 * ※ This needs to be defined because the internal `RegExp´ object
228 * may have flags which are not yet recognized by ♓🧩 Piscēs.
231 return call(getFlags
, this.#regExp
, []);
234 /** Gets whether the global flag is present on this `Matcher´. */
236 return call(getGlobal
, this.#regExp
, []);
240 * Gets whether the has‐indices flag is present on this `Matcher´.
243 return call(getHasIndices
, this.#regExp
, []);
247 * Gets whether the ignore‐case flag is present on this `Matcher´.
250 return call(getIgnoreCase
, this.#regExp
, []);
254 * Gets whether the multiline flag is present on this `Matcher´.
257 return call(getMultiline
, this.#regExp
, []);
260 /** Gets the regular expression source for this `Matcher´. */
262 return call(getSource
, this.#regExp
, []);
265 /** Gets whether the sticky flag is present on this `Matcher´. */
267 return call(getSticky
, this.#regExp
, []);
271 * Gets whether the unicode flag is present on this `Matcher´.
274 return call(getUnicode
, this.#regExp
, []);
278 * Gets whether the unicode sets flag is present on this `Matcher´.
281 return call(getUnicodeSets
, this.#regExp
, []);
285 const matcherConstructor
= Object
.defineProperties(
286 class extends RegExp
{
287 constructor(...args
) {
288 return new Matcher(...args
);
292 name: defineOwnDataProperty(
297 length: defineOwnDataProperty(Object
.create(null), "value", 1),
300 const matcherPrototype
= defineOwnProperties(
301 matcherConstructor
.prototype,
302 getOwnPropertyDescriptors(Matcher
.prototype),
304 constructor: defineOwnDataProperty(
312 return { Matcher: matcherConstructor
};
317 * Returns the result of converting the provided value to A·S·C·I·I
323 * Returns the result of converting the provided value to A·S·C·I·I
329 toLowerCase: stringToLowercase
,
330 toUpperCase: stringToUppercase
,
333 asciiLowercase: ($) =>
337 createCallableFunction(stringToLowercase
),
339 asciiUppercase: ($) =>
343 createCallableFunction(stringToUppercase
),
350 * Returns an iterator over the codepoints in the string representation
351 * of the provided value according to the algorithm of
352 * `String::[Symbol.iterator]´.
357 * Returns an iterator over the code units in the string
358 * representation of the provided value.
363 * Returns an iterator over the codepoints in the string
364 * representation of the provided value.
369 * Returns an iterator over the scalar values in the string
370 * representation of the provided value.
372 * Codepoints which are not valid Unicode scalar values are replaced
377 const generateCharacters
= function* (character
) {
380 const generateCodeUnits
= function* (ucsCharacter
) {
381 yield getCodeUnit(ucsCharacter
, 0);
383 const generateCodepoints
= function* (character
) {
384 const { allowSurrogates
} = this;
385 const codepoint
= getCodepoint(character
, 0);
386 yield allowSurrogates
|| codepoint
<= 0xD7FF || codepoint
>= 0xE000
391 const charactersIterator
= stringIteratorFunction(
393 "String Character Iterator",
395 const codeUnitsIterator
= arrayIteratorFunction(
397 "String Code Unit Iterator",
399 const codepointsIterator
= stringIteratorFunction(
400 bind(generateCodepoints
, { allowSurrogates: true }, []),
401 "String Codepoint Iterator",
403 const scalarValuesIterator
= stringIteratorFunction(
404 bind(generateCodepoints
, { allowSurrogates: false }, []),
405 "String Scalar Value Iterator",
409 characters: ($) => charactersIterator(`${$}`),
410 codeUnits: ($) => codeUnitsIterator(`${$}`),
411 codepoints: ($) => codepointsIterator(`${$}`),
412 scalarValues: ($) => scalarValuesIterator(`${$}`),
417 * Returns the character at the provided position in the string
418 * representation of the provided value according to the algorithm of
419 * `String::codePointAt´.
421 export const getCharacter
= ($, pos
) => {
422 const codepoint
= getCodepoint($, pos
);
423 return codepoint
== null
425 : stringFromCodepoints(codepoint
);
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::charCodeAt´, except that out‐of‐bounds values return
 * undefined in place of nan.
 */
438 * Returns a string created from the provided code units.
440 * ※ This is effectively an alias for `String.fromCharCode´, but
441 * with the same error behaviour as `String.fromCodePoint´.
443 * ☡ This function throws an error if provided with an argument which
444 * is not an integral number from 0 to FFFF₁₆ inclusive.
449 * Returns the result of catenating the string representations of the
450 * provided values, returning a new string according to the algorithm
451 * of `String::concat´.
453 * ※ If no arguments are given, this function returns the empty
454 * string. This is different behaviour than if an explicit undefined
455 * first argument is given, in which case the resulting string will
456 * begin with `"undefined"´.
// Capture the string builtins used by the helpers which follow, so
// that they can be invoked with `call´.
const { fromCharCode } = String;
const { charCodeAt, concat } = String.prototype;
463 isInteger: isIntegralNumber
,
468 getCodeUnit: ($, n
) => {
469 const codeUnit
= call(charCodeAt
, $, [n
]);
470 return isNan(codeUnit
) ? UNDEFINED : codeUnit
;
472 stringCatenate: Object
.defineProperties(
473 (...args
) => call(concat
, "", args
),
474 { name: { value: "stringCatenate" }, length: { value: 2 } },
476 stringFromCodeUnits: Object
.defineProperties(
478 for (let index
= 0; index
< codeUnits
.length
; ++index
) {
479 // Iterate over each provided code unit and throw if it is
481 const nextCU
= +codeUnits
[index
];
483 !isIntegralNumber(nextCU
) || nextCU
< 0 || nextCU
> 0xFFFF
485 // The code unit is not an integral number between 0 and
486 // 0xFFFF; this is an error.
487 throw new RangeError(
488 `${PISCĒS}: Code unit out of range: ${nextCU}.`,
491 // The code unit is acceptable.
495 return call(fromCharCode
, UNDEFINED
, codeUnits
);
497 { name: { value: "stringFromCodeUnits" }, length: { value: 1 } },
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::codePointAt´.
 *
 * ※ This wraps `String::codePointAt´ using `createCallableFunction´;
 * elsewhere in this file it is called with the value first and the
 * position second.
 */
export const getCodepoint = createCallableFunction(
  stringPrototype.codePointAt,
  { name: "getCodepoint" },
);
/**
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value according to the
 * algorithm of `String::indexOf´.
 *
 * ※ This wraps `String::indexOf´ using `createCallableFunction´.
 */
export const getFirstSubstringIndex = createCallableFunction(
  stringPrototype.indexOf,
  { name: "getFirstSubstringIndex" },
);
/**
 * Returns the index of the last occurrence of the search string in the
 * string representation of the provided value according to the
 * algorithm of `String::lastIndexOf´.
 *
 * ※ This wraps `String::lastIndexOf´ using `createCallableFunction´.
 */
export const getLastSubstringIndex = createCallableFunction(
  stringPrototype.lastIndexOf,
  { name: "getLastSubstringIndex" },
);
533 * Returns the result of joining the provided iterable.
535 * If no separator is provided, it defaults to `","´.
537 * If a value is nullish, it will be stringified as the empty string.
539 export const join
= (() => {
540 const { join: arrayJoin
} = arrayPrototype
;
541 const join
= ($, separator
) =>
545 [separator
=== UNDEFINED
? "," : `${separator}`],
551 * Returns a string created from the raw value of the tagged template
554 * ※ This is effectively an alias for `String.raw´.
556 export const rawString
= createArrowFunction(String
.raw
, {
/**
 * Returns a string created from the provided codepoints.
 *
 * ※ This is effectively an alias for `String.fromCodePoint´.
 *
 * ☡ This function throws an error if provided with an argument which
 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
 */
export const stringFromCodepoints = createArrowFunction(
  String.fromCodePoint,
  { name: "stringFromCodepoints" },
);
/**
 * Returns the result of splitting the provided value on Ascii
 * whitespace.
 *
 * The value is first passed through
 * `stripAndCollapseAsciiWhitespace´, and the result of that is then
 * split on single U+0020 characters with `stringSplit´.
 *
 * ※ NOTE(review): if `stringSplit´ follows `String::split´, an empty
 * or all‐whitespace input presumably yields an array containing one
 * empty string rather than an empty array — confirm that callers
 * expect this.
 */
export const splitOnAsciiWhitespace = ($) => {
  const collapsed = stripAndCollapseAsciiWhitespace($);
  return stringSplit(collapsed, " ");
};
581 * Returns the result of splitting the provided value on commas,
582 * trimming Ascii whitespace from the resulting tokens.
584 export const splitOnCommas
= ($) =>
586 stripLeadingAndTrailingAsciiWhitespace(
589 /[\n\r\t\f ]*,[\n\r\t\f ]*/gu
,
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string according to the algorithm of
 * `String::endsWith´.
 *
 * ※ This wraps `String::endsWith´ using `createCallableFunction´.
 */
export const stringEndsWith = createCallableFunction(
  stringPrototype.endsWith,
  { name: "stringEndsWith" },
);
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string according to the algorithm of
 * `String::includes´.
 *
 * ※ This wraps `String::includes´ using `createCallableFunction´.
 */
export const stringIncludes = createCallableFunction(
  stringPrototype.includes,
  { name: "stringIncludes" },
);
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::match´.
 *
 * ※ This wraps `String::match´ using `createCallableFunction´.
 */
export const stringMatch = createCallableFunction(
  stringPrototype.match,
  { name: "stringMatch" },
);
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::matchAll´.
 *
 * ※ This wraps `String::matchAll´ using `createCallableFunction´.
 */
export const stringMatchAll = createCallableFunction(
  stringPrototype.matchAll,
  { name: "stringMatchAll" },
);
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of `String::normalize´.
 *
 * ※ This wraps `String::normalize´ using `createCallableFunction´.
 */
export const stringNormalize = createCallableFunction(
  stringPrototype.normalize,
  { name: "stringNormalize" },
);
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padEnd´.
 *
 * ※ This wraps `String::padEnd´ using `createCallableFunction´.
 */
export const stringPadEnd = createCallableFunction(
  stringPrototype.padEnd,
  { name: "stringPadEnd" },
);
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padStart´.
 *
 * ※ This wraps `String::padStart´ using `createCallableFunction´.
 */
export const stringPadStart = createCallableFunction(
  stringPrototype.padStart,
  { name: "stringPadStart" },
);
/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times according to the
 * algorithm of `String::repeat´.
 *
 * ※ This wraps `String::repeat´ using `createCallableFunction´.
 */
export const stringRepeat = createCallableFunction(
  stringPrototype.repeat,
  { name: "stringRepeat" },
);
/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of `String::replace´.
 *
 * ※ This wraps `String::replace´ using `createCallableFunction´.
 */
export const stringReplace = createCallableFunction(
  stringPrototype.replace,
  { name: "stringReplace" },
);
/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of `String::replaceAll´.
 *
 * ※ This wraps `String::replaceAll´ using `createCallableFunction´.
 */
export const stringReplaceAll = createCallableFunction(
  stringPrototype.replaceAll,
  { name: "stringReplaceAll" },
);
/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher and according to the
 * algorithm of `String::search´.
 *
 * ※ This wraps `String::search´ using `createCallableFunction´.
 */
export const stringSearch = createCallableFunction(
  stringPrototype.search,
  { name: "stringSearch" },
);
/**
 * Returns a slice of the string representation of the provided value
 * according to the algorithm of `String::slice´.
 *
 * ※ This wraps `String::slice´ using `createCallableFunction´.
 */
export const stringSlice = createCallableFunction(
  stringPrototype.slice,
  { name: "stringSlice" },
);
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of `String::split´.
 *
 * ※ This wraps `String::split´ using `createCallableFunction´;
 * elsewhere in this file it is called with the value first and the
 * separator second.
 */
export const stringSplit = createCallableFunction(
  stringPrototype.split,
  { name: "stringSplit" },
);
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string according to the algorithm of
 * `String::startsWith´.
 *
 * ※ This wraps `String::startsWith´ using `createCallableFunction´.
 */
export const stringStartsWith = createCallableFunction(
  stringPrototype.startsWith,
  { name: "stringStartsWith" },
);
/**
 * Returns the value of the provided string.
 *
 * ※ This is effectively an alias for `String::valueOf´.
 *
 * ☡ This function throws if the provided argument is not a string and
 * does not have a `[[StringData]]´ slot.
 */
export const stringValue = createCallableFunction(
  stringPrototype.valueOf,
  { name: "stringValue" },
);
748 * Returns the result of stripping leading and trailing Ascii
749 * whitespace from the provided value and collapsing other Ascii
750 * whitespace in the string representation of the provided value.
752 export const stripAndCollapseAsciiWhitespace
= ($) =>
753 stripLeadingAndTrailingAsciiWhitespace(
/**
 * Returns the result of stripping leading and trailing Ascii
 * whitespace from the string representation of the provided value.
 *
 * Ascii whitespace is any of U+0009, U+000A, U+000C, U+000D, or
 * U+0020. Other whitespace (for example U+000B or U+00A0) is kept.
 *
 * ※ The bounds of the result are found with two linear scans. A
 * single regular expression with a lazy middle group gives the same
 * answer, but can backtrack quadratically when the value contains long
 * interior runs of whitespace.
 *
 * ☡ This function throws if the provided value cannot be converted to
 * a string (for example, if it is a symbol).
 */
export const stripLeadingAndTrailingAsciiWhitespace = ($) => {
  const string = `${$}`;
  const isAsciiWhitespace = (character) =>
    character === "\n" || character === "\r" || character === "\t" ||
    character === "\f" || character === " ";
  let start = 0;
  let end = string.length;
  while (start < end && isAsciiWhitespace(string[start])) {
    // Advance past each leading whitespace code unit.
    ++start;
  }
  while (end > start && isAsciiWhitespace(string[end - 1])) {
    // Retreat past each trailing whitespace code unit.
    --end;
  }
  return stringSlice(string, start, end);
};
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of `String::substring´.
 *
 * ※ This wraps `String::substring´ using `createCallableFunction´.
 *
 * ※ NOTE(review): unlike the neighbouring wrappers, no explicit name
 * option is visible for this one — confirm whether that is intended.
 */
export const substring = createCallableFunction(
  stringPrototype.substring,
);
/**
 * Returns the result of converting the provided value to a string of
 * scalar values by replacing (unpaired) surrogate values with
 * U+FFFD.
 *
 * ※ This wraps `String::toWellFormed´ using
 * `createCallableFunction´. The method is read from the cached
 * `stringPrototype´, matching the other wrappers in this file.
 */
export const toScalarValueString = createCallableFunction(
  stringPrototype.toWellFormed,
  { name: "toScalarValueString" },
);
/**
 * Returns the result of converting the provided value to a string.
 *
 * ※ The conversion is done with a template literal, so objects are
 * converted with a string hint (`toString´ is tried before
 * `valueOf´).
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => `${$}`;