1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
14 createCallableFunction
,
16 } from "./function.js";
18 arrayIteratorFunction
,
19 stringIteratorFunction
,
20 } from "./iterable.js";
23 getOwnPropertyDescriptors
,
26 import { sameValue
, toLength
} from "./value.js";
// Capture the prototype objects once, at module evaluation time, so the
// rest of the file can refer to them by short local names. (`RE` is
// declared earlier in the file, outside this excerpt.)
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

// The `exec` method found on `rePrototype` at load time.
const reExec = rePrototype.exec;
37 * A `RegExp`like object which only matches entire strings, and may
38 * have additional constraints specified.
40 * Matchers are callable objects and will return true if they are
41 * called with a string that they match, and false otherwise.
42 * Matchers will always return false if called with nonstrings,
43 * although other methods like `::exec` coerce their arguments and
44 * may still return true.
48 const { toString
: reToString
} = rePrototype
;
50 Object
.getOwnPropertyDescriptor(rePrototype
, "dotAll").get;
52 Object
.getOwnPropertyDescriptor(rePrototype
, "flags").get;
54 Object
.getOwnPropertyDescriptor(rePrototype
, "global").get;
56 Object
.getOwnPropertyDescriptor(rePrototype
, "hasIndices").get;
58 Object
.getOwnPropertyDescriptor(rePrototype
, "ignoreCase").get;
60 Object
.getOwnPropertyDescriptor(rePrototype
, "multiline").get;
62 Object
.getOwnPropertyDescriptor(rePrototype
, "source").get;
64 Object
.getOwnPropertyDescriptor(rePrototype
, "sticky").get;
66 Object
.getOwnPropertyDescriptor(rePrototype
, "unicode").get;
68 const Matcher
= class extends identity
{
73 * Constructs a new `Matcher` from the provided source.
75 * If the provided source is a regular expression, then it must
76 * have the unicode flag set. Otherwise, it is interpreted as the
77 * string source of a regular expression with the unicode flag set.
79 * Other flags are taken from the provided regular expression
80 * object, if any are present.
82 * A name for the matcher may be provided as the second argument.
84 * A callable constraint on acceptable inputs may be provided as a
85 * third argument. If provided, it will be called with three
86 * arguments whenever a match appears successful: first, the string
87 * being matched, second, the match result, and third, the
88 * `Matcher` object itself. If the return value of this call is
89 * falsey, then the match will be considered a failure.
91 * ☡ If the provided source regular expression uses nongreedy
92 * quantifiers, it may not match the whole string even if a match
93 * with the whole string is possible. Surround the regular
94 * expression with `^(?:` and `)$` if you don’t want nongreedy
95 * regular expressions to fail when shorter matches are possible.
97 constructor(source
, name
= undefined, constraint
= null) {
100 if (typeof $ !== "string") {
101 // The provided value is not a string.
104 // The provided value is a string. Set the `.lastIndex` of
105 // the regular expression to 0 and see if the first attempt
106 // at a match matches the whole string and passes the
107 // provided constraint (if present).
108 regExp
.lastIndex
= 0;
109 const result
= call(reExec
, regExp
, [$]);
110 return result
?.[0] === $ &&
111 (constraint
=== null || constraint($, result
, this));
115 const regExp
= this.#regExp
= (() => {
117 call(reExec
, source
, [""]); // throws if source not a RegExp
119 return new RE(`${source}`, "u");
121 const unicode
= call(getUnicode
, source
, []);
123 // The provided regular expression does not have a unicode
126 `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
129 // The provided regular expression has a unicode flag.
130 return new RE(source
);
133 if (constraint
!== null && typeof constraint
!== "function") {
135 "Piscēs: Cannot construct Matcher: Constraint is not callable.",
138 this.#constraint
= constraint
;
139 return defineOwnProperties(
140 setPrototype(this, matcherPrototype
),
151 : `Matcher(${call(reToString, regExp, [])})`,
158 /** Gets whether the dot‐all flag is present on this `Matcher`. */
160 return call(getDotAll
, this.#regExp
, []);
164 * Executes this `Matcher` on the provided value and returns the
165 * result if there is a match, or null otherwise.
167 * Matchers only match if they can match the entire value on the
170 * ☡ The match result returned by this method will be the same as
171 * that passed to the constraint function—and may have been
172 * modified by said function prior to being returned.
175 const regExp
= this.#regExp
;
176 const constraint
= this.#constraint
;
177 const string
= `${$}`;
178 regExp
.lastIndex
= 0;
179 const result
= call(reExec
, regExp
, [string
]);
181 result
?.[0] === string
&&
182 (constraint
=== null || constraint(string
, result
, this))
184 // The entire string was matched and the constraint, if
185 // present, returned a truthy value.
188 // The entire string was not matched or the constraint returned
195 * Gets the flags present on this `Matcher`.
197 * ※ This needs to be defined because the internal `RegExp` object
198 * may have flags which are not yet recognized by ♓🌟 Piscēs.
201 return call(getFlags
, this.#regExp
, []);
204 /** Gets whether the global flag is present on this `Matcher`. */
206 return call(getGlobal
, this.#regExp
, []);
210 * Gets whether the has‐indices flag is present on this `Matcher`.
213 return call(getHasIndices
, this.#regExp
, []);
217 * Gets whether the ignore‐case flag is present on this `Matcher`.
220 return call(getIgnoreCase
, this.#regExp
, []);
224 * Gets whether the multiline flag is present on this `Matcher`.
227 return call(getMultiline
, this.#regExp
, []);
230 /** Gets the regular expression source for this `Matcher`. */
232 return call(getSource
, this.#regExp
, []);
235 /** Gets whether the sticky flag is present on this `Matcher`. */
237 return call(getSticky
, this.#regExp
, []);
241 * Gets whether the unicode flag is present on this `Matcher`.
243 * ※ This will always be true.
246 return call(getUnicode
, this.#regExp
, []);
250 const matcherConstructor
= defineOwnProperties(
251 class extends RegExp
{
252 constructor(...args
) {
253 return new Matcher(...args
);
257 name
: { value
: "Matcher" },
258 length
: { value
: 1 },
261 const matcherPrototype
= defineOwnProperties(
262 matcherConstructor
.prototype,
263 getOwnPropertyDescriptors(Matcher
.prototype),
264 { constructor: { value
: matcherConstructor
} },
267 return { Matcher
: matcherConstructor
};
272 * Returns the result of converting the provided value to A·S·C·I·I
278 * Returns the result of converting the provided value to A·S·C·I·I
284 toLowerCase
: stringToLowercase
,
285 toUpperCase
: stringToUppercase
,
288 asciiLowercase
: ($) =>
292 createCallableFunction(stringToLowercase
),
294 asciiUppercase
: ($) =>
298 createCallableFunction(stringToUppercase
),
304 * Returns −0 if the provided argument is "-0"; returns a number
305 * representing the index if the provided argument is a canonical
306 * numeric index string; otherwise, returns undefined.
308 * There is no clamping of the numeric index, but note that numbers
309 * above 2^53 − 1 are not safe nor valid integer indices.
311 export const canonicalNumericIndexString
= ($) => {
312 if (typeof $ !== "string") {
314 } else if ($ === "-0") {
318 return $ === `${n}` ? n
: undefined;
324 * Returns an iterator over the codepoints in the string representation
325 * of the provided value according to the algorithm of
326 * `String::[Symbol.iterator]`.
331 * Returns an iterator over the code units in the string
332 * representation of the provided value.
337 * Returns an iterator over the codepoints in the string
338 * representation of the provided value.
343 * Returns an iterator over the scalar values in the string
344 * representation of the provided value.
346 * Codepoints which are not valid Unicode scalar values are replaced
351 const generateCharacters
= function* (character
) {
354 const generateCodeUnits
= function* (ucsCharacter
) {
355 yield getCodeUnit(ucsCharacter
, 0);
357 const generateCodepoints
= function* (character
) {
358 const { allowSurrogates
} = this;
359 const codepoint
= getCodepoint(character
, 0);
360 yield allowSurrogates
|| codepoint
<= 0xD7FF || codepoint
>= 0xE000
365 const charactersIterator
= stringIteratorFunction(
367 "String Character Iterator",
369 const codeUnitsIterator
= arrayIteratorFunction(
371 "String Code Unit Iterator",
373 const codepointsIterator
= stringIteratorFunction(
374 bind(generateCodepoints
, { allowSurrogates
: true }, []),
375 "String Codepoint Iterator",
377 const scalarValuesIterator
= stringIteratorFunction(
378 bind(generateCodepoints
, { allowSurrogates
: false }, []),
379 "String Scalar Value Iterator",
383 characters
: ($) => charactersIterator(`${$}`),
384 codeUnits
: ($) => codeUnitsIterator(`${$}`),
385 codepoints
: ($) => codepointsIterator(`${$}`),
386 scalarValues
: ($) => scalarValuesIterator(`${$}`),
391 * Returns the character at the provided position in the string
392 * representation of the provided value according to the algorithm of
393 * `String::codePointAt`.
395 export const getCharacter
= ($, pos
) => {
396 const codepoint
= getCodepoint($, pos
);
397 return codepoint
== null
399 : stringFromCodepoints(codepoint
);
404 * Returns the code unit at the provided position in the string
405 * representation of the provided value according to the algorithm of
406 * `String::charAt`, except that out‐of‐bounds values return undefined
411 /** Returns whether the provided value is an integer index string. */
412 isIntegerIndexString
,
415 * Returns a string created from the provided code units.
417 * ※ This is effectively an alias for `String.fromCharCode`, but
418 * with the same error behaviour as `String.fromCodePoint`.
420 * ☡ This function throws an error if provided with an argument which
421 * is not an integral number from 0 to FFFF₁₆ inclusive.
426 * Returns the result of catenating the string representations of the
427 * provided values, returning a new string according to the algorithm
428 * of `String::concat`.
430 * ※ If no arguments are given, this function returns the empty
431 * string. This is different behaviour than if an explicit undefined
432 * first argument is given, in which case the resulting string will
433 * begin with `"undefined"`.
437 const { fromCharCode
} = String
;
438 const { charCodeAt
, concat
} = String
.prototype;
440 MAX_SAFE_INTEGER
: MAXIMUM_SAFE_INTEGRAL_NUMBER
,
441 isInteger
: isIntegralNumber
,
446 getCodeUnit
: ($, n
) => {
447 const codeUnit
= call(charCodeAt
, $, [n
]);
448 return isNan(codeUnit
) ? undefined : codeUnit
;
450 isIntegerIndexString
: ($) => {
451 const value
= canonicalNumericIndexString($);
452 if (value
!== undefined && isIntegralNumber(value
)) {
453 // The provided value is an integral canonical numeric index
455 return sameValue(value
, 0) ||
456 value
> 0 && value
<= MAXIMUM_SAFE_INTEGRAL_NUMBER
&&
457 value
=== toLength(value
);
459 // The provided value is not an integral canonical numeric
464 stringCatenate
: defineOwnProperties(
465 (...args
) => call(concat
, "", args
),
466 { name
: { value
: "stringCatenate" }, length
: { value
: 2 } },
468 stringFromCodeUnits
: defineOwnProperties(
470 for (let index
= 0; index
< codeUnits
.length
; ++index
) {
471 // Iterate over each provided code unit and throw if it is
473 const nextCU
= +codeUnits
[index
];
475 !isIntegralNumber(nextCU
) || nextCU
< 0 || nextCU
> 0xFFFF
477 // The code unit is not an integral number between 0 and
479 throw new RangeError(
480 `Piscēs: Code unit out of range: ${nextCU}.`,
483 // The code unit is acceptable.
487 return call(fromCharCode
, undefined, codeUnits
);
489 { name
: { value
: "stringFromCodeUnits" }, length
: { value
: 1 } },
495 * Returns the codepoint at the provided position in the string
496 * representation of the provided value according to the algorithm of
497 * `String::codePointAt`.
499 export const getCodepoint
= createCallableFunction(
500 stringPrototype
.codePointAt
,
501 { name
: "getCodepoint" },
505 * Returns the index of the first occurrence of the search string in
506 * the string representation of the provided value according to the
507 * algorithm of `String::indexOf`.
509 export const getFirstSubstringIndex
= createCallableFunction(
510 stringPrototype
.indexOf
,
511 { name
: "getFirstSubstringIndex" },
515 * Returns the index of the last occurrence of the search string in the
516 * string representation of the provided value according to the
517 * algorithm of `String::lastIndexOf`.
519 export const getLastSubstringIndex
= createCallableFunction(
520 stringPrototype
.lastIndexOf
,
521 { name
: "getLastSubstringIndex" },
525 * Returns the result of joining the provided iterable.
527 * If no separator is provided, it defaults to ",".
529 * If a value is nullish, it will be stringified as the empty string.
531 export const join
= (() => {
532 const { join
: arrayJoin
} = arrayPrototype
;
533 const join
= ($, separator
) =>
537 [separator
=== undefined ? "," : `${separator}`],
543 * Returns a string created from the raw value of the tagged template
546 * ※ This is effectively an alias for `String.raw`.
548 export const rawString
= createArrowFunction(String
.raw
, {
553 * Returns a string created from the provided codepoints.
555 * ※ This is effectively an alias for `String.fromCodePoint`.
557 * ☡ This function throws an error if provided with an argument which
558 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
560 export const stringFromCodepoints
= createArrowFunction(
561 String
.fromCodePoint
,
562 { name
: "stringFromCodepoints" },
566 * Returns the result of splitting the provided value on Ascii
export const splitOnAsciiWhitespace = ($) => {
  // Strip and collapse first, then split the result on a single space.
  // NOTE(review): if the collapsed string is empty, a `String::split`‐
  // style split presumably yields `[""]` rather than `[]` — confirm that
  // callers expect this.
  const collapsed = stripAndCollapseAsciiWhitespace($);
  return stringSplit(collapsed, " ");
};
573 * Returns the result of splitting the provided value on commas,
574 * trimming Ascii whitespace from the resulting tokens.
576 export const splitOnCommas
= ($) =>
578 stripLeadingAndTrailingAsciiWhitespace(
581 /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
589 * Returns whether the string representation of the provided value ends
590 * with the provided search string according to the algorithm of
591 * `String::endsWith`.
593 export const stringEndsWith
= createCallableFunction(
594 stringPrototype
.endsWith
,
595 { name
: "stringEndsWith" },
599 * Returns whether the string representation of the provided value
600 * contains the provided search string according to the algorithm of
601 * `String::includes`.
603 export const stringIncludes
= createCallableFunction(
604 stringPrototype
.includes
,
605 { name
: "stringIncludes" },
609 * Returns the result of matching the string representation of the
610 * provided value with the provided matcher according to the algorithm
611 * of `String::match`.
613 export const stringMatch
= createCallableFunction(
614 stringPrototype
.match
,
615 { name
: "stringMatch" },
619 * Returns the result of matching the string representation of the
620 * provided value with the provided matcher according to the algorithm
621 * of `String::matchAll`.
623 export const stringMatchAll
= createCallableFunction(
624 stringPrototype
.matchAll
,
625 { name
: "stringMatchAll" },
629 * Returns the normalized form of the string representation of the
630 * provided value according to the algorithm of `String::normalize`.
632 export const stringNormalize
= createCallableFunction(
633 stringPrototype
.normalize
,
634 { name
: "stringNormalize" },
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padEnd`.
642 export const stringPadEnd
= createCallableFunction(
643 stringPrototype
.padEnd
,
644 { name
: "stringPadEnd" },
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padStart`.
652 export const stringPadStart
= createCallableFunction(
653 stringPrototype
.padStart
,
654 { name
: "stringPadStart" },
658 * Returns the result of repeating the string representation of the
659 * provided value the provided number of times according to the
660 * algorithm of `String::repeat`.
662 export const stringRepeat
= createCallableFunction(
663 stringPrototype
.repeat
,
664 { name
: "stringRepeat" },
668 * Returns the result of replacing the string representation of the
669 * provided value with the provided replacement, using the provided
670 * matcher and according to the algorithm of `String::replace`.
672 export const stringReplace
= createCallableFunction(
673 stringPrototype
.replace
,
674 { name
: "stringReplace" },
678 * Returns the result of replacing the string representation of the
679 * provided value with the provided replacement, using the provided
680 * matcher and according to the algorithm of `String::replaceAll`.
682 export const stringReplaceAll
= createCallableFunction(
683 stringPrototype
.replaceAll
,
684 { name
: "stringReplaceAll" },
688 * Returns the result of searching the string representation of the
689 * provided value using the provided matcher and according to the
690 * algorithm of `String::search`.
692 export const stringSearch
= createCallableFunction(
693 stringPrototype
.search
,
694 { name
: "stringSearch" },
698 * Returns a slice of the string representation of the provided value
699 * according to the algorithm of `String::slice`.
701 export const stringSlice
= createCallableFunction(
702 stringPrototype
.slice
,
703 { name
: "stringSlice" },
 * Returns the result of splitting the string representation of the
708 * provided value on the provided separator according to the algorithm
709 * of `String::split`.
711 export const stringSplit
= createCallableFunction(
712 stringPrototype
.split
,
713 { name
: "stringSplit" },
717 * Returns whether the string representation of the provided value
718 * starts with the provided search string according to the algorithm of
719 * `String::startsWith`.
721 export const stringStartsWith
= createCallableFunction(
722 stringPrototype
.startsWith
,
723 { name
: "stringStartsWith" },
727 * Returns the value of the provided string.
 * ※ This is effectively an alias for `String::valueOf`.
731 * ☡ This function throws if the provided argument is not a string and
732 * does not have a `[[StringData]]` slot.
734 export const stringValue
= createCallableFunction(
735 stringPrototype
.valueOf
,
736 { name
: "stringValue" },
740 * Returns the result of stripping leading and trailing Ascii
741 * whitespace from the provided value and collapsing other Ascii
742 * whitespace in the string representation of the provided value.
744 export const stripAndCollapseAsciiWhitespace
= ($) =>
745 stripLeadingAndTrailingAsciiWhitespace(
754 * Returns the result of stripping leading and trailing Ascii
755 * whitespace from the string representation of the provided value.
export const stripLeadingAndTrailingAsciiWhitespace = ($) => {
  // The pattern is anchored at both ends and every part of it can match
  // the empty string, so it matches any input string. The lazy capture
  // group holds whatever lies between the leading and trailing runs of
  // Ascii whitespace (newline, carriage return, tab, form feed, space).
  const match = call(
    reExec,
    /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u,
    [$],
  );
  return match[1];
};
761 * Returns a substring of the string representation of the provided
762 * value according to the algorithm of `String::substring`.
764 export const substring
= createCallableFunction(
765 stringPrototype
.substring
,
769 * Returns the result of converting the provided value to a string of
770 * scalar values by replacing (unpaired) surrogate values with
773 export const toScalarValueString
= createCallableFunction(
774 String
.prototype.toWellFormed
,
775 { name
: "toScalarValueString" },
779 * Returns the result of converting the provided value to a string.
781 * ☡ This method throws for symbols and other objects without a string
export const toString = ($) => {
  // Template‐literal interpolation performs the string conversion.
  // Unlike calling `String()` as a function, this throws a TypeError
  // when given a symbol.
  return `${$}`;
};