1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
14 createCallableFunction
,
16 } from "./function.js";
18 arrayIteratorFunction
,
19 stringIteratorFunction
,
20 } from "./iterable.js";
22 defineOwnDataProperty
,
24 getOwnPropertyDescriptors
,
29 import { sameValue
, toLength
, UNDEFINED
} from "./value.js";
// Prototype objects of the builtin constructors, captured once at
// module load for use by the definitions below.
//
// NOTE(review): `RE` is declared elsewhere in this file (presumably as
// an alias of `RegExp`) — confirm it is in scope before this point.
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

// The `exec` method of the regular expression prototype, captured so
// that it can be applied to an explicit receiver via `call`.
const reExec = rePrototype.exec;
40 * A `RegExp`like object which only matches entire strings, and may
41 * have additional constraints specified.
43 * Matchers are callable objects and will return true if they are
44 * called with a string that they match, and false otherwise.
45 * Matchers will always return false if called with nonstrings,
46 * although other methods like `::exec` coerce their arguments and
47 * may still return true.
51 const { toString
: reToString
} = rePrototype
;
53 Object
.getOwnPropertyDescriptor(rePrototype
, "dotAll").get;
55 Object
.getOwnPropertyDescriptor(rePrototype
, "flags").get;
57 Object
.getOwnPropertyDescriptor(rePrototype
, "global").get;
59 Object
.getOwnPropertyDescriptor(rePrototype
, "hasIndices").get;
61 Object
.getOwnPropertyDescriptor(rePrototype
, "ignoreCase").get;
63 Object
.getOwnPropertyDescriptor(rePrototype
, "multiline").get;
65 Object
.getOwnPropertyDescriptor(rePrototype
, "source").get;
67 Object
.getOwnPropertyDescriptor(rePrototype
, "sticky").get;
69 Object
.getOwnPropertyDescriptor(rePrototype
, "unicode").get;
71 const Matcher
= class extends identity
{
76 * Constructs a new `Matcher` from the provided source.
78 * If the provided source is a regular expression, then it must
79 * have the unicode flag set. Otherwise, it is interpreted as the
80 * string source of a regular expression with the unicode flag set.
82 * Other flags are taken from the provided regular expression
83 * object, if any are present.
85 * A name for the matcher may be provided as the second argument.
87 * A callable constraint on acceptable inputs may be provided as a
88 * third argument. If provided, it will be called with three
89 * arguments whenever a match appears successful: first, the string
90 * being matched, second, the match result, and third, the
91 * `Matcher` object itself. If the return value of this call is
92 * falsey, then the match will be considered a failure.
94 * ☡ If the provided source regular expression uses nongreedy
95 * quantifiers, it may not match the whole string even if a match
96 * with the whole string is possible. Surround the regular
97 * expression with `^(?:` and `)$` if you don’t want nongreedy
98 * regular expressions to fail when shorter matches are possible.
100 constructor(source
, name
= UNDEFINED
, constraint
= null) {
103 if (typeof $ !== "string") {
104 // The provided value is not a string.
107 // The provided value is a string. Set the `.lastIndex` of
108 // the regular expression to 0 and see if the first attempt
109 // at a match matches the whole string and passes the
110 // provided constraint (if present).
111 regExp
.lastIndex
= 0;
112 const result
= call(reExec
, regExp
, [$]);
113 return result
?.[0] === $ &&
114 (constraint
=== null || constraint($, result
, this));
118 const regExp
= this.#regExp
= (() => {
120 call(reExec
, source
, [""]); // throws if source not a RegExp
122 return new RE(`${source}`, "u");
124 const unicode
= call(getUnicode
, source
, []);
126 // The provided regular expression does not have a unicode
129 `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
132 // The provided regular expression has a unicode flag.
133 return new RE(source
);
136 if (constraint
!== null && typeof constraint
!== "function") {
138 "Piscēs: Cannot construct Matcher: Constraint is not callable.",
141 this.#constraint
= constraint
;
142 return defineOwnProperties(
143 setPrototype(this, matcherPrototype
),
145 lastIndex
: setPropertyValues(objectCreate(null), {
151 name
: defineOwnDataProperty(
156 : `Matcher(${call(reToString, regExp, [])})`,
163 /** Gets whether the dot‐all flag is present on this `Matcher`. */
165 return call(getDotAll
, this.#regExp
, []);
169 * Executes this `Matcher` on the provided value and returns the
170 * result if there is a match, or null otherwise.
172 * Matchers only match if they can match the entire value on the
175 * ☡ The match result returned by this method will be the same as
176 * that passed to the constraint function—and may have been
177 * modified by said function prior to being returned.
180 const regExp
= this.#regExp
;
181 const constraint
= this.#constraint
;
182 const string
= `${$}`;
183 regExp
.lastIndex
= 0;
184 const result
= call(reExec
, regExp
, [string
]);
186 result
?.[0] === string
&&
187 (constraint
=== null || constraint(string
, result
, this))
189 // The entire string was matched and the constraint, if
190 // present, returned a truthy value.
193 // The entire string was not matched or the constraint returned
200 * Gets the flags present on this `Matcher`.
202 * ※ This needs to be defined because the internal `RegExp` object
203 * may have flags which are not yet recognized by ♓🌟 Piscēs.
206 return call(getFlags
, this.#regExp
, []);
209 /** Gets whether the global flag is present on this `Matcher`. */
211 return call(getGlobal
, this.#regExp
, []);
215 * Gets whether the has‐indices flag is present on this `Matcher`.
218 return call(getHasIndices
, this.#regExp
, []);
222 * Gets whether the ignore‐case flag is present on this `Matcher`.
225 return call(getIgnoreCase
, this.#regExp
, []);
229 * Gets whether the multiline flag is present on this `Matcher`.
232 return call(getMultiline
, this.#regExp
, []);
235 /** Gets the regular expression source for this `Matcher`. */
237 return call(getSource
, this.#regExp
, []);
240 /** Gets whether the sticky flag is present on this `Matcher`. */
242 return call(getSticky
, this.#regExp
, []);
246 * Gets whether the unicode flag is present on this `Matcher`.
248 * ※ This will always be true.
251 return call(getUnicode
, this.#regExp
, []);
255 const matcherConstructor
= Object
.defineProperties(
256 class extends RegExp
{
257 constructor(...args
) {
258 return new Matcher(...args
);
262 name
: defineOwnDataProperty(
267 length
: defineOwnDataProperty(Object
.create(null), "value", 1),
270 const matcherPrototype
= defineOwnProperties(
271 matcherConstructor
.prototype,
272 getOwnPropertyDescriptors(Matcher
.prototype),
274 constructor: defineOwnDataProperty(
282 return { Matcher
: matcherConstructor
};
287 * Returns the result of converting the provided value to A·S·C·I·I
293 * Returns the result of converting the provided value to A·S·C·I·I
299 toLowerCase
: stringToLowercase
,
300 toUpperCase
: stringToUppercase
,
303 asciiLowercase
: ($) =>
307 createCallableFunction(stringToLowercase
),
309 asciiUppercase
: ($) =>
313 createCallableFunction(stringToUppercase
),
/**
 * Returns −0 if the provided argument is "-0"; returns a number
 * representing the index if the provided argument is a canonical
 * numeric index string; otherwise, returns undefined.
 *
 * There is no clamping of the numeric index, but note that numbers
 * above 2^53 − 1 are neither safe nor valid integer indices.
 */
326 export const canonicalNumericIndexString
= ($) => {
327 if (typeof $ !== "string") {
329 } else if ($ === "-0") {
333 return $ === `${n}` ? n
: UNDEFINED
;
339 * Returns an iterator over the codepoints in the string representation
340 * of the provided value according to the algorithm of
341 * `String::[Symbol.iterator]`.
346 * Returns an iterator over the code units in the string
347 * representation of the provided value.
352 * Returns an iterator over the codepoints in the string
353 * representation of the provided value.
358 * Returns an iterator over the scalar values in the string
359 * representation of the provided value.
361 * Codepoints which are not valid Unicode scalar values are replaced
366 const generateCharacters
= function* (character
) {
369 const generateCodeUnits
= function* (ucsCharacter
) {
370 yield getCodeUnit(ucsCharacter
, 0);
372 const generateCodepoints
= function* (character
) {
373 const { allowSurrogates
} = this;
374 const codepoint
= getCodepoint(character
, 0);
375 yield allowSurrogates
|| codepoint
<= 0xD7FF || codepoint
>= 0xE000
380 const charactersIterator
= stringIteratorFunction(
382 "String Character Iterator",
384 const codeUnitsIterator
= arrayIteratorFunction(
386 "String Code Unit Iterator",
388 const codepointsIterator
= stringIteratorFunction(
389 bind(generateCodepoints
, { allowSurrogates
: true }, []),
390 "String Codepoint Iterator",
392 const scalarValuesIterator
= stringIteratorFunction(
393 bind(generateCodepoints
, { allowSurrogates
: false }, []),
394 "String Scalar Value Iterator",
398 characters
: ($) => charactersIterator(`${$}`),
399 codeUnits
: ($) => codeUnitsIterator(`${$}`),
400 codepoints
: ($) => codepointsIterator(`${$}`),
401 scalarValues
: ($) => scalarValuesIterator(`${$}`),
406 * Returns the character at the provided position in the string
407 * representation of the provided value according to the algorithm of
408 * `String::codePointAt`.
410 export const getCharacter
= ($, pos
) => {
411 const codepoint
= getCodepoint($, pos
);
412 return codepoint
== null
414 : stringFromCodepoints(codepoint
);
419 * Returns the code unit at the provided position in the string
420 * representation of the provided value according to the algorithm of
421 * `String::charAt`, except that out‐of‐bounds values return undefined
427 * Returns a string created from the provided code units.
429 * ※ This is effectively an alias for `String.fromCharCode`, but
430 * with the same error behaviour as `String.fromCodePoint`.
432 * ☡ This function throws an error if provided with an argument which
433 * is not an integral number from 0 to FFFF₁₆ inclusive.
438 * Returns the result of catenating the string representations of the
439 * provided values, returning a new string according to the algorithm
440 * of `String::concat`.
442 * ※ If no arguments are given, this function returns the empty
443 * string. This is different behaviour than if an explicit undefined
444 * first argument is given, in which case the resulting string will
445 * begin with `"undefined"`.
449 const { fromCharCode
} = String
;
450 const { charCodeAt
, concat
} = String
.prototype;
452 isInteger
: isIntegralNumber
,
457 getCodeUnit
: ($, n
) => {
458 const codeUnit
= call(charCodeAt
, $, [n
]);
459 return isNan(codeUnit
) ? UNDEFINED
: codeUnit
;
461 stringCatenate
: Object
.defineProperties(
462 (...args
) => call(concat
, "", args
),
463 { name
: { value
: "stringCatenate" }, length
: { value
: 2 } },
465 stringFromCodeUnits
: Object
.defineProperties(
467 for (let index
= 0; index
< codeUnits
.length
; ++index
) {
468 // Iterate over each provided code unit and throw if it is
470 const nextCU
= +codeUnits
[index
];
472 !isIntegralNumber(nextCU
) || nextCU
< 0 || nextCU
> 0xFFFF
474 // The code unit is not an integral number between 0 and
476 throw new RangeError(
477 `Piscēs: Code unit out of range: ${nextCU}.`,
480 // The code unit is acceptable.
484 return call(fromCharCode
, UNDEFINED
, codeUnits
);
486 { name
: { value
: "stringFromCodeUnits" }, length
: { value
: 1 } },
492 * Returns the codepoint at the provided position in the string
493 * representation of the provided value according to the algorithm of
494 * `String::codePointAt`.
496 export const getCodepoint
= createCallableFunction(
497 stringPrototype
.codePointAt
,
498 { name
: "getCodepoint" },
502 * Returns the index of the first occurrence of the search string in
503 * the string representation of the provided value according to the
504 * algorithm of `String::indexOf`.
506 export const getFirstSubstringIndex
= createCallableFunction(
507 stringPrototype
.indexOf
,
508 { name
: "getFirstSubstringIndex" },
512 * Returns the index of the last occurrence of the search string in the
513 * string representation of the provided value according to the
514 * algorithm of `String::lastIndexOf`.
516 export const getLastSubstringIndex
= createCallableFunction(
517 stringPrototype
.lastIndexOf
,
518 { name
: "getLastSubstringIndex" },
521 /** Returns whether the provided value is an array index. */
522 export const isArrayIndexString
= ($) => {
523 const value
= canonicalNumericIndexString($);
524 if (value
!== UNDEFINED
) {
525 // The provided value is a canonical numeric index string; return
526 // whether it is in range for array indices.
527 return sameValue(value
, 0) ||
528 value
=== toLength(value
) && value
> 0 && value
< -1 >>> 0;
530 // The provided value is not a canonical numeric index string.
535 /** Returns whether the provided value is an integer index string. */
536 export const isIntegerIndexString
= ($) => {
537 const value
= canonicalNumericIndexString($);
538 if (value
!== UNDEFINED
) {
539 // The provided value is a canonical numeric index string; return
540 // whether it is in range for integer indices.
541 return sameValue(value
, 0) ||
542 value
=== toLength(value
) && value
> 0;
544 // The provided value is not a canonical numeric index string.
550 * Returns the result of joining the provided iterable.
552 * If no separator is provided, it defaults to ",".
554 * If a value is nullish, it will be stringified as the empty string.
556 export const join
= (() => {
557 const { join
: arrayJoin
} = arrayPrototype
;
558 const join
= ($, separator
) =>
562 [separator
=== UNDEFINED
? "," : `${separator}`],
568 * Returns a string created from the raw value of the tagged template
571 * ※ This is effectively an alias for `String.raw`.
573 export const rawString
= createArrowFunction(String
.raw
, {
578 * Returns a string created from the provided codepoints.
580 * ※ This is effectively an alias for `String.fromCodePoint`.
582 * ☡ This function throws an error if provided with an argument which
583 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
585 export const stringFromCodepoints
= createArrowFunction(
586 String
.fromCodePoint
,
587 { name
: "stringFromCodepoints" },
/**
 * Returns the result of splitting the provided value on Ascii
 * whitespace.
 *
 * The value is first passed through `stripAndCollapseAsciiWhitespace`
 * and the result is then split on single spaces.
 *
 * ※ If nothing remains after stripping (the provided value was empty
 * or consisted solely of Ascii whitespace), an empty array is
 * returned. Splitting the empty string on " " would otherwise produce
 * an array containing one empty string, which is not a meaningful
 * token.
 */
export const splitOnAsciiWhitespace = ($) => {
  const collapsed = stripAndCollapseAsciiWhitespace($);
  return collapsed === "" ? [] : stringSplit(collapsed, " ");
};
598 * Returns the result of splitting the provided value on commas,
599 * trimming Ascii whitespace from the resulting tokens.
601 export const splitOnCommas
= ($) =>
603 stripLeadingAndTrailingAsciiWhitespace(
606 /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
614 * Returns whether the string representation of the provided value ends
615 * with the provided search string according to the algorithm of
616 * `String::endsWith`.
618 export const stringEndsWith
= createCallableFunction(
619 stringPrototype
.endsWith
,
620 { name
: "stringEndsWith" },
624 * Returns whether the string representation of the provided value
625 * contains the provided search string according to the algorithm of
626 * `String::includes`.
628 export const stringIncludes
= createCallableFunction(
629 stringPrototype
.includes
,
630 { name
: "stringIncludes" },
634 * Returns the result of matching the string representation of the
635 * provided value with the provided matcher according to the algorithm
636 * of `String::match`.
638 export const stringMatch
= createCallableFunction(
639 stringPrototype
.match
,
640 { name
: "stringMatch" },
644 * Returns the result of matching the string representation of the
645 * provided value with the provided matcher according to the algorithm
646 * of `String::matchAll`.
648 export const stringMatchAll
= createCallableFunction(
649 stringPrototype
.matchAll
,
650 { name
: "stringMatchAll" },
654 * Returns the normalized form of the string representation of the
655 * provided value according to the algorithm of `String::normalize`.
657 export const stringNormalize
= createCallableFunction(
658 stringPrototype
.normalize
,
659 { name
: "stringNormalize" },
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length, according to
 * the algorithm of `String::padEnd`.
 */
667 export const stringPadEnd
= createCallableFunction(
668 stringPrototype
.padEnd
,
669 { name
: "stringPadEnd" },
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length, according to
 * the algorithm of `String::padStart`.
 */
677 export const stringPadStart
= createCallableFunction(
678 stringPrototype
.padStart
,
679 { name
: "stringPadStart" },
683 * Returns the result of repeating the string representation of the
684 * provided value the provided number of times according to the
685 * algorithm of `String::repeat`.
687 export const stringRepeat
= createCallableFunction(
688 stringPrototype
.repeat
,
689 { name
: "stringRepeat" },
693 * Returns the result of replacing the string representation of the
694 * provided value with the provided replacement, using the provided
695 * matcher and according to the algorithm of `String::replace`.
697 export const stringReplace
= createCallableFunction(
698 stringPrototype
.replace
,
699 { name
: "stringReplace" },
703 * Returns the result of replacing the string representation of the
704 * provided value with the provided replacement, using the provided
705 * matcher and according to the algorithm of `String::replaceAll`.
707 export const stringReplaceAll
= createCallableFunction(
708 stringPrototype
.replaceAll
,
709 { name
: "stringReplaceAll" },
713 * Returns the result of searching the string representation of the
714 * provided value using the provided matcher and according to the
715 * algorithm of `String::search`.
717 export const stringSearch
= createCallableFunction(
718 stringPrototype
.search
,
719 { name
: "stringSearch" },
723 * Returns a slice of the string representation of the provided value
724 * according to the algorithm of `String::slice`.
726 export const stringSlice
= createCallableFunction(
727 stringPrototype
.slice
,
728 { name
: "stringSlice" },
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of `String::split`.
 */
736 export const stringSplit
= createCallableFunction(
737 stringPrototype
.split
,
738 { name
: "stringSplit" },
742 * Returns whether the string representation of the provided value
743 * starts with the provided search string according to the algorithm of
744 * `String::startsWith`.
746 export const stringStartsWith
= createCallableFunction(
747 stringPrototype
.startsWith
,
748 { name
: "stringStartsWith" },
/**
 * Returns the value of the provided string.
 *
 * ※ This is effectively an alias for `String::valueOf`.
 *
 * ☡ This function throws if the provided argument is not a string and
 * does not have a `[[StringData]]` slot.
 */
759 export const stringValue
= createCallableFunction(
760 stringPrototype
.valueOf
,
761 { name
: "stringValue" },
765 * Returns the result of stripping leading and trailing Ascii
766 * whitespace from the provided value and collapsing other Ascii
767 * whitespace in the string representation of the provided value.
769 export const stripAndCollapseAsciiWhitespace
= ($) =>
770 stripLeadingAndTrailingAsciiWhitespace(
/**
 * Returns the result of stripping leading and trailing Ascii
 * whitespace from the string representation of the provided value.
 *
 * Ascii whitespace here means U+0009 (tab), U+000A (line feed),
 * U+000C (form feed), U+000D (carriage return), and U+0020 (space).
 * Other whitespace (for example U+000B or U+00A0) is left untouched.
 *
 * ※ This scans inward from both ends of the string instead of using a
 * single anchored regular expression with a lazy group; the latter
 * backtracks quadratically on strings which contain long interior
 * runs of whitespace.
 *
 * ☡ This function throws if the provided value cannot be converted
 * to a string (for example, if it is a symbol).
 */
export const stripLeadingAndTrailingAsciiWhitespace = ($) => {
  const string = `${$}`;
  const isAsciiWhitespace = (codeUnit) =>
    codeUnit === 0x9 || codeUnit === 0xA || codeUnit === 0xC ||
    codeUnit === 0xD || codeUnit === 0x20;
  let start = 0;
  let end = string.length;
  while (start < end && isAsciiWhitespace(getCodeUnit(string, start))) {
    // Advance past each leading Ascii whitespace code unit.
    ++start;
  }
  while (
    end > start && isAsciiWhitespace(getCodeUnit(string, end - 1))
  ) {
    // Retreat past each trailing Ascii whitespace code unit.
    --end;
  }
  return stringSlice(string, start, end);
};
786 * Returns a substring of the string representation of the provided
787 * value according to the algorithm of `String::substring`.
789 export const substring
= createCallableFunction(
790 stringPrototype
.substring
,
794 * Returns the result of converting the provided value to a string of
795 * scalar values by replacing (unpaired) surrogate values with
798 export const toScalarValueString
= createCallableFunction(
799 String
.prototype.toWellFormed
,
800 { name
: "toScalarValueString" },
/**
 * Returns the result of converting the provided value to a string.
 *
 * The conversion is the one performed by template literal
 * interpolation.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => `${$}`;