1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
14 createCallableFunction
,
16 } from "./function.js";
18 arrayIteratorFunction
,
19 stringIteratorFunction
,
20 } from "./iterable.js";
23 getOwnPropertyDescriptors
,
28 import { ITERATOR
} from "./value.js";
// Prototype objects of the regular expression, array, and string
// constructors, read once here and reused below.
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

// The `exec` method found on the regular expression prototype, held
// under a local name so it can be applied to arbitrary receivers.
const reExec = rePrototype.exec;
39 * A `RegExp`like object which only matches entire strings, and may
40 * have additional constraints specified.
42 * Matchers are callable objects and will return true if they are
43 * called with a string that they match, and false otherwise.
44 * Matchers will always return false if called with nonstrings,
45 * although other methods like `::exec` coerce their arguments and
46 * may still report a match.
50 const { toString
: reToString
} = rePrototype
;
52 Object
.getOwnPropertyDescriptor(rePrototype
, "dotAll").get;
54 Object
.getOwnPropertyDescriptor(rePrototype
, "flags").get;
56 Object
.getOwnPropertyDescriptor(rePrototype
, "global").get;
58 Object
.getOwnPropertyDescriptor(rePrototype
, "hasIndices").get;
60 Object
.getOwnPropertyDescriptor(rePrototype
, "ignoreCase").get;
62 Object
.getOwnPropertyDescriptor(rePrototype
, "multiline").get;
64 Object
.getOwnPropertyDescriptor(rePrototype
, "source").get;
66 Object
.getOwnPropertyDescriptor(rePrototype
, "sticky").get;
68 Object
.getOwnPropertyDescriptor(rePrototype
, "unicode").get;
70 const Matcher
= class extends identity
{
75 * Constructs a new `Matcher` from the provided source.
77 * If the provided source is a regular expression, then it must
78 * have the unicode flag set. Otherwise, it is interpreted as the
79 * string source of a regular expression with the unicode flag set.
81 * Other flags are taken from the provided regular expression
82 * object, if any are present.
84 * A name for the matcher may be provided as the second argument.
86 * A callable constraint on acceptable inputs may be provided as a
87 * third argument. If provided, it will be called with three
88 * arguments whenever a match appears successful: first, the string
89 * being matched, second, the match result, and third, the
90 * `Matcher` object itself. If the return value of this call is
91 * falsey, then the match will be considered a failure.
93 * ☡ If the provided source regular expression uses nongreedy
94 * quantifiers, it may not match the whole string even if a match
95 * with the whole string is possible. Surround the regular
96 * expression with `^(?:` and `)$` if you don’t want nongreedy
97 * regular expressions to fail when shorter matches are possible.
99 constructor(source
, name
= undefined, constraint
= null) {
102 if (typeof $ !== "string") {
103 // The provided value is not a string.
106 // The provided value is a string. Set the `.lastIndex` of
107 // the regular expression to 0 and see if the first attempt
108 // at a match matches the whole string and passes the
109 // provided constraint (if present).
110 regExp
.lastIndex
= 0;
111 const result
= call(reExec
, regExp
, [$]);
112 return result
?.[0] === $ &&
113 (constraint
=== null || constraint($, result
, this));
117 const regExp
= this.#regExp
= (() => {
119 call(reExec
, source
, [""]); // throws if source not a RegExp
121 return new RE(`${source}`, "u");
123 const unicode
= call(getUnicode
, source
, []);
125 // The provided regular expression does not have a unicode
128 `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
131 // The provided regular expression has a unicode flag.
132 return new RE(source
);
135 if (constraint
!== null && typeof constraint
!== "function") {
137 "Piscēs: Cannot construct Matcher: Constraint is not callable.",
140 this.#constraint
= constraint
;
141 return defineOwnProperties(
142 setPrototype(this, matcherPrototype
),
153 : `Matcher(${call(reToString, regExp, [])})`,
160 /** Gets whether the dot‐all flag is present on this `Matcher`. */
162 return call(getDotAll
, this.#regExp
, []);
166 * Executes this `Matcher` on the provided value and returns the
167 * result if there is a match, or null otherwise.
169 * Matchers only match if they can match the entire value on the
172 * ☡ The match result returned by this method will be the same as
173 * that passed to the constraint function—and may have been
174 * modified by said function prior to being returned.
177 const regExp
= this.#regExp
;
178 const constraint
= this.#constraint
;
179 const string
= `${$}`;
180 regExp
.lastIndex
= 0;
181 const result
= call(reExec
, regExp
, [string
]);
183 result
?.[0] === string
&&
184 (constraint
=== null || constraint(string
, result
, this))
186 // The entire string was matched and the constraint, if
187 // present, returned a truthy value.
190 // The entire string was not matched or the constraint returned
197 * Gets the flags present on this `Matcher`.
199 * ※ This needs to be defined because the internal `RegExp` object
200 * may have flags which are not yet recognized by ♓🌟 Piscēs.
203 return call(getFlags
, this.#regExp
, []);
206 /** Gets whether the global flag is present on this `Matcher`. */
208 return call(getGlobal
, this.#regExp
, []);
212 * Gets whether the has‐indices flag is present on this `Matcher`.
215 return call(getHasIndices
, this.#regExp
, []);
219 * Gets whether the ignore‐case flag is present on this `Matcher`.
222 return call(getIgnoreCase
, this.#regExp
, []);
226 * Gets whether the multiline flag is present on this `Matcher`.
229 return call(getMultiline
, this.#regExp
, []);
232 /** Gets the regular expression source for this `Matcher`. */
234 return call(getSource
, this.#regExp
, []);
237 /** Gets whether the sticky flag is present on this `Matcher`. */
239 return call(getSticky
, this.#regExp
, []);
243 * Gets whether the unicode flag is present on this `Matcher`.
245 * ※ This will always be true.
248 return call(getUnicode
, this.#regExp
, []);
252 const matcherConstructor
= defineOwnProperties(
253 class extends RegExp
{
254 constructor(...args
) {
255 return new Matcher(...args
);
259 name
: { value
: "Matcher" },
260 length
: { value
: 1 },
263 const matcherPrototype
= defineOwnProperties(
264 matcherConstructor
.prototype,
265 getOwnPropertyDescriptors(Matcher
.prototype),
266 { constructor: { value
: matcherConstructor
} },
269 return { Matcher
: matcherConstructor
};
274 * Returns the result of converting the provided value to A·S·C·I·I
280 * Returns the result of converting the provided value to A·S·C·I·I
286 toLowerCase
: stringToLowercase
,
287 toUpperCase
: stringToUppercase
,
290 asciiLowercase
: ($) =>
294 createCallableFunction(stringToLowercase
),
296 asciiUppercase
: ($) =>
300 createCallableFunction(stringToUppercase
),
307 * Returns an iterator over the codepoints in the string representation
308 * of the provided value according to the algorithm of
309 * `String::[Symbol.iterator]`.
314 * Returns an iterator over the code units in the string
315 * representation of the provided value.
320 * Returns an iterator over the codepoints in the string
321 * representation of the provided value.
326 * Returns an iterator over the scalar values in the string
327 * representation of the provided value.
329 * Codepoints which are not valid Unicode scalar values are replaced
335 * Returns the result of converting the provided value to a string of
336 * scalar values by replacing (unpaired) surrogate values with
341 const generateCharacters
= function* (character
) {
344 const generateCodeUnits
= function* (ucsCharacter
) {
345 yield getCodeUnit(ucsCharacter
, 0);
347 const generateCodepoints
= function* (character
) {
348 const { allowSurrogates
} = this;
349 const codepoint
= getCodepoint(character
, 0);
350 yield allowSurrogates
|| codepoint
<= 0xD7FF || codepoint
>= 0xE000
355 const charactersIterator
= stringIteratorFunction(
357 "String Character Iterator",
359 const codeUnitsIterator
= arrayIteratorFunction(
361 "String Code Unit Iterator",
363 const codepointsIterator
= stringIteratorFunction(
364 bind(generateCodepoints
, { allowSurrogates
: true }, []),
365 "String Codepoint Iterator",
367 const scalarValuesIterator
= stringIteratorFunction(
368 bind(generateCodepoints
, { allowSurrogates
: false }, []),
369 "String Scalar Value Iterator",
372 next
: scalarValuesNext
,
373 } = getPrototype(scalarValuesIterator(""));
374 const scalarValueIterablePrototype
= {
379 scalarValuesIterator(this.source
),
387 characters
: ($) => charactersIterator(`${$}`),
388 codeUnits
: ($) => codeUnitsIterator(`${$}`),
389 codepoints
: ($) => codepointsIterator(`${$}`),
390 scalarValues
: ($) => scalarValuesIterator(`${$}`),
391 scalarValueString
: ($) =>
392 stringFromCodepoints(...objectCreate(
393 scalarValueIterablePrototype
,
394 { source
: { value
: `${$}` } },
400 * Returns the character at the provided position in the string
401 * representation of the provided value according to the algorithm of
402 * `String::codePointAt`.
404 export const getCharacter
= ($, pos
) => {
405 const codepoint
= getCodepoint($, pos
);
406 return codepoint
== null
408 : stringFromCodepoints(codepoint
);
412 * Returns the code unit at the provided position in the string
413 * representation of the provided value according to the algorithm of
414 * `String::charCodeAt`, except that out‐of‐bounds values return undefined
421 * Returns the result of catenating the string representations of the
422 * provided values, returning a new string according to the algorithm
423 * of `String::concat`.
425 * ※ If no arguments are given, this function returns the empty
426 * string. This is different behaviour than if an explicit undefined
427 * first argument is given, in which case the resulting string will
428 * begin with `"undefined"`.
432 const { charCodeAt
, concat
} = String
.prototype;
433 const { isNaN
: isNan
} = Number
;
436 getCodeUnit
: ($, n
) => {
437 const codeUnit
= call(charCodeAt
, $, [n
]);
438 return isNan(codeUnit
) ? undefined : codeUnit
;
440 stringCatenate
: defineOwnProperties(
441 (...args
) => call(concat
, "", args
),
442 { name
: { value
: "stringCatenate" }, length
: { value
: 2 } },
448 * Returns the codepoint at the provided position in the string
449 * representation of the provided value according to the algorithm of
450 * `String::codePointAt`.
452 export const getCodepoint
= createCallableFunction(
453 stringPrototype
.codePointAt
,
454 { name
: "getCodepoint" },
458 * Returns the index of the first occurrence of the search string in
459 * the string representation of the provided value according to the
460 * algorithm of `String::indexOf`.
462 export const getFirstSubstringIndex
= createCallableFunction(
463 stringPrototype
.indexOf
,
464 { name
: "getFirstSubstringIndex" },
468 * Returns the index of the last occurrence of the search string in the
469 * string representation of the provided value according to the
470 * algorithm of `String::lastIndexOf`.
472 export const getLastSubstringIndex
= createCallableFunction(
473 stringPrototype
.lastIndexOf
,
474 { name
: "getLastSubstringIndex" },
478 * Returns the result of joining the provided iterable.
480 * If no separator is provided, it defaults to ",".
482 * If a value is nullish, it will be stringified as the empty string.
484 export const join
= (() => {
485 const { join
: arrayJoin
} = arrayPrototype
;
486 const join
= ($, separator
) =>
490 [separator
=== undefined ? "," : `${separator}`],
496 * Returns a string created from the raw value of the tagged template
499 * ※ This is effectively an alias for `String.raw`.
501 export const rawString
= createArrowFunction(String
.raw
, {
507 * Returns a string created from the provided code units.
509 * ※ This is effectively an alias for `String.fromCharCode`, but
510 * with the same error behaviour as `String.fromCodePoint`.
512 * ☡ This function throws an error if provided with an argument which
513 * is not an integral number from 0 to FFFF₁₆ inclusive.
517 const { fromCharCode
} = String
;
518 const { isInteger
: isIntegralNumber
} = Number
;
521 stringFromCodeUnits
: defineOwnProperties(
523 for (let index
= 0; index
< codeUnits
.length
; ++index
) {
524 // Iterate over each provided code unit and throw if it is
526 const nextCU
= +codeUnits
[index
];
528 !isIntegralNumber(nextCU
) || nextCU
< 0 || nextCU
> 0xFFFF
530 // The code unit is not an integral number between 0 and
532 throw new RangeError(
533 `Piscēs: Code unit out of range: ${nextCU}.`,
536 // The code unit is acceptable.
540 return call(fromCharCode
, undefined, codeUnits
);
542 { name
: { value
: "stringFromCodeUnits" }, length
: { value
: 1 } },
548 * Returns a string created from the provided codepoints.
550 * ※ This is effectively an alias for `String.fromCodePoint`.
552 * ☡ This function throws an error if provided with an argument which
553 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
555 export const stringFromCodepoints
= createArrowFunction(
556 String
.fromCodePoint
,
557 { name
: "stringFromCodepoints" },
561 * Returns the result of splitting the provided value on A·S·C·I·I
export const splitOnASCIIWhitespace = ($) => {
  // Normalize the whitespace first, then break the normalized string
  // apart on single U+0020 SPACE characters.
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
568 * Returns the result of splitting the provided value on commas,
569 * trimming A·S·C·I·I whitespace from the resulting tokens.
571 export const splitOnCommas
= ($) =>
573 stripLeadingAndTrailingASCIIWhitespace(
576 /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
584 * Returns whether the string representation of the provided value ends
585 * with the provided search string according to the algorithm of
586 * `String::endsWith`.
588 export const stringEndsWith
= createCallableFunction(
589 stringPrototype
.endsWith
,
590 { name
: "stringEndsWith" },
594 * Returns whether the string representation of the provided value
595 * contains the provided search string according to the algorithm of
596 * `String::includes`.
598 export const stringIncludes
= createCallableFunction(
599 stringPrototype
.includes
,
600 { name
: "stringIncludes" },
604 * Returns the result of matching the string representation of the
605 * provided value with the provided matcher according to the algorithm
606 * of `String::match`.
608 export const stringMatch
= createCallableFunction(
609 stringPrototype
.match
,
610 { name
: "stringMatch" },
614 * Returns the result of matching the string representation of the
615 * provided value with the provided matcher according to the algorithm
616 * of `String::matchAll`.
618 export const stringMatchAll
= createCallableFunction(
619 stringPrototype
.matchAll
,
620 { name
: "stringMatchAll" },
624 * Returns the normalized form of the string representation of the
625 * provided value according to the algorithm of `String::normalize`.
627 export const stringNormalize
= createCallableFunction(
628 stringPrototype
.normalize
,
629 { name
: "stringNormalize" },
633 * Returns the result of padding the end of the string representation
634 * of the provided value until it is the desired length
635 * according to the algorithm of `String::padEnd`.
637 export const stringPadEnd
= createCallableFunction(
638 stringPrototype
.padEnd
,
639 { name
: "stringPadEnd" },
643 * Returns the result of padding the start of the string representation
644 * of the provided value until it is the desired length
645 * according to the algorithm of `String::padStart`.
647 export const stringPadStart
= createCallableFunction(
648 stringPrototype
.padStart
,
649 { name
: "stringPadStart" },
653 * Returns the result of repeating the string representation of the
654 * provided value the provided number of times according to the
655 * algorithm of `String::repeat`.
657 export const stringRepeat
= createCallableFunction(
658 stringPrototype
.repeat
,
659 { name
: "stringRepeat" },
663 * Returns the result of replacing the string representation of the
664 * provided value with the provided replacement, using the provided
665 * matcher and according to the algorithm of `String::replace`.
667 export const stringReplace
= createCallableFunction(
668 stringPrototype
.replace
,
669 { name
: "stringReplace" },
673 * Returns the result of replacing the string representation of the
674 * provided value with the provided replacement, using the provided
675 * matcher and according to the algorithm of `String::replaceAll`.
677 export const stringReplaceAll
= createCallableFunction(
678 stringPrototype
.replaceAll
,
679 { name
: "stringReplaceAll" },
683 * Returns the result of searching the string representation of the
684 * provided value using the provided matcher and according to the
685 * algorithm of `String::search`.
687 export const stringSearch
= createCallableFunction(
688 stringPrototype
.search
,
689 { name
: "stringSearch" },
693 * Returns a slice of the string representation of the provided value
694 * according to the algorithm of `String::slice`.
696 export const stringSlice
= createCallableFunction(
697 stringPrototype
.slice
,
698 { name
: "stringSlice" },
702 * Returns the result of splitting the string representation of the
703 * provided value on the provided separator according to the algorithm
704 * of `String::split`.
706 export const stringSplit
= createCallableFunction(
707 stringPrototype
.split
,
708 { name
: "stringSplit" },
712 * Returns whether the string representation of the provided value
713 * starts with the provided search string according to the algorithm of
714 * `String::startsWith`.
716 export const stringStartsWith
= createCallableFunction(
717 stringPrototype
.startsWith
,
718 { name
: "stringStartsWith" },
722 * Returns the value of the provided string.
724 * ※ This is effectively an alias for `String::valueOf`.
726 * ☡ This function throws if the provided argument is not a string and
727 * does not have a `[[StringData]]` slot.
729 export const stringValue
= createCallableFunction(
730 stringPrototype
.valueOf
,
731 { name
: "stringValue" },
735 * Returns the result of stripping leading and trailing A·S·C·I·I
736 * whitespace from the provided value and collapsing other A·S·C·I·I
737 * whitespace in the string representation of the provided value.
739 export const stripAndCollapseASCIIWhitespace
= ($) =>
740 stripLeadingAndTrailingASCIIWhitespace(
749 * Returns the result of stripping leading and trailing A·S·C·I·I
750 * whitespace from the string representation of the provided value.
export const stripLeadingAndTrailingASCIIWhitespace = ($) => {
  // The pattern is anchored at both ends: it skips a leading run of
  // line feed, carriage return, tab, form feed, or space characters,
  // lazily captures everything after that in group 1, and then
  // consumes a trailing run of the same characters.
  const match = call(
    reExec,
    /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u,
    [$],
  );

  // Group 1 holds the text between the leading and trailing runs.
  return match[1];
};
756 * Returns a substring of the string representation of the provided
757 * value according to the algorithm of `String::substring`.
759 export const substring
= createCallableFunction(
760 stringPrototype
.substring
,
764 * Returns the result of converting the provided value to a string.
766 * ☡ This method throws for symbols and other objects without a string
export const toString = ($) => {
  // Template interpolation performs string conversion with the
  // "string" hint, so an object’s `toString` is preferred over its
  // `valueOf`, and symbols throw a `TypeError` rather than being
  // described.
  return `${$}`;
};