1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
10 import { bind
, call
, identity
, makeCallable
} from "./function.js";
13 getOwnPropertyDescriptors
,
18 import { ITERATOR
, TO_STRING_TAG
} from "./value.js";
// Local aliases for the builtin prototypes, and for the `exec` method
// of the regular expression prototype, which are referenced by the
// definitions further down in this module.
//
// NOTE(review): `RE` is declared outside of this span; presumably it
// is an alias for `RegExp` — confirm.
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

const reExec = rePrototype.exec;
29 * A `RegExp`like object which only matches entire strings, and may
30 * have additional constraints specified.
32 * Matchers are callable objects and will return true if they are
33 * called with a string that they match, and false otherwise.
34 * Matchers will always return false if called with nonstrings,
35 * although other methods like `::exec` coerce their arguments and
36 * may still return true.
40 const { toString
: reToString
} = rePrototype
;
42 Object
.getOwnPropertyDescriptor(rePrototype
, "dotAll").get;
44 Object
.getOwnPropertyDescriptor(rePrototype
, "flags").get;
46 Object
.getOwnPropertyDescriptor(rePrototype
, "global").get;
48 Object
.getOwnPropertyDescriptor(rePrototype
, "hasIndices").get;
50 Object
.getOwnPropertyDescriptor(rePrototype
, "ignoreCase").get;
52 Object
.getOwnPropertyDescriptor(rePrototype
, "multiline").get;
54 Object
.getOwnPropertyDescriptor(rePrototype
, "source").get;
56 Object
.getOwnPropertyDescriptor(rePrototype
, "sticky").get;
58 Object
.getOwnPropertyDescriptor(rePrototype
, "unicode").get;
60 const Matcher
= class extends identity
{
65 * Constructs a new `Matcher` from the provided source.
67 * If the provided source is a regular expression, then it must
68 * have the unicode flag set. Otherwise, it is interpreted as the
69 * string source of a regular expression with the unicode flag set.
71 * Other flags are taken from the provided regular expression
72 * object, if any are present.
74 * A name for the matcher may be provided as the second argument.
76 * A callable constraint on acceptable inputs may be provided as a
77 * third argument. If provided, it will be called with three
78 * arguments whenever a match appears successful: first, the string
79 * being matched, second, the match result, and third, the
80 * `Matcher` object itself. If the return value of this call is
81 * falsey, then the match will be considered a failure.
83 * ☡ If the provided source regular expression uses nongreedy
84 * quantifiers, it may not match the whole string even if a match
85 * with the whole string is possible. Surround the regular
86 * expression with `^(?:` and `)$` if you don’t want nongreedy
87 * regular expressions to fail when shorter matches are possible.
89 constructor(source
, name
= undefined, constraint
= null) {
92 if (typeof $ !== "string") {
93 // The provided value is not a string.
96 // The provided value is a string. Set the `.lastIndex` of
97 // the regular expression to 0 and see if the first attempt
98 // at a match matches the whole string and passes the
99 // provided constraint (if present).
100 regExp
.lastIndex
= 0;
101 const result
= call(reExec
, regExp
, [$]);
102 return result
?.[0] === $ &&
103 (constraint
=== null || constraint($, result
, this));
107 const regExp
= this.#regExp
= (() => {
109 call(reExec
, source
, [""]); // throws if source not a RegExp
111 return new RE(`${source}`, "u");
113 const unicode
= call(getUnicode
, source
, []);
115 // The provided regular expression does not have a unicode
118 `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
121 // The provided regular expression has a unicode flag.
122 return new RE(source
);
125 if (constraint
!== null && typeof constraint
!== "function") {
127 "Piscēs: Cannot construct Matcher: Constraint is not callable.",
130 this.#constraint
= constraint
;
131 return defineOwnProperties(
132 setPrototype(this, matcherPrototype
),
143 : `Matcher(${call(reToString, regExp, [])})`,
150 /** Gets whether the dot‐all flag is present on this `Matcher`. */
152 return call(getDotAll
, this.#regExp
, []);
156 * Executes this `Matcher` on the provided value and returns the
157 * result if there is a match, or null otherwise.
159 * Matchers only match if they can match the entire value on the
162 * ☡ The match result returned by this method will be the same as
163 * that passed to the constraint function—and may have been
164 * modified by said function prior to being returned.
167 const regExp
= this.#regExp
;
168 const constraint
= this.#constraint
;
169 const string
= `${$}`;
170 regExp
.lastIndex
= 0;
171 const result
= call(reExec
, regExp
, [string
]);
173 result
?.[0] === string
&&
174 (constraint
=== null || constraint(string
, result
, this))
176 // The entire string was matched and the constraint, if
177 // present, returned a truthy value.
180 // The entire string was not matched or the constraint returned
187 * Gets the flags present on this `Matcher`.
189 * ※ This needs to be defined because the internal `RegExp` object
190 * may have flags which are not yet recognized by ♓🌟 Piscēs.
193 return call(getFlags
, this.#regExp
, []);
196 /** Gets whether the global flag is present on this `Matcher`. */
198 return call(getGlobal
, this.#regExp
, []);
202 * Gets whether the has‐indices flag is present on this `Matcher`.
205 return call(getHasIndices
, this.#regExp
, []);
209 * Gets whether the ignore‐case flag is present on this `Matcher`.
212 return call(getIgnoreCase
, this.#regExp
, []);
216 * Gets whether the multiline flag is present on this `Matcher`.
219 return call(getMultiline
, this.#regExp
, []);
222 /** Gets the regular expression source for this `Matcher`. */
224 return call(getSource
, this.#regExp
, []);
227 /** Gets whether the sticky flag is present on this `Matcher`. */
229 return call(getSticky
, this.#regExp
, []);
233 * Gets whether the unicode flag is present on this `Matcher`.
235 * ※ This will always be true.
238 return call(getUnicode
, this.#regExp
, []);
242 const matcherConstructor
= defineOwnProperties(
243 class extends RegExp
{
244 constructor(...args
) {
245 return new Matcher(...args
);
249 name
: { value
: "Matcher" },
250 length
: { value
: 1 },
253 const matcherPrototype
= defineOwnProperties(
254 matcherConstructor
.prototype,
255 getOwnPropertyDescriptors(Matcher
.prototype),
256 { constructor: { value
: matcherConstructor
} },
259 return { Matcher
: matcherConstructor
};
264 * Returns the result of converting the provided value to A·S·C·I·I
270 * Returns the result of converting the provided value to A·S·C·I·I
276 toLowerCase
: stringToLowercase
,
277 toUpperCase
: stringToUppercase
,
280 asciiLowercase
: ($) =>
284 makeCallable(stringToLowercase
),
286 asciiUppercase
: ($) =>
290 makeCallable(stringToUppercase
),
297 * Returns an iterator over the code units in the string
298 * representation of the provided value.
303 * Returns an iterator over the codepoints in the string
304 * representation of the provided value.
309 * Returns an iterator over the scalar values in the string
310 * representation of the provided value.
312 * Codepoints which are not valid Unicode scalar values are replaced
318 * Returns the result of converting the provided value to a string of
319 * scalar values by replacing (unpaired) surrogate values with
324 const { [ITERATOR
]: arrayIterator
} = arrayPrototype
;
325 const arrayIteratorPrototype
= Object
.getPrototypeOf(
328 const { next
: arrayIteratorNext
} = arrayIteratorPrototype
;
329 const iteratorPrototype
= Object
.getPrototypeOf(
330 arrayIteratorPrototype
,
332 const { [ITERATOR
]: stringIterator
} = stringPrototype
;
333 const stringIteratorPrototype
= Object
.getPrototypeOf(
336 const { next
: stringIteratorNext
} = stringIteratorPrototype
;
339 * An iterator object for iterating over code values (either code
340 * units or codepoints) in a string.
342 * ※ This class is not exposed, although its methods are (through
343 * the prototypes of string code value iterator objects).
345 const StringCodeValueIterator
= class extends identity
{
350 * Constructs a new string code value iterator from the provided
353 * If the provided base iterator is an array iterator, this is a
354 * code unit iterator. If the provided iterator is a string
355 * iterator and surrogates are allowed, this is a codepoint
356 * iterator. If the provided iterator is a string iterator and
357 * surrogates are not allowed, this is a scalar value iterator.
359 constructor(baseIterator
, allowSurrogates
= true) {
360 super(objectCreate(stringCodeValueIteratorPrototype
));
361 this.#allowSurrogates
= !!allowSurrogates
;
362 this.#baseIterator
= baseIterator
;
365 /** Provides the next code value in the iterator. */
367 const baseIterator
= this.#baseIterator
;
368 switch (getPrototype(baseIterator
)) {
369 case arrayIteratorPrototype
: {
370 // The base iterator is iterating over U·C·S characters.
374 } = call(arrayIteratorNext
, baseIterator
, []);
376 ? { value
: undefined, done
: true }
377 : { value
: getCodeUnit(ucsCharacter
, 0), done
: false };
379 case stringIteratorPrototype
: {
380 // The base iterator is iterating over Unicode characters.
384 } = call(stringIteratorNext
, baseIterator
, []);
386 // The base iterator has been exhausted.
387 return { value
: undefined, done
: true };
389 // The base iterator provided a character; yield the
391 const codepoint
= getCodepoint(character
, 0);
393 value
: this.#allowSurrogates
|| codepoint
<= 0xD7FF ||
402 // Should not be possible!
404 "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
412 next
: stringCodeValueIteratorNext
,
413 } = StringCodeValueIterator
.prototype;
414 const stringCodeValueIteratorPrototype
= objectCreate(
420 value
: stringCodeValueIteratorNext
,
426 value
: "String Code Value Iterator",
431 const scalarValueIterablePrototype
= {
435 stringCodeValueIteratorNext
,
436 new StringCodeValueIterator(
437 call(stringIterator
, this.source
, []),
448 new StringCodeValueIterator(call(arrayIterator
, `${$}`, [])),
450 new StringCodeValueIterator(
451 call(stringIterator
, `${$}`, []),
455 new StringCodeValueIterator(
456 call(stringIterator
, `${$}`, []),
459 scalarValueString
: ($) =>
460 stringFromCodepoints(...objectCreate(
461 scalarValueIterablePrototype
,
462 { source
: { value
: `${$}` } },
468 * Returns an iterator over the codepoints in the string representation
469 * of the provided value according to the algorithm of
470 * `String::[Symbol.iterator]`.
472 export const characters
= makeCallable(
473 stringPrototype
[ITERATOR
],
477 * Returns the character at the provided position in the string
478 * representation of the provided value according to the algorithm of
479 * `String::codePointAt`.
481 export const getCharacter
= ($, pos
) => {
482 const codepoint
= getCodepoint($, pos
);
483 return codepoint
== null
485 : stringFromCodepoints(codepoint
);
/**
 * Returns the code unit found at the provided position in the string
 * representation of the provided value, following the algorithm of
 * `String::charCodeAt`.
 *
 * ※ The value comes first and the position second.
 */
export const getCodeUnit = makeCallable(stringPrototype.charCodeAt);
/**
 * Returns the codepoint found at the provided position in the string
 * representation of the provided value, following the algorithm of
 * `String::codePointAt`.
 *
 * ※ The value comes first and the position second.
 */
export const getCodepoint = makeCallable(stringPrototype.codePointAt);
503 * Returns the index of the first occurrence of the search string in
504 * the string representation of the provided value according to the
505 * algorithm of `String::indexOf`.
507 export const getFirstSubstringIndex
= makeCallable(
508 stringPrototype
.indexOf
,
512 * Returns the index of the last occurrence of the search string in the
513 * string representation of the provided value according to the
514 * algorithm of `String::lastIndexOf`.
516 export const getLastSubstringIndex
= makeCallable(
517 stringPrototype
.lastIndexOf
,
521 * Returns the result of joining the provided iterable.
523 * If no separator is provided, it defaults to ",".
525 * If a value is nullish, it will be stringified as the empty string.
527 export const join
= (() => {
528 const { join
: arrayJoin
} = arrayPrototype
;
529 const join
= ($, separator
= ",") =>
530 call(arrayJoin
, [...$], [`${separator}`]);
536 * Returns a string created from the raw value of the tagged template
539 * ※ This is an alias for `String.raw`.
544 * Returns a string created from the provided code units.
546 * ※ This is an alias for `String.fromCharCode`.
548 fromCharCode
: stringFromCodeUnits
,
551 * Returns a string created from the provided codepoints.
553 * ※ This is an alias for `String.fromCodePoint`.
555 fromCodePoint
: stringFromCodepoints
,
/**
 * Splits the provided value on A·S·C·I·I whitespace and returns the
 * resulting tokens.
 *
 * The value is first passed through `stripAndCollapseASCIIWhitespace`,
 * and the result of that call is then split on single U+0020 SPACE
 * characters.
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
566 * Returns the result of splitting the provided value on commas,
567 * trimming A·S·C·I·I whitespace from the resulting tokens.
569 export const splitOnCommas
= ($) =>
571 stripLeadingAndTrailingASCIIWhitespace(
574 /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
/**
 * Catenates the string representations of the provided values and
 * returns the result as a new string, following the algorithm of
 * `String::concat`.
 */
export const stringCatenate = makeCallable(stringPrototype.concat);
/**
 * Returns whether the string representation of the provided value
 * ends with the provided search string, following the algorithm of
 * `String::endsWith`.
 */
export const stringEndsWith = makeCallable(stringPrototype.endsWith);
/**
 * Returns whether the provided search string occurs anywhere within
 * the string representation of the provided value, following the
 * algorithm of `String::includes`.
 */
export const stringIncludes = makeCallable(stringPrototype.includes);
/**
 * Matches the string representation of the provided value against
 * the provided matcher and returns the result, following the
 * algorithm of `String::match`.
 */
export const stringMatch = makeCallable(stringPrototype.match);
/**
 * Matches the string representation of the provided value against
 * the provided matcher and returns the result, following the
 * algorithm of `String::matchAll`.
 */
export const stringMatchAll = makeCallable(stringPrototype.matchAll);
617 * Returns the normalized form of the string representation of the
618 * provided value according to the algorithm of `String::normalize`.
620 export const stringNormalize
= makeCallable(
621 stringPrototype
.normalize
,
/**
 * Pads the end of the string representation of the provided value
 * until it reaches the desired length and returns the result,
 * following the algorithm of `String::padEnd`.
 */
export const stringPadEnd = makeCallable(stringPrototype.padEnd);
/**
 * Pads the start of the string representation of the provided value
 * until it reaches the desired length and returns the result,
 * following the algorithm of `String::padStart`.
 */
export const stringPadStart = makeCallable(stringPrototype.padStart);
/**
 * Repeats the string representation of the provided value the
 * provided number of times and returns the result, following the
 * algorithm of `String::repeat`.
 */
export const stringRepeat = makeCallable(stringPrototype.repeat);
/**
 * Replaces, within the string representation of the provided value,
 * what the provided matcher matches with the provided replacement
 * and returns the result, following the algorithm of
 * `String::replace`.
 */
export const stringReplace = makeCallable(stringPrototype.replace);
653 * Returns the result of replacing the string representation of the
654 * provided value with the provided replacement, using the provided
655 * matcher and according to the algorithm of `String::replaceAll`.
657 export const stringReplaceAll
= makeCallable(
658 stringPrototype
.replaceAll
,
/**
 * Searches the string representation of the provided value using the
 * provided matcher and returns the result, following the algorithm
 * of `String::search`.
 */
export const stringSearch = makeCallable(stringPrototype.search);
/**
 * Returns a slice of the string representation of the provided
 * value, following the algorithm of `String::slice`.
 */
export const stringSlice = makeCallable(stringPrototype.slice);
/**
 * Splits the string representation of the provided value on the
 * provided separator and returns the result, following the algorithm
 * of `String::split`.
 *
 * ※ The value comes first and the separator second.
 */
export const stringSplit = makeCallable(stringPrototype.split);
682 * Returns whether the string representation of the provided value
683 * starts with the provided search string according to the algorithm of
684 * `String::startsWith`.
686 export const stringStartsWith
= makeCallable(
687 stringPrototype
.startsWith
,
/**
 * Returns the `[[StringData]]` of the provided value, following the
 * algorithm of `String::valueOf`.
 *
 * ☡ This function throws if the provided value does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = makeCallable(stringPrototype.valueOf);
699 * Returns the result of stripping leading and trailing A·S·C·I·I
700 * whitespace from the provided value and collapsing other A·S·C·I·I
701 * whitespace in the string representation of the provided value.
703 export const stripAndCollapseASCIIWhitespace
= ($) =>
704 stripLeadingAndTrailingASCIIWhitespace(
/**
 * Returns the string representation of the provided value with any
 * leading and trailing A·S·C·I·I whitespace (line feed, carriage
 * return, tab, form feed, or space) removed.
 */
export const stripLeadingAndTrailingASCIIWhitespace = ($) => {
  // Every part of the pattern is optional, so a match is always
  // expected. The lazy capture group holds whatever lies between the
  // leading and trailing runs of whitespace.
  const match = call(
    reExec,
    /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u,
    [$],
  );
  return match[1];
};
/**
 * Returns a substring of the string representation of the provided
 * value, following the algorithm of `String::substring`.
 */
export const substring = makeCallable(stringPrototype.substring);
/**
 * Converts the provided value to a string and returns the result.
 *
 * ☡ The conversion is done with a template literal, so this function
 * throws for symbols and for any other value which cannot be
 * converted to a string.
 */
export const toString = ($) => {
  return `${$}`;
};