1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
10 import { bind
, call
, identity
, makeCallable
} from "./function.js";
13 getOwnPropertyDescriptors
,
18 import { type
} from "./value.js";
22 * A RegExp·like object which only matches entire strings, and may
23 * have additional constraints specified.
25 * Matchers are callable objects and will return true if they are
26 * called with a string that they match, and false otherwise.
27 * Matchers will always return false if called with nonstrings,
28 * although other methods like `exec` coerce their arguments and may
34 const { prototype: rePrototype
} = RE
;
35 const { exec
: reExec
, toString
: reToString
} = rePrototype
;
37 Object
.getOwnPropertyDescriptor(rePrototype
, "dotAll").get;
39 Object
.getOwnPropertyDescriptor(rePrototype
, "flags").get;
41 Object
.getOwnPropertyDescriptor(rePrototype
, "global").get;
43 Object
.getOwnPropertyDescriptor(rePrototype
, "hasIndices").get;
45 Object
.getOwnPropertyDescriptor(rePrototype
, "ignoreCase").get;
47 Object
.getOwnPropertyDescriptor(rePrototype
, "multiline").get;
49 Object
.getOwnPropertyDescriptor(rePrototype
, "source").get;
51 Object
.getOwnPropertyDescriptor(rePrototype
, "sticky").get;
53 Object
.getOwnPropertyDescriptor(rePrototype
, "unicode").get;
55 const Matcher
= class extends identity
{
60 * Constructs a new Matcher from the provided source.
62 * If the provided source is a regular expression, then it must
63 * have the unicode flag set. Otherwise, it is interpreted as the
64 * string source of a regular expression with the unicode flag set.
66 * Other flags are taken from the provided regular expression
67 * object, if any are present.
69 * A name for the matcher may be provided as the second argument.
71 * A callable constraint on acceptable inputs may be provided as a
72 * third argument. If provided, it will be called with three
73 * arguments whenever a match appears successful: first, the string
74 * being matched, second, the match result, and third, the Matcher
75 * object itself. If the return value of this call is falsey, then
76 * the match will be considered a failure.
78 * ☡ If the provided source regular expression uses nongreedy
79 * quantifiers, it may not match the whole string even if a match
80 * with the whole string is possible. Surround the regular
81 * expression with `^(?:` and `)$` if you don’t want nongreedy
82 * regular expressions to fail when shorter matches are possible.
84 constructor(source
, name
= undefined, constraint
= null) {
87 if (typeof $ !== "string") {
88 // The provided value is not a string.
91 // The provided value is a string. Set the `lastIndex` of
92 // the regular expression to 0 and see if the first attempt
93 // at a match matches the whole string and passes the
94 // provided constraint (if present).
96 const result
= call(reExec
, regExp
, [$]);
97 return result
?.[0] === $ &&
98 (constraint
=== null || constraint($, result
, this));
102 const regExp
= this.#regExp
= (() => {
104 call(reExec
, source
, [""]); // throws if source not a RegExp
106 return new RE(`${source}`, "u");
108 const unicode
= call(getUnicode
, source
, []);
110 // The provided regular expression does not have a unicode
113 `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
116 // The provided regular expression has a unicode flag.
117 return new RE(source
);
120 if (constraint
!== null && typeof constraint
!== "function") {
122 "Piscēs: Cannot construct Matcher: Constraint is not callable.",
125 this.#constraint
= constraint
;
126 return defineOwnProperties(
127 setPrototype(this, matcherPrototype
),
138 : `Matcher(${call(reToString, regExp, [])})`,
145 /** Gets whether the dotAll flag is present on this Matcher. */
147 return call(getDotAll
, this.#regExp
, []);
151 * Executes this Matcher on the provided value and returns the
152 * result if there is a match, or null otherwise.
154 * Matchers only match if they can match the entire value on the
157 * ☡ The match result returned by this method will be the same as
158 * that passed to the constraint function—and may have been
159 * modified by said function prior to being returned.
162 const regExp
= this.#regExp
;
163 const constraint
= this.#constraint
;
164 const string
= `${$}`;
165 regExp
.lastIndex
= 0;
166 const result
= call(reExec
, regExp
, [string
]);
168 result
?.[0] === string
&&
169 (constraint
=== null || constraint(string
, result
, this))
171 // The entire string was matched and the constraint, if
172 // present, returned a truthy value.
175 // The entire string was not matched or the constraint returned
182 * Gets the flags present on this Matcher.
184 * ※ This needs to be defined because the internal RegExp object
185 * may have flags which are not yet recognized by ♓🌟 Piscēs.
188 return call(getFlags
, this.#regExp
, []);
191 /** Gets whether the global flag is present on this Matcher. */
193 return call(getGlobal
, this.#regExp
, []);
196 /** Gets whether the hasIndices flag is present on this Matcher. */
198 return call(getHasIndices
, this.#regExp
, []);
201 /** Gets whether the ignoreCase flag is present on this Matcher. */
203 return call(getIgnoreCase
, this.#regExp
, []);
206 /** Gets whether the multiline flag is present on this Matcher. */
208 return call(getMultiline
, this.#regExp
, []);
211 /** Gets the regular expression source for this Matcher. */
213 return call(getSource
, this.#regExp
, []);
216 /** Gets whether the sticky flag is present on this Matcher. */
218 return call(getSticky
, this.#regExp
, []);
222 * Gets whether the unicode flag is present on this Matcher.
224 * ※ This will always be true.
227 return call(getUnicode
, this.#regExp
, []);
231 const matcherConstructor
= defineOwnProperties(
232 class extends RegExp
{
233 constructor(...args
) {
234 return new Matcher(...args
);
238 name
: { value
: "Matcher" },
239 length
: { value
: 1 },
242 const matcherPrototype
= defineOwnProperties(
243 matcherConstructor
.prototype,
244 getOwnPropertyDescriptors(Matcher
.prototype),
245 { constructor: { value
: matcherConstructor
} },
248 return { Matcher
: matcherConstructor
};
253 * Returns the result of converting the provided value to A·S·C·I·I
259 * Returns the result of converting the provided value to A·S·C·I·I
265 toLowerCase
: stringToLowercase
,
266 toUpperCase
: stringToUppercase
,
267 } = String
.prototype;
269 asciiLowercase
: ($) =>
273 makeCallable(stringToLowercase
),
275 asciiUppercase
: ($) =>
279 makeCallable(stringToUppercase
),
286 * Returns an iterator over the code units in the string
287 * representation of the provided value.
292 * Returns an iterator over the codepoints in the string
293 * representation of the provided value.
298 * Returns an iterator over the scalar values in the string
299 * representation of the provided value.
301 * Codepoints which are not valid Unicode scalar values are replaced
307 * Returns the result of converting the provided value to a string of
308 * scalar values by replacing (unpaired) surrogate values with
314 iterator
: iteratorSymbol
,
315 toStringTag
: toStringTagSymbol
,
317 const { [iteratorSymbol
]: arrayIterator
} = Array
.prototype;
318 const arrayIteratorPrototype
= Object
.getPrototypeOf(
319 [][iteratorSymbol
](),
321 const { next
: arrayIteratorNext
} = arrayIteratorPrototype
;
322 const iteratorPrototype
= Object
.getPrototypeOf(
323 arrayIteratorPrototype
,
325 const { [iteratorSymbol
]: stringIterator
} = String
.prototype;
326 const stringIteratorPrototype
= Object
.getPrototypeOf(
327 ""[iteratorSymbol
](),
329 const { next
: stringIteratorNext
} = stringIteratorPrototype
;
332 * An iterator object for iterating over code values (either code
333 * units or codepoints) in a string.
335 * ※ This class is not exposed, although its methods are (through
336 * the prototypes of string code value iterator objects).
338 const StringCodeValueIterator
= class extends identity
{
343 * Constructs a new string code value iterator from the provided
346 * If the provided base iterator is an array iterator, this is a
347 * code unit iterator. If the provided iterator is a string
348 * iterator and surrogates are allowed, this is a codepoint
349 * iterator. If the provided iterator is a string iterator and
350 * surrogates are not allowed, this is a scalar value iterator.
352 constructor(baseIterator
, allowSurrogates
= true) {
353 super(objectCreate(stringCodeValueIteratorPrototype
));
354 this.#allowSurrogates
= !!allowSurrogates
;
355 this.#baseIterator
= baseIterator
;
358 /** Provides the next code value in the iterator. */
360 const baseIterator
= this.#baseIterator
;
361 switch (getPrototype(baseIterator
)) {
362 case arrayIteratorPrototype
: {
363 // The base iterator is iterating over U·C·S characters.
367 } = call(arrayIteratorNext
, baseIterator
, []);
369 ? { value
: undefined, done
: true }
370 : { value
: getCodeUnit(ucsCharacter
, 0), done
: false };
372 case stringIteratorPrototype
: {
373 // The base iterator is iterating over Unicode characters.
377 } = call(stringIteratorNext
, baseIterator
, []);
379 // The base iterator has been exhausted.
380 return { value
: undefined, done
: true };
382 // The base iterator provided a character; yield the
384 const codepoint
= getCodepoint(character
, 0);
386 value
: this.#allowSurrogates
|| codepoint
<= 0xD7FF ||
395 // Should not be possible!
397 "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
405 next
: stringCodeValueIteratorNext
,
406 } = StringCodeValueIterator
.prototype;
407 const stringCodeValueIteratorPrototype
= objectCreate(
413 value
: stringCodeValueIteratorNext
,
416 [toStringTagSymbol
]: {
419 value
: "String Code Value Iterator",
424 const scalarValueIterablePrototype
= {
428 stringCodeValueIteratorNext
,
429 new StringCodeValueIterator(
430 call(stringIterator
, this.source
, []),
441 new StringCodeValueIterator(call(arrayIterator
, `${$}`, [])),
443 new StringCodeValueIterator(
444 call(stringIterator
, `${$}`, []),
448 new StringCodeValueIterator(
449 call(stringIterator
, `${$}`, []),
452 scalarValueString
: ($) =>
453 stringFromCodepoints(...objectCreate(
454 scalarValueIterablePrototype
,
455 { source
: { value
: `${$}` } },
/**
 * Returns an iterator over the codepoints in the string representation
 * of the provided value according to the algorithm of
 * String::[Symbol.iterator].
 */
export const characters = makeCallable(
  String.prototype[Symbol.iterator],
);
470 * Returns the character at the provided position in the string
471 * representation of the provided value according to the algorithm of
472 * String::codePointAt.
474 export const getCharacter
= ($, pos
) => {
475 const codepoint
= getCodepoint($, pos
);
476 return codepoint
== null
478 : stringFromCodepoints(codepoint
);
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::charCodeAt.
 */
export const getCodeUnit = makeCallable(String.prototype.charCodeAt);
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::codePointAt.
 */
export const getCodepoint = makeCallable(String.prototype.codePointAt);
/**
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value according to the
 * algorithm of String::indexOf.
 */
export const getFirstSubstringIndex = makeCallable(
  String.prototype.indexOf,
);
/**
 * Returns the index of the last occurrence of the search string in the
 * string representation of the provided value according to the
 * algorithm of String::lastIndexOf.
 */
export const getLastSubstringIndex = makeCallable(
  String.prototype.lastIndexOf,
);
514 * Returns the result of joining the provided iterable.
516 * If no separator is provided, it defaults to ",".
518 * If a value is nullish, it will be stringified as the empty string.
520 export const join
= (() => {
521 const { join
: arrayJoin
} = Array
.prototype;
522 const join
= ($, separator
= ",") =>
523 call(arrayJoin
, [...$], [`${separator}`]);
529 * Returns a string created from the raw value of the tagged template
532 * ※ This is an alias for String.raw.
537 * Returns a string created from the provided code units.
539 * ※ This is an alias for String.fromCharCode.
541 fromCharCode
: stringFromCodeUnits
,
544 * Returns a string created from the provided codepoints.
546 * ※ This is an alias for String.fromCodePoint.
548 fromCodePoint
: stringFromCodepoints
,
/**
 * Returns the result of splitting the provided value on A·S·C·I·I
 * whitespace.
 *
 * ※ The value is first passed through
 * `stripAndCollapseASCIIWhitespace`, and the result of that call is
 * then split on single U+0020 SPACE characters.
 *
 * NOTE(review): presumably, when the stripped value is the empty
 * string, the split yields `[""]` rather than an empty array — confirm
 * that callers expect this.
 */
export const splitOnASCIIWhitespace = ($) =>
  stringSplit(stripAndCollapseASCIIWhitespace($), " ");
559 * Returns the result of splitting the provided value on commas,
560 * trimming A·S·C·I·I whitespace from the resulting tokens.
562 export const splitOnCommas
= ($) =>
564 stripLeadingAndTrailingASCIIWhitespace(
567 /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
/**
 * Returns the result of catenating the string representations of the
 * provided values, returning a new string according to the algorithm
 * of String::concat.
 */
export const stringCatenate = makeCallable(String.prototype.concat);
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string according to the algorithm of
 * String::endsWith.
 */
export const stringEndsWith = makeCallable(String.prototype.endsWith);
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string according to the algorithm of
 * String::includes.
 */
export const stringIncludes = makeCallable(String.prototype.includes);
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of String::match.
 */
export const stringMatch = makeCallable(String.prototype.match);
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of String::matchAll.
 */
export const stringMatchAll = makeCallable(String.prototype.matchAll);
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of String::normalize.
 */
export const stringNormalize = makeCallable(
  String.prototype.normalize,
);
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of String::padEnd.
 */
export const stringPadEnd = makeCallable(String.prototype.padEnd);
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of String::padStart.
 */
export const stringPadStart = makeCallable(String.prototype.padStart);
/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times according to the
 * algorithm of String::repeat.
 */
export const stringRepeat = makeCallable(String.prototype.repeat);
/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of String::replace.
 */
export const stringReplace = makeCallable(String.prototype.replace);
/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of String::replaceAll.
 */
export const stringReplaceAll = makeCallable(
  String.prototype.replaceAll,
);
/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher and according to the
 * algorithm of String::search.
 */
export const stringSearch = makeCallable(String.prototype.search);
/**
 * Returns a slice of the string representation of the provided value
 * according to the algorithm of String::slice.
 */
export const stringSlice = makeCallable(String.prototype.slice);
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of String::split.
 */
export const stringSplit = makeCallable(String.prototype.split);
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string according to the algorithm of
 * String::startsWith.
 */
export const stringStartsWith = makeCallable(
  String.prototype.startsWith,
);
/**
 * Returns the `[[StringData]]` of the provided value.
 *
 * ☡ This function will throw if the provided object does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = makeCallable(String.prototype.valueOf);
692 * Returns the result of stripping leading and trailing A·S·C·I·I
693 * whitespace from the provided value and collapsing other A·S·C·I·I
694 * whitespace in the string representation of the provided value.
696 export const stripAndCollapseASCIIWhitespace
= ($) =>
697 stripLeadingAndTrailingASCIIWhitespace(
706 * Returns the result of stripping leading and trailing A·S·C·I·I
707 * whitespace from the string representation of the provided value.
709 export const stripLeadingAndTrailingASCIIWhitespace
= (() => {
710 const { exec
: reExec
} = RegExp
.prototype;
712 call(reExec
, /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u, [$])[1];
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of String::substring.
 */
export const substring = makeCallable(String.prototype.substring);
/**
 * Returns the result of converting the provided value to a string.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => `${$}`;