1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
4 // Copyright © 2022 Lady [@ Lady’s Computer].
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
10 import { bind
, call
, identity
, makeCallable
} from "./function.js";
20 * A RegExp·like object which only matches entire strings, and may
21 * have additional constraints specified.
23 * Matchers are callable objects and will return true if they are
24 * called with a string that they match, and false otherwise.
25 * Matchers will always return false if called with nonstrings,
26 * although other methods like `exec` coerce their arguments and may
32 const { prototype: rePrototype
} = RE
;
33 const { exec
: reExec
, toString
: reToString
} = rePrototype
;
35 Object
.getOwnPropertyDescriptor(rePrototype
, "dotAll").get;
37 Object
.getOwnPropertyDescriptor(rePrototype
, "global").get;
39 Object
.getOwnPropertyDescriptor(rePrototype
, "hasIndices").get;
41 Object
.getOwnPropertyDescriptor(rePrototype
, "ignoreCase").get;
43 Object
.getOwnPropertyDescriptor(rePrototype
, "multiline").get;
45 Object
.getOwnPropertyDescriptor(rePrototype
, "source").get;
47 Object
.getOwnPropertyDescriptor(rePrototype
, "sticky").get;
49 Object
.getOwnPropertyDescriptor(rePrototype
, "unicode").get;
51 const Matcher
= class extends identity
{
56 * Constructs a new Matcher from the provided source.
58 * If the provided source is a regular expression, then it must
59 * have the unicode flag set. Otherwise, it is interpreted as the
60 * string source of a regular expression with the unicode flag set.
62 * Other flags are taken from the provided regular expression
63 * object, if any are present.
65 * A name for the matcher may be provided as the second argument.
67 * A callable constraint on acceptable inputs may be provided as a
68 * third argument. If provided, it will be called with two
69 * arguments whenever a match appears successful: first, the string
70 * being matched, and second, the Matcher object itself. If the
71 * return value of this call is falsey, then the match will be
72 * considered a failure.
74 * ☡ If the provided source regular expression uses nongreedy
75 * quantifiers, it may not match the whole string even if a match
76 * with the whole string is possible. Surround the regular
77 * expression with `^(?:` and `)$` if you don’t want nongreedy
78 * regular expressions to fail when shorter matches are possible.
80 constructor(source
, name
= undefined, constraint
= null) {
83 if (typeof $ !== "string") {
84 // The provided value is not a string.
87 // The provided value is a string. Set the `lastIndex` of
88 // the regular expression to 0 and see if the first attempt
89 // at a match matches the whole string and passes the
90 // provided constraint (if present).
92 return call(reExec
, regExp
, [$])?.[0] === $ &&
93 (constraint
=== null || constraint($, this));
97 const regExp
= this.#regExp
= (() => {
99 call(reExec
, source
, [""]); // throws if source not a RegExp
101 return new RE(`${source}`, "u");
103 const unicode
= call(getUnicode
, source
, []);
105 // The provided regular expression does not have a unicode
108 `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
111 // The provided regular expression has a unicode flag.
112 return new RE(source
);
115 if (constraint
!== null && typeof constraint
!== "function") {
117 "Piscēs: Cannot construct Matcher: Constraint is not callable.",
120 this.#constraint
= constraint
;
121 return defineOwnProperties(
122 setPrototype(this, matcherPrototype
),
133 : `Matcher(${call(reToString, regExp, [])})`,
140 /** Gets whether the dotAll flag is present on this Matcher. */
142 return call(getDotAll
, this.#regExp
, []);
146 * Executes this Matcher on the provided value and returns the
147 * result if there is a match, or null otherwise.
149 * Matchers only match if they can match the entire value on the
153 const regExp
= this.#regExp
;
154 const constraint
= this.#constraint
;
155 const string
= `${$}`;
156 regExp
.lastIndex
= 0;
157 const result
= call(reExec
, regExp
, [string
]);
159 result
?.[0] === string
&&
160 (constraint
=== null || constraint(string
, this))
162 // The entire string was matched and the constraint, if
163 // present, returned a truthy value.
166 // The entire string was not matched or the constraint returned
172 /** Gets whether the global flag is present on this Matcher. */
174 return call(getGlobal
, this.#regExp
, []);
177 /** Gets whether the hasIndices flag is present on this Matcher. */
179 return call(getHasIndices
, this.#regExp
, []);
182 /** Gets whether the ignoreCase flag is present on this Matcher. */
184 return call(getIgnoreCase
, this.#regExp
, []);
187 /** Gets whether the multiline flag is present on this Matcher. */
189 return call(getMultiline
, this.#regExp
, []);
192 /** Gets the regular expression source for this Matcher. */
194 return call(getSource
, this.#regExp
, []);
197 /** Gets whether the sticky flag is present on this Matcher. */
199 return call(getSticky
, this.#regExp
, []);
203 * Gets whether the unicode flag is present on this Matcher.
205 * ※ This will always be true.
208 return call(getUnicode
, this.#regExp
, []);
211 const matcherPrototype
= setPrototype(
221 * Returns the result of converting the provided value to A·S·C·I·I
227 * Returns the result of converting the provided value to A·S·C·I·I
233 toLowerCase
: stringToLowercase
,
234 toUpperCase
: stringToUppercase
,
235 } = String
.prototype;
237 asciiLowercase
: ($) =>
241 makeCallable(stringToLowercase
),
243 asciiUppercase
: ($) =>
247 makeCallable(stringToUppercase
),
254 * Returns an iterator over the code units in the string
255 * representation of the provided value.
260 * Returns an iterator over the codepoints in the string
261 * representation of the provided value.
266 * Returns an iterator over the scalar values in the string
267 * representation of the provided value.
269 * Codepoints which are not valid Unicode scalar values are replaced
275 * Returns the result of converting the provided value to a string of
276 * scalar values by replacing (unpaired) surrogate values with
282 iterator
: iteratorSymbol
,
283 toStringTag
: toStringTagSymbol
,
285 const { [iteratorSymbol
]: arrayIterator
} = Array
.prototype;
286 const arrayIteratorPrototype
= Object
.getPrototypeOf(
287 [][iteratorSymbol
](),
289 const { next
: arrayIteratorNext
} = arrayIteratorPrototype
;
290 const iteratorPrototype
= Object
.getPrototypeOf(
291 arrayIteratorPrototype
,
293 const { [iteratorSymbol
]: stringIterator
} = String
.prototype;
294 const stringIteratorPrototype
= Object
.getPrototypeOf(
295 ""[iteratorSymbol
](),
297 const { next
: stringIteratorNext
} = stringIteratorPrototype
;
300 * An iterator object for iterating over code values (either code
301 * units or codepoints) in a string.
303 * ※ This class is not exposed, although its methods are (through
304 * the prototypes of string code value iterator objects).
306 const StringCodeValueIterator
= class extends identity
{
311 * Constructs a new string code value iterator from the provided
314 * If the provided base iterator is an array iterator, this is a
315 * code unit iterator. If the provided iterator is a string
316 * iterator and surrogates are allowed, this is a codepoint
317 * iterator. If the provided iterator is a string iterator and
318 * surrogates are not allowed, this is a scalar value iterator.
320 constructor(baseIterator
, allowSurrogates
= true) {
321 super(objectCreate(stringCodeValueIteratorPrototype
));
322 this.#allowSurrogates
= !!allowSurrogates
;
323 this.#baseIterator
= baseIterator
;
326 /** Provides the next code value in the iterator. */
328 const baseIterator
= this.#baseIterator
;
329 switch (getPrototype(baseIterator
)) {
330 case arrayIteratorPrototype
: {
331 // The base iterator is iterating over U·C·S characters.
335 } = call(arrayIteratorNext
, baseIterator
, []);
337 ? { value
: undefined, done
: true }
338 : { value
: getCodeUnit(ucsCharacter
, 0), done
: false };
340 case stringIteratorPrototype
: {
341 // The base iterator is iterating over Unicode characters.
345 } = call(stringIteratorNext
, baseIterator
, []);
347 // The base iterator has been exhausted.
348 return { value
: undefined, done
: true };
350 // The base iterator provided a character; yield the
352 const codepoint
= getCodepoint(character
, 0);
354 value
: this.#allowSurrogates
|| codepoint
<= 0xD7FF ||
363 // Should not be possible!
365 "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
373 next
: stringCodeValueIteratorNext
,
374 } = StringCodeValueIterator
.prototype;
375 const stringCodeValueIteratorPrototype
= objectCreate(
381 value
: stringCodeValueIteratorNext
,
384 [toStringTagSymbol
]: {
387 value
: "String Code Value Iterator",
392 const scalarValueIterablePrototype
= {
396 stringCodeValueIteratorNext
,
397 new StringCodeValueIterator(
398 call(stringIterator
, this.source
, []),
409 new StringCodeValueIterator(call(arrayIterator
, `${$}`, [])),
411 new StringCodeValueIterator(
412 call(stringIterator
, `${$}`, []),
416 new StringCodeValueIterator(
417 call(stringIterator
, `${$}`, []),
420 scalarValueString
: ($) =>
421 stringFromCodepoints(...objectCreate(
422 scalarValueIterablePrototype
,
423 { source
: { value
: `${$}` } },
429 * Returns an iterator over the codepoints in the string representation
430 * of the provided value according to the algorithm of
431 * String::[Symbol.iterator].
433 export const characters
= makeCallable(
434 String
.prototype[Symbol
.iterator
],
438 * Returns the character at the provided position in the string
439 * representation of the provided value according to the algorithm of
440 * String::codePointAt.
442 export const getCharacter
= ($, pos
) => {
443 const codepoint
= getCodepoint($, pos
);
444 return codepoint
== null
446 : stringFromCodepoints(codepoint
);
/**
 * Gets the code unit found at the provided position within the string
 * representation of the provided value, following the algorithm of
 * String::charCodeAt.
 */
export const getCodeUnit = (() => {
  const { charCodeAt: stringCharCodeAt } = String.prototype;
  return makeCallable(stringCharCodeAt);
})();
/**
 * Gets the codepoint found at the provided position within the string
 * representation of the provided value, following the algorithm of
 * String::codePointAt.
 */
export const getCodepoint = (() => {
  const { codePointAt: stringCodePointAt } = String.prototype;
  return makeCallable(stringCodePointAt);
})();
464 * Returns the index of the first occurrence of the search string in
465 * the string representation of the provided value according to the
466 * algorithm of String::indexOf.
468 export const getFirstSubstringIndex
= makeCallable(
469 String
.prototype.indexOf
,
473 * Returns the index of the last occurrence of the search string in the
474 * string representation of the provided value according to the
475 * algorithm of String::lastIndexOf.
477 export const getLastSubstringIndex
= makeCallable(
478 String
.prototype.lastIndexOf
,
482 * Returns the result of joining the provided iterable.
484 * If no separator is provided, it defaults to ",".
486 * If a value is nullish, it will be stringified as the empty string.
488 export const join
= (() => {
489 const { join
: arrayJoin
} = Array
.prototype;
490 const join
= ($, separator
= ",") =>
491 call(arrayJoin
, [...$], [`${separator}`]);
497 * Returns a string created from the raw value of the tagged template
500 * ※ This is an alias for String.raw.
505 * Returns a string created from the provided code units.
507 * ※ This is an alias for String.fromCharCode.
509 fromCharCode
: stringFromCodeUnits
,
512 * Returns a string created from the provided codepoints.
514 * ※ This is an alias for String.fromCodePoint.
516 fromCodePoint
: stringFromCodepoints
,
/**
 * Splits the provided value on A·S·C·I·I whitespace and returns the
 * resulting tokens.
 *
 * ※ The value is first passed through
 * `stripAndCollapseASCIIWhitespace`, and the result of that is then
 * split on single spaces.
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
527 * Returns the result of splitting the provided value on commas,
528 * trimming A·S·C·I·I whitespace from the resulting tokens.
530 export const splitOnCommas
= ($) =>
532 stripLeadingAndTrailingASCIIWhitespace(
535 /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
/**
 * Catenates the string representations of the provided values and
 * returns the result as a new string, following the algorithm of
 * String::concat.
 */
export const stringCatenate = (() => {
  const { concat: stringConcat } = String.prototype;
  return makeCallable(stringConcat);
})();
/**
 * Tests whether the string representation of the provided value ends
 * with the provided search string, following the algorithm of
 * String::endsWith.
 */
export const stringEndsWith = (() => {
  const { endsWith: stringPrototypeEndsWith } = String.prototype;
  return makeCallable(stringPrototypeEndsWith);
})();
/**
 * Tests whether the string representation of the provided value
 * contains the provided search string, following the algorithm of
 * String::includes.
 */
export const stringIncludes = (() => {
  const { includes: stringPrototypeIncludes } = String.prototype;
  return makeCallable(stringPrototypeIncludes);
})();
/**
 * Matches the string representation of the provided value against
 * the provided matcher and returns the result, following the
 * algorithm of String::match.
 */
export const stringMatch = (() => {
  const { match: stringPrototypeMatch } = String.prototype;
  return makeCallable(stringPrototypeMatch);
})();
/**
 * Matches the string representation of the provided value against
 * the provided matcher and returns the result, following the
 * algorithm of String::matchAll.
 */
export const stringMatchAll = (() => {
  const { matchAll: stringPrototypeMatchAll } = String.prototype;
  return makeCallable(stringPrototypeMatchAll);
})();
578 * Returns the normalized form of the string representation of the
579 * provided value according to the algorithm of String::normalize.
581 export const stringNormalize
= makeCallable(
582 String
.prototype.normalize
,
/**
 * Pads the end of the string representation of the provided value
 * until it reaches the desired length and returns the result,
 * following the algorithm of String::padEnd.
 */
export const stringPadEnd = (() => {
  const { padEnd: stringPrototypePadEnd } = String.prototype;
  return makeCallable(stringPrototypePadEnd);
})();
/**
 * Pads the start of the string representation of the provided value
 * until it reaches the desired length and returns the result,
 * following the algorithm of String::padStart.
 */
export const stringPadStart = (() => {
  const { padStart: stringPrototypePadStart } = String.prototype;
  return makeCallable(stringPrototypePadStart);
})();
/**
 * Repeats the string representation of the provided value the
 * provided number of times and returns the result, following the
 * algorithm of String::repeat.
 */
export const stringRepeat = (() => {
  const { repeat: stringPrototypeRepeat } = String.prototype;
  return makeCallable(stringPrototypeRepeat);
})();
/**
 * Performs a replacement on the string representation of the
 * provided value, using the provided matcher and the provided
 * replacement, and returns the result, following the algorithm of
 * String::replace.
 */
export const stringReplace = (() => {
  const { replace: stringPrototypeReplace } = String.prototype;
  return makeCallable(stringPrototypeReplace);
})();
614 * Returns the result of replacing the string representation of the
615 * provided value with the provided replacement, using the provided
616 * matcher and according to the algorithm of String::replaceAll.
618 export const stringReplaceAll
= makeCallable(
619 String
.prototype.replaceAll
,
/**
 * Searches the string representation of the provided value using the
 * provided matcher and returns the result, following the algorithm
 * of String::search.
 */
export const stringSearch = (() => {
  const { search: stringPrototypeSearch } = String.prototype;
  return makeCallable(stringPrototypeSearch);
})();
/**
 * Takes a slice of the string representation of the provided value,
 * following the algorithm of String::slice.
 */
export const stringSlice = (() => {
  const { slice: stringPrototypeSlice } = String.prototype;
  return makeCallable(stringPrototypeSlice);
})();
/**
 * Splits the string representation of the provided value on the
 * provided separator and returns the result, following the algorithm
 * of String::split.
 */
export const stringSplit = (() => {
  const { split: stringPrototypeSplit } = String.prototype;
  return makeCallable(stringPrototypeSplit);
})();
643 * Returns whether the string representation of the provided value
644 * starts with the provided search string according to the algorithm of
645 * String::startsWith.
647 export const stringStartsWith
= makeCallable(
648 String
.prototype.startsWith
,
/**
 * Gets the `[[StringData]]` of the provided value.
 *
 * ☡ This function throws if the provided object has no
 * `[[StringData]]` internal slot.
 */
export const stringValue = (() => {
  const { valueOf: stringPrototypeValueOf } = String.prototype;
  return makeCallable(stringPrototypeValueOf);
})();
660 * Returns the result of stripping leading and trailing A·S·C·I·I
661 * whitespace from the provided value and collapsing other A·S·C·I·I
662 * whitespace in the string representation of the provided value.
664 export const stripAndCollapseASCIIWhitespace
= ($) =>
665 stripLeadingAndTrailingASCIIWhitespace(
674 * Returns the result of stripping leading and trailing A·S·C·I·I
675 * whitespace from the string representation of the provided value.
677 export const stripLeadingAndTrailingASCIIWhitespace
= (() => {
678 const { exec
: reExec
} = RegExp
.prototype;
680 call(reExec
, /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u, [$])[1];
/**
 * Takes a substring of the string representation of the provided
 * value, following the algorithm of String::substring.
 */
export const substring = (() => {
  const { substring: stringPrototypeSubstring } = String.prototype;
  return makeCallable(stringPrototypeSubstring);
})();
/**
 * Converts the provided value to a string and returns the result.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => {
  // Template‐literal interpolation performs the string conversion; it
  // is also what throws when given a symbol.
  const stringRepresentation = `${$}`;
  return stringRepresentation;
};