1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
10 import { bind
, call
, identity
, makeCallable
} from "./function.js";
12 arrayIteratorFunction
,
13 stringIteratorFunction
,
14 } from "./iterable.js";
17 getOwnPropertyDescriptors
,
22 import { ITERATOR
} from "./value.js";
// Capture the intrinsic prototype objects once, while this module is
// being evaluated, so that the rest of the file can refer to them by
// a short local name.
const { prototype: rePrototype } = RE;
const { prototype: arrayPrototype } = Array;
const { prototype: stringPrototype } = String;

// The `exec` method found on the regular expression prototype. It is
// invoked elsewhere in this file through `call`, with the regular
// expression supplied explicitly as the this value.
const { exec: reExec } = rePrototype;
33 * A `RegExp`like object which only matches entire strings, and may
34 * have additional constraints specified.
36 * Matchers are callable objects and will return true if they are
37 * called with a string that they match, and false otherwise.
38 * Matchers will always return false if called with nonstrings,
39 * although other methods like `::exec` coerce their arguments and
40 * may still return true.
44 const { toString
: reToString
} = rePrototype
;
46 Object
.getOwnPropertyDescriptor(rePrototype
, "dotAll").get;
48 Object
.getOwnPropertyDescriptor(rePrototype
, "flags").get;
50 Object
.getOwnPropertyDescriptor(rePrototype
, "global").get;
52 Object
.getOwnPropertyDescriptor(rePrototype
, "hasIndices").get;
54 Object
.getOwnPropertyDescriptor(rePrototype
, "ignoreCase").get;
56 Object
.getOwnPropertyDescriptor(rePrototype
, "multiline").get;
58 Object
.getOwnPropertyDescriptor(rePrototype
, "source").get;
60 Object
.getOwnPropertyDescriptor(rePrototype
, "sticky").get;
62 Object
.getOwnPropertyDescriptor(rePrototype
, "unicode").get;
64 const Matcher
= class extends identity
{
69 * Constructs a new `Matcher` from the provided source.
71 * If the provided source is a regular expression, then it must
72 * have the unicode flag set. Otherwise, it is interpreted as the
73 * string source of a regular expression with the unicode flag set.
75 * Other flags are taken from the provided regular expression
76 * object, if any are present.
78 * A name for the matcher may be provided as the second argument.
80 * A callable constraint on acceptable inputs may be provided as a
81 * third argument. If provided, it will be called with three
82 * arguments whenever a match appears successful: first, the string
83 * being matched, second, the match result, and third, the
84 * `Matcher` object itself. If the return value of this call is
85 * falsey, then the match will be considered a failure.
87 * ☡ If the provided source regular expression uses nongreedy
88 * quantifiers, it may not match the whole string even if a match
89 * with the whole string is possible. Surround the regular
90 * expression with `^(?:` and `)$` if you don’t want nongreedy
91 * regular expressions to fail when shorter matches are possible.
93 constructor(source
, name
= undefined, constraint
= null) {
96 if (typeof $ !== "string") {
97 // The provided value is not a string.
100 // The provided value is a string. Set the `.lastIndex` of
101 // the regular expression to 0 and see if the first attempt
102 // at a match matches the whole string and passes the
103 // provided constraint (if present).
104 regExp
.lastIndex
= 0;
105 const result
= call(reExec
, regExp
, [$]);
106 return result
?.[0] === $ &&
107 (constraint
=== null || constraint($, result
, this));
111 const regExp
= this.#regExp
= (() => {
113 call(reExec
, source
, [""]); // throws if source not a RegExp
115 return new RE(`${source}`, "u");
117 const unicode
= call(getUnicode
, source
, []);
119 // The provided regular expression does not have a unicode
122 `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
125 // The provided regular expression has a unicode flag.
126 return new RE(source
);
129 if (constraint
!== null && typeof constraint
!== "function") {
131 "Piscēs: Cannot construct Matcher: Constraint is not callable.",
134 this.#constraint
= constraint
;
135 return defineOwnProperties(
136 setPrototype(this, matcherPrototype
),
147 : `Matcher(${call(reToString, regExp, [])})`,
154 /** Gets whether the dot‐all flag is present on this `Matcher`. */
156 return call(getDotAll
, this.#regExp
, []);
160 * Executes this `Matcher` on the provided value and returns the
161 * result if there is a match, or null otherwise.
163 * Matchers only match if they can match the entire value on the
166 * ☡ The match result returned by this method will be the same as
167 * that passed to the constraint function—and may have been
168 * modified by said function prior to being returned.
171 const regExp
= this.#regExp
;
172 const constraint
= this.#constraint
;
173 const string
= `${$}`;
174 regExp
.lastIndex
= 0;
175 const result
= call(reExec
, regExp
, [string
]);
177 result
?.[0] === string
&&
178 (constraint
=== null || constraint(string
, result
, this))
180 // The entire string was matched and the constraint, if
181 // present, returned a truthy value.
184 // The entire string was not matched or the constraint returned
191 * Gets the flags present on this `Matcher`.
193 * ※ This needs to be defined because the internal `RegExp` object
194 * may have flags which are not yet recognized by ♓🌟 Piscēs.
197 return call(getFlags
, this.#regExp
, []);
200 /** Gets whether the global flag is present on this `Matcher`. */
202 return call(getGlobal
, this.#regExp
, []);
206 * Gets whether the has‐indices flag is present on this `Matcher`.
209 return call(getHasIndices
, this.#regExp
, []);
213 * Gets whether the ignore‐case flag is present on this `Matcher`.
216 return call(getIgnoreCase
, this.#regExp
, []);
220 * Gets whether the multiline flag is present on this `Matcher`.
223 return call(getMultiline
, this.#regExp
, []);
226 /** Gets the regular expression source for this `Matcher`. */
228 return call(getSource
, this.#regExp
, []);
231 /** Gets whether the sticky flag is present on this `Matcher`. */
233 return call(getSticky
, this.#regExp
, []);
237 * Gets whether the unicode flag is present on this `Matcher`.
239 * ※ This will always be true.
242 return call(getUnicode
, this.#regExp
, []);
246 const matcherConstructor
= defineOwnProperties(
247 class extends RegExp
{
248 constructor(...args
) {
249 return new Matcher(...args
);
253 name
: { value
: "Matcher" },
254 length
: { value
: 1 },
257 const matcherPrototype
= defineOwnProperties(
258 matcherConstructor
.prototype,
259 getOwnPropertyDescriptors(Matcher
.prototype),
260 { constructor: { value
: matcherConstructor
} },
263 return { Matcher
: matcherConstructor
};
268 * Returns the result of converting the provided value to A·S·C·I·I
274 * Returns the result of converting the provided value to A·S·C·I·I
280 toLowerCase
: stringToLowercase
,
281 toUpperCase
: stringToUppercase
,
284 asciiLowercase
: ($) =>
288 makeCallable(stringToLowercase
),
290 asciiUppercase
: ($) =>
294 makeCallable(stringToUppercase
),
301 * Returns an iterator over the code units in the string
302 * representation of the provided value.
307 * Returns an iterator over the codepoints in the string
308 * representation of the provided value.
313 * Returns an iterator over the scalar values in the string
314 * representation of the provided value.
316 * Codepoints which are not valid Unicode scalar values are replaced
322 * Returns the result of converting the provided value to a string of
323 * scalar values by replacing (unpaired) surrogate values with
328 const generateCodeUnits
= function* (ucsCharacter
) {
329 yield getCodeUnit(ucsCharacter
, 0);
331 const generateCodepoints
= function* (character
) {
332 const { allowSurrogates
} = this;
333 const codepoint
= getCodepoint(character
, 0);
334 yield allowSurrogates
|| codepoint
<= 0xD7FF || codepoint
>= 0xE000
339 const codeUnitsIterator
= arrayIteratorFunction(
341 "String Code Unit Iterator",
343 const codepointsIterator
= stringIteratorFunction(
344 bind(generateCodepoints
, { allowSurrogates
: true }, []),
345 "String Codepoint Iterator",
347 const scalarValuesIterator
= stringIteratorFunction(
348 bind(generateCodepoints
, { allowSurrogates
: false }, []),
349 "String Scalar Value Iterator",
352 next
: scalarValuesNext
,
353 } = getPrototype(scalarValuesIterator(""));
354 const scalarValueIterablePrototype
= {
359 scalarValuesIterator(this.source
),
367 codeUnits
: ($) => codeUnitsIterator(`${$}`),
368 codepoints
: ($) => codepointsIterator(`${$}`),
369 scalarValues
: ($) => scalarValuesIterator(`${$}`),
370 scalarValueString
: ($) =>
371 stringFromCodepoints(...objectCreate(
372 scalarValueIterablePrototype
,
373 { source
: { value
: `${$}` } },
379 * Returns an iterator over the codepoints in the string representation
380 * of the provided value according to the algorithm of
381 * `String::[Symbol.iterator]`.
383 export const characters
= makeCallable(
384 stringPrototype
[ITERATOR
],
388 * Returns the character at the provided position in the string
389 * representation of the provided value according to the algorithm of
390 * `String::codePointAt`.
392 export const getCharacter
= ($, pos
) => {
393 const codepoint
= getCodepoint($, pos
);
394 return codepoint
== null
396 : stringFromCodepoints(codepoint
);
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::charCodeAt`.
 *
 * ※ The first argument is the value to read from; the second is the
 * position.
 */
export const getCodeUnit = makeCallable(stringPrototype.charCodeAt);
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::codePointAt`.
 *
 * ※ The first argument is the value to read from; the second is the
 * position.
 */
export const getCodepoint = makeCallable(stringPrototype.codePointAt);
414 * Returns the index of the first occurrence of the search string in
415 * the string representation of the provided value according to the
416 * algorithm of `String::indexOf`.
418 export const getFirstSubstringIndex
= makeCallable(
419 stringPrototype
.indexOf
,
423 * Returns the index of the last occurrence of the search string in the
424 * string representation of the provided value according to the
425 * algorithm of `String::lastIndexOf`.
427 export const getLastSubstringIndex
= makeCallable(
428 stringPrototype
.lastIndexOf
,
432 * Returns the result of joining the provided iterable.
434 * If no separator is provided, it defaults to ",".
436 * If a value is nullish, it will be stringified as the empty string.
438 export const join
= (() => {
439 const { join
: arrayJoin
} = arrayPrototype
;
440 const join
= ($, separator
= ",") =>
441 call(arrayJoin
, [...$], [`${separator}`]);
447 * Returns a string created from the raw value of the tagged template
450 * ※ This is an alias for `String.raw`.
455 * Returns a string created from the provided code units.
457 * ※ This is an alias for `String.fromCharCode`.
459 fromCharCode
: stringFromCodeUnits
,
462 * Returns a string created from the provided codepoints.
464 * ※ This is an alias for `String.fromCodePoint`.
466 fromCodePoint
: stringFromCodepoints
,
/**
 * Returns the result of splitting the provided value on A·S·C·I·I
 * whitespace.
 *
 * ※ The value is first passed through
 * `stripAndCollapseASCIIWhitespace`, and the result is then split on
 * single spaces.
 */
export const splitOnASCIIWhitespace = ($) =>
  stringSplit(stripAndCollapseASCIIWhitespace($), " ");
477 * Returns the result of splitting the provided value on commas,
478 * trimming A·S·C·I·I whitespace from the resulting tokens.
480 export const splitOnCommas
= ($) =>
482 stripLeadingAndTrailingASCIIWhitespace(
485 /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
/**
 * Returns the result of catenating the string representations of the
 * provided values, returning a new string according to the algorithm
 * of `String::concat`.
 */
export const stringCatenate = makeCallable(stringPrototype.concat);
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string according to the algorithm of
 * `String::endsWith`.
 */
export const stringEndsWith = makeCallable(stringPrototype.endsWith);
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string according to the algorithm of
 * `String::includes`.
 */
export const stringIncludes = makeCallable(stringPrototype.includes);
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::match`.
 */
export const stringMatch = makeCallable(stringPrototype.match);
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::matchAll`.
 */
export const stringMatchAll = makeCallable(stringPrototype.matchAll);
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of `String::normalize`.
 */
531 export const stringNormalize
= makeCallable(
532 stringPrototype
.normalize
,
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length, according to
 * the algorithm of `String::padEnd`.
 */
export const stringPadEnd = makeCallable(stringPrototype.padEnd);
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length, according to
 * the algorithm of `String::padStart`.
 */
export const stringPadStart = makeCallable(stringPrototype.padStart);
/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times according to the
 * algorithm of `String::repeat`.
 */
export const stringRepeat = makeCallable(stringPrototype.repeat);
/**
 * Returns the result of replacing, in the string representation of the
 * provided value, what the provided matcher matches with the provided
 * replacement, according to the algorithm of `String::replace`.
 */
export const stringReplace = makeCallable(stringPrototype.replace);
564 * Returns the result of replacing the string representation of the
565 * provided value with the provided replacement, using the provided
566 * matcher and according to the algorithm of `String::replaceAll`.
568 export const stringReplaceAll
= makeCallable(
569 stringPrototype
.replaceAll
,
/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher and according to the
 * algorithm of `String::search`.
 */
export const stringSearch = makeCallable(stringPrototype.search);
/**
 * Returns a slice of the string representation of the provided value
 * according to the algorithm of `String::slice`.
 */
export const stringSlice = makeCallable(stringPrototype.slice);
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of `String::split`.
 *
 * ※ The first argument is the value to split; the second is the
 * separator.
 */
export const stringSplit = makeCallable(stringPrototype.split);
593 * Returns whether the string representation of the provided value
594 * starts with the provided search string according to the algorithm of
595 * `String::startsWith`.
597 export const stringStartsWith
= makeCallable(
598 stringPrototype
.startsWith
,
/**
 * Returns the `[[StringData]]` of the provided value.
 *
 * ☡ This function will throw if the provided object does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = makeCallable(stringPrototype.valueOf);
610 * Returns the result of stripping leading and trailing A·S·C·I·I
611 * whitespace from the provided value and collapsing other A·S·C·I·I
612 * whitespace in the string representation of the provided value.
614 export const stripAndCollapseASCIIWhitespace
= ($) =>
615 stripLeadingAndTrailingASCIIWhitespace(
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the string representation of the provided value.
 *
 * ※ A·S·C·I·I whitespace here means line feed, carriage return, tab,
 * form feed, and space; other whitespace characters are left alone.
 */
export const stripLeadingAndTrailingASCIIWhitespace = ($) => {
  // Every part of this expression is optional, so it matches any
  // string; the lazy capture group holds whatever remains once the
  // whitespace at either end has been set aside.
  const match = call(
    reExec,
    /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u,
    [$],
  );
  return match[1];
};
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of `String::substring`.
 */
export const substring = makeCallable(stringPrototype.substring);
/**
 * Returns the result of converting the provided value to a string.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => `${$}`;