X-Git-Url: https://git.ladys.computer/Pisces/blobdiff_plain/15a0a4d62a2816fc55f98064a81034ae2fc13404..6e6d4e3261c1c943fe44fa9e381bcf8bf1441fd6:/string.js?ds=sidebyside diff --git a/string.js b/string.js index 17e8736..eb6f6fb 100644 --- a/string.js +++ b/string.js @@ -1,81 +1,727 @@ // ♓🌟 Piscēs ∷ string.js // ==================================================================== // -// Copyright © 2022 Lady [@ Lady’s Computer]. +// Copyright © 2022–2023 Lady [@ Lady’s Computer]. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +import { bind, call, identity, makeCallable } from "./function.js"; +import { + defineOwnProperties, + getOwnPropertyDescriptors, + getPrototype, + objectCreate, + setPrototype, +} from "./object.js"; +import { type } from "./value.js"; + +export const { + /** + * A RegExp·like object which only matches entire strings, and may + * have additional constraints specified. + * + * Matchers are callable objects and will return true if they are + * called with a string that they match, and false otherwise. + * Matchers will always return false if called with nonstrings, + * although other methods like `exec` coerce their arguments and may + * still return true. 
+ */ + Matcher, +} = (() => { + const RE = RegExp; + const { prototype: rePrototype } = RE; + const { exec: reExec, toString: reToString } = rePrototype; + const getDotAll = + Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get; + const getFlags = + Object.getOwnPropertyDescriptor(rePrototype, "flags").get; + const getGlobal = + Object.getOwnPropertyDescriptor(rePrototype, "global").get; + const getHasIndices = + Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get; + const getIgnoreCase = + Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get; + const getMultiline = + Object.getOwnPropertyDescriptor(rePrototype, "multiline").get; + const getSource = + Object.getOwnPropertyDescriptor(rePrototype, "source").get; + const getSticky = + Object.getOwnPropertyDescriptor(rePrototype, "sticky").get; + const getUnicode = + Object.getOwnPropertyDescriptor(rePrototype, "unicode").get; + + const Matcher = class extends identity { + #constraint; + #regExp; + + /** + * Constructs a new Matcher from the provided source. + * + * If the provided source is a regular expression, then it must + * have the unicode flag set. Otherwise, it is interpreted as the + * string source of a regular expression with the unicode flag set. + * + * Other flags are taken from the provided regular expression + * object, if any are present. + * + * A name for the matcher may be provided as the second argument. + * + * A callable constraint on acceptable inputs may be provided as a + * third argument. If provided, it will be called with three + * arguments whenever a match appears successful: first, the string + * being matched, second, the match result, and third, the Matcher + * object itself. If the return value of this call is falsey, then + * the match will be considered a failure. + * + * ☡ If the provided source regular expression uses nongreedy + * quantifiers, it may not match the whole string even if a match + * with the whole string is possible. 
Surround the regular + * expression with `^(?:` and `)$` if you don’t want nongreedy + * regular expressions to fail when shorter matches are possible. + */ + constructor(source, name = undefined, constraint = null) { + super( + ($) => { + if (typeof $ !== "string") { + // The provided value is not a string. + return false; + } else { + // The provided value is a string. Set the `lastIndex` of + // the regular expression to 0 and see if the first attempt + // at a match matches the whole string and passes the + // provided constraint (if present). + regExp.lastIndex = 0; + const result = call(reExec, regExp, [$]); + return result?.[0] === $ && + (constraint === null || constraint($, result, this)); + } + }, + ); + const regExp = this.#regExp = (() => { + try { + call(reExec, source, [""]); // throws if source not a RegExp + } catch { + return new RE(`${source}`, "u"); + } + const unicode = call(getUnicode, source, []); + if (!unicode) { + // The provided regular expression does not have a unicode + // flag. + throw new TypeError( + `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`, + ); + } else { + // The provided regular expression has a unicode flag. + return new RE(source); + } + })(); + if (constraint !== null && typeof constraint !== "function") { + throw new TypeError( + "Piscēs: Cannot construct Matcher: Constraint is not callable.", + ); + } else { + this.#constraint = constraint; + return defineOwnProperties( + setPrototype(this, matcherPrototype), + { + lastIndex: { + configurable: false, + enumerable: false, + value: 0, + writable: false, + }, + name: { + value: name != null + ? `${name}` + : `Matcher(${call(reToString, regExp, [])})`, + }, + }, + ); + } + } + + /** Gets whether the dotAll flag is present on this Matcher. */ + get dotAll() { + return call(getDotAll, this.#regExp, []); + } + + /** + * Executes this Matcher on the provided value and returns the + * result if there is a match, or null otherwise. 
+ * + * Matchers only match if they can match the entire value on the + * first attempt. + * + * ☡ The match result returned by this method will be the same as + * that passed to the constraint function—and may have been + * modified by said function prior to being returned. + */ + exec($) { + const regExp = this.#regExp; + const constraint = this.#constraint; + const string = `${$}`; + regExp.lastIndex = 0; + const result = call(reExec, regExp, [string]); + if ( + result?.[0] === string && + (constraint === null || constraint(string, result, this)) + ) { + // The entire string was matched and the constraint, if + // present, returned a truthy value. + return result; + } else { + // The entire string was not matched or the constraint returned + // a falsey value. + return null; + } + } + + /** + * Gets the flags present on this Matcher. + * + * ※ This needs to be defined because the internal RegExp object + * may have flags which are not yet recognized by ♓🌟 Piscēs. + */ + get flags() { + return call(getFlags, this.#regExp, []); + } + + /** Gets whether the global flag is present on this Matcher. */ + get global() { + return call(getGlobal, this.#regExp, []); + } + + /** Gets whether the hasIndices flag is present on this Matcher. */ + get hasIndices() { + return call(getHasIndices, this.#regExp, []); + } + + /** Gets whether the ignoreCase flag is present on this Matcher. */ + get ignoreCase() { + return call(getIgnoreCase, this.#regExp, []); + } + + /** Gets whether the multiline flag is present on this Matcher. */ + get multiline() { + return call(getMultiline, this.#regExp, []); + } + + /** Gets the regular expression source for this Matcher. */ + get source() { + return call(getSource, this.#regExp, []); + } + + /** Gets whether the sticky flag is present on this Matcher. */ + get sticky() { + return call(getSticky, this.#regExp, []); + } + + /** + * Gets whether the unicode flag is present on this Matcher. + * + * ※ This will always be true. 
+ */ + get unicode() { + return call(getUnicode, this.#regExp, []); + } + }; + + const matcherConstructor = defineOwnProperties( + class extends RegExp { + constructor(...args) { + return new Matcher(...args); + } + }, + { + name: { value: "Matcher" }, + length: { value: 1 }, + }, + ); + const matcherPrototype = defineOwnProperties( + matcherConstructor.prototype, + getOwnPropertyDescriptors(Matcher.prototype), + { constructor: { value: matcherConstructor } }, + ); + + return { Matcher: matcherConstructor }; +})(); + +export const { + /** + * Returns the result of converting the provided value to A·S·C·I·I + * lowercase. + */ + asciiLowercase, + + /** + * Returns the result of converting the provided value to A·S·C·I·I + * uppercase. + */ + asciiUppercase, +} = (() => { + const { + toLowerCase: stringToLowercase, + toUpperCase: stringToUppercase, + } = String.prototype; + return { + asciiLowercase: ($) => + stringReplaceAll( + `${$}`, + /[A-Z]/gu, + makeCallable(stringToLowercase), + ), + asciiUppercase: ($) => + stringReplaceAll( + `${$}`, + /[a-z]/gu, + makeCallable(stringToUppercase), + ), + }; +})(); + +export const { + /** + * Returns an iterator over the code units in the string + * representation of the provided value. + */ + codeUnits, + + /** + * Returns an iterator over the codepoints in the string + * representation of the provided value. + */ + codepoints, + + /** + * Returns an iterator over the scalar values in the string + * representation of the provided value. + * + * Codepoints which are not valid Unicode scalar values are replaced + * with U+FFFD. + */ + scalarValues, + + /** + * Returns the result of converting the provided value to a string of + * scalar values by replacing (unpaired) surrogate values with + * U+FFFD. 
+ */ + scalarValueString, +} = (() => { + const { + iterator: iteratorSymbol, + toStringTag: toStringTagSymbol, + } = Symbol; + const { [iteratorSymbol]: arrayIterator } = Array.prototype; + const arrayIteratorPrototype = Object.getPrototypeOf( + [][iteratorSymbol](), + ); + const { next: arrayIteratorNext } = arrayIteratorPrototype; + const iteratorPrototype = Object.getPrototypeOf( + arrayIteratorPrototype, + ); + const { [iteratorSymbol]: stringIterator } = String.prototype; + const stringIteratorPrototype = Object.getPrototypeOf( + ""[iteratorSymbol](), + ); + const { next: stringIteratorNext } = stringIteratorPrototype; + + /** + * An iterator object for iterating over code values (either code + * units or codepoints) in a string. + * + * ※ This class is not exposed, although its methods are (through + * the prototypes of string code value iterator objects). + */ + const StringCodeValueIterator = class extends identity { + #allowSurrogates; + #baseIterator; + + /** + * Constructs a new string code value iterator from the provided + * base iterator. + * + * If the provided base iterator is an array iterator, this is a + * code unit iterator. If the provided iterator is a string + * iterator and surrogates are allowed, this is a codepoint + * iterator. If the provided iterator is a string iterator and + * surrogates are not allowed, this is a scalar value iterator. + */ + constructor(baseIterator, allowSurrogates = true) { + super(objectCreate(stringCodeValueIteratorPrototype)); + this.#allowSurrogates = !!allowSurrogates; + this.#baseIterator = baseIterator; + } + + /** Provides the next code value in the iterator. */ + next() { + const baseIterator = this.#baseIterator; + switch (getPrototype(baseIterator)) { + case arrayIteratorPrototype: { + // The base iterator is iterating over U·C·S characters. + const { + value: ucsCharacter, + done, + } = call(arrayIteratorNext, baseIterator, []); + return done + ? 
{ value: undefined, done: true } + : { value: getCodeUnit(ucsCharacter, 0), done: false }; + } + case stringIteratorPrototype: { + // The base iterator is iterating over Unicode characters. + const { + value: character, + done, + } = call(stringIteratorNext, baseIterator, []); + if (done) { + // The base iterator has been exhausted. + return { value: undefined, done: true }; + } else { + // The base iterator provided a character; yield the + // codepoint. + const codepoint = getCodepoint(character, 0); + return { + value: this.#allowSurrogates || codepoint <= 0xD7FF || + codepoint >= 0xE000 + ? codepoint + : 0xFFFD, + done: false, + }; + } + } + default: { + // Should not be possible! + throw new TypeError( + "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.", + ); + } + } + } + }; + + const { + next: stringCodeValueIteratorNext, + } = StringCodeValueIterator.prototype; + const stringCodeValueIteratorPrototype = objectCreate( + iteratorPrototype, + { + next: { + configurable: true, + enumerable: false, + value: stringCodeValueIteratorNext, + writable: true, + }, + [toStringTagSymbol]: { + configurable: true, + enumerable: false, + value: "String Code Value Iterator", + writable: false, + }, + }, + ); + const scalarValueIterablePrototype = { + [iteratorSymbol]() { + return { + next: bind( + stringCodeValueIteratorNext, + new StringCodeValueIterator( + call(stringIterator, this.source, []), + false, + ), + [], + ), + }; + }, + }; + + return { + codeUnits: ($) => + new StringCodeValueIterator(call(arrayIterator, `${$}`, [])), + codepoints: ($) => + new StringCodeValueIterator( + call(stringIterator, `${$}`, []), + true, + ), + scalarValues: ($) => + new StringCodeValueIterator( + call(stringIterator, `${$}`, []), + false, + ), + scalarValueString: ($) => + stringFromCodepoints(...objectCreate( + scalarValueIterablePrototype, + { source: { value: `${$}` } }, + )), + }; +})(); + /** - * Returns the result of converting the provided value to 
A·S·C·I·I - * lowercase. + * Returns an iterator over the codepoints in the string representation + * of the provided value according to the algorithm of + * String::[Symbol.iterator]. */ -export const asciiLowercase = ($) => - `${$}`.replaceAll( - /[A-Z]/gu, - Function.prototype.call.bind(String.prototype.toLowerCase), - ); +export const characters = makeCallable( + String.prototype[Symbol.iterator], +); /** - * Returns the result of converting the provided value to A·S·C·I·I - * uppercase. + * Returns the character at the provided position in the string + * representation of the provided value according to the algorithm of + * String::codePointAt. */ -export const asciiUppercase = ($) => - `${$}`.replaceAll( - /[a-z]/gu, - Function.prototype.call.bind(String.prototype.toUpperCase), - ); +export const getCharacter = ($, pos) => { + const codepoint = getCodepoint($, pos); + return codepoint == null + ? undefined + : stringFromCodepoints(codepoint); +}; /** - * Returns the result of converting the provided value to a string of - * scalar values by replacing (unpaired) surrogate values with U+FFFD. + * Returns the code unit at the provided position in the string + * representation of the provided value according to the algorithm of + * String::charCodeAt. */ -export const scalarValueString = ($) => - String.fromCodePoint( - ...function* () { - for (const char of `${$}`) { - const scalar = char.codePointAt(0); - yield scalar >= 0xD800 && scalar <= 0xDFFF ? 0xFFFD : scalar; - } - }(), - ); +export const getCodeUnit = makeCallable(String.prototype.charCodeAt); + +/** + * Returns the codepoint at the provided position in the string + * representation of the provided value according to the algorithm of + * String::codePointAt. 
+ */ +export const getCodepoint = makeCallable(String.prototype.codePointAt); + +/** + * Returns the index of the first occurrence of the search string in + * the string representation of the provided value according to the + * algorithm of String::indexOf. + */ +export const getFirstSubstringIndex = makeCallable( + String.prototype.indexOf, +); + +/** + * Returns the index of the last occurrence of the search string in the + * string representation of the provided value according to the + * algorithm of String::lastIndexOf. + */ +export const getLastSubstringIndex = makeCallable( + String.prototype.lastIndexOf, +); + +/** + * Returns the result of joining the provided iterable. + * + * If no separator is provided, it defaults to ",". + * + * If a value is nullish, it will be stringified as the empty string. + */ +export const join = (() => { + const { join: arrayJoin } = Array.prototype; + const join = ($, separator = ",") => + call(arrayJoin, [...$], [`${separator}`]); + return join; +})(); + +export const { + /** + * Returns a string created from the raw value of the tagged template + * literal. + * + * ※ This is an alias for String.raw. + */ + raw: rawString, + + /** + * Returns a string created from the provided code units. + * + * ※ This is an alias for String.fromCharCode. + */ + fromCharCode: stringFromCodeUnits, + + /** + * Returns a string created from the provided codepoints. + * + * ※ This is an alias for String.fromCodePoint. + */ + fromCodePoint: stringFromCodepoints, +} = String; /** * Returns the result of splitting the provided value on A·S·C·I·I * whitespace. */ export const splitOnASCIIWhitespace = ($) => - stripAndCollapseASCIIWhitespace($).split(" "); + stringSplit(stripAndCollapseASCIIWhitespace($), " "); /** * Returns the result of splitting the provided value on commas, * trimming A·S·C·I·I whitespace from the resulting tokens. 
*/ export const splitOnCommas = ($) => - stripLeadingAndTrailingASCIIWhitespace( - `${$}`.replaceAll( - /[\n\r\t\f ]*,[\n\r\t\f ]*/gu, - ",", + stringSplit( + stripLeadingAndTrailingASCIIWhitespace( + stringReplaceAll( + `${$}`, + /[\n\r\t\f ]*,[\n\r\t\f ]*/gu, + ",", + ), ), - ).split(","); + ",", + ); /** - * Returns the result of stripping leading and trailing A·S·C·I·I - * whitespace from the provided value. + * Returns the result of catenating the string representations of the + * provided values, returning a new string according to the algorithm + * of String::concat. + */ +export const stringCatenate = makeCallable(String.prototype.concat); + +/** + * Returns whether the string representation of the provided value ends + * with the provided search string according to the algorithm of + * String::endsWith. + */ +export const stringEndsWith = makeCallable(String.prototype.endsWith); + +/** + * Returns whether the string representation of the provided value + * contains the provided search string according to the algorithm of + * String::includes. + */ +export const stringIncludes = makeCallable(String.prototype.includes); + +/** + * Returns the result of matching the string representation of the + * provided value with the provided matcher according to the algorithm + * of String::match. + */ +export const stringMatch = makeCallable(String.prototype.match); + +/** + * Returns the result of matching the string representation of the + * provided value with the provided matcher according to the algorithm + * of String::matchAll. + */ +export const stringMatchAll = makeCallable(String.prototype.matchAll); + +/** + * Returns the normalized form of the string representation of the + * provided value according to the algorithm of String::normalize. 
+ */ +export const stringNormalize = makeCallable( + String.prototype.normalize, +); + +/** + * Returns the result of padding the end of the string representation + * of the provided value padded until it is the desired length + * according to the algorithm of String::padEnd. + */ +export const stringPadEnd = makeCallable(String.prototype.padEnd); + +/** + * Returns the result of padding the start of the string representation + * of the provided value padded until it is the desired length + * according to the algorithm of String::padStart. + */ +export const stringPadStart = makeCallable(String.prototype.padStart); + +/** + * Returns the result of repeating the string representation of the + * provided value the provided number of times according to the + * algorithm of String::repeat. + */ +export const stringRepeat = makeCallable(String.prototype.repeat); + +/** + * Returns the result of replacing the string representation of the + * provided value with the provided replacement, using the provided + * matcher and according to the algorithm of String::replace. */ -export const stripLeadingAndTrailingASCIIWhitespace = ($) => - /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u.exec($)[1]; +export const stringReplace = makeCallable(String.prototype.replace); + +/** + * Returns the result of replacing the string representation of the + * provided value with the provided replacement, using the provided + * matcher and according to the algorithm of String::replaceAll. + */ +export const stringReplaceAll = makeCallable( + String.prototype.replaceAll, +); + +/** + * Returns the result of searching the string representation of the + * provided value using the provided matcher and according to the + * algorithm of String::search. + */ +export const stringSearch = makeCallable(String.prototype.search); + +/** + * Returns a slice of the string representation of the provided value + * according to the algorithm of String::slice. 
+ */ +export const stringSlice = makeCallable(String.prototype.slice); + +/** + * Returns the result of splitting of the string representation of the + * provided value on the provided separator according to the algorithm + * of String::split. + */ +export const stringSplit = makeCallable(String.prototype.split); + +/** + * Returns whether the string representation of the provided value + * starts with the provided search string according to the algorithm of + * String::startsWith. + */ +export const stringStartsWith = makeCallable( + String.prototype.startsWith, +); + +/** + * Returns the `[[StringData]]` of the provided value. + * + * ☡ This function will throw if the provided object does not have a + * `[[StringData]]` internal slot. + */ +export const stringValue = makeCallable(String.prototype.valueOf); /** * Returns the result of stripping leading and trailing A·S·C·I·I * whitespace from the provided value and collapsing other A·S·C·I·I - * whitespace in the provided value. + * whitespace in the string representation of the provided value. */ export const stripAndCollapseASCIIWhitespace = ($) => stripLeadingAndTrailingASCIIWhitespace( - `${$}`.replaceAll( + stringReplaceAll( + `${$}`, /[\n\r\t\f ]+/gu, " ", ), ); + +/** + * Returns the result of stripping leading and trailing A·S·C·I·I + * whitespace from the string representation of the provided value. + */ +export const stripLeadingAndTrailingASCIIWhitespace = (() => { + const { exec: reExec } = RegExp.prototype; + return ($) => + call(reExec, /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u, [$])[1]; +})(); + +/** + * Returns a substring of the string representation of the provided + * value according to the algorithm of String::substring. + */ +export const substring = makeCallable(String.prototype.substring); + +/** + * Returns the result of converting the provided value to a string. + * + * ☡ This method throws for symbols and other objects without a string + * representation. 
+ */ +export const toString = ($) => `${$}`;