// ♓🌟 Piscēs ∷ string.js
// ====================================================================
//
// Copyright © 2022–2023 Lady [@ Lady’s Computer].
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

import {
  bind,
  call,
  createArrowFunction,
  createCallableFunction,
  identity,
} from "./function.js";
import {
  arrayIteratorFunction,
  stringIteratorFunction,
} from "./iterable.js";
import {
  defineOwnDataProperty,
  defineOwnProperties,
  getOwnPropertyDescriptors,
  objectCreate,
  setPropertyValues,
  setPrototype,
} from "./object.js";
import { sameValue, toLength, UNDEFINED } from "./value.js";

// Intrinsics are captured once at module evaluation so that later
// code does not look them up on the (mutable) global constructors.
const RE = RegExp;
const { prototype: rePrototype } = RE;
const { prototype: arrayPrototype } = Array;
const { prototype: stringPrototype } = String;

const { exec: reExec } = rePrototype;

export const {
  /**
   * A `RegExp`like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although other methods like `::exec` coerce their arguments and
   * may still return true.
   */
  Matcher,
} = (() => {
  const { toString: reToString } = rePrototype;

  /**
   * Returns the getter function of the accessor property with the
   * provided name on `RegExp.prototype`.
   *
   * ☡ This throws if `RegExp.prototype` has no own property with the
   * provided name.
   */
  const reGetter = (name) =>
    Object.getOwnPropertyDescriptor(rePrototype, name).get;

  const getDotAll = reGetter("dotAll");
  const getFlags = reGetter("flags");
  const getGlobal = reGetter("global");
  const getHasIndices = reGetter("hasIndices");
  const getIgnoreCase = reGetter("ignoreCase");
  const getMultiline = reGetter("multiline");
  const getSource = reGetter("source");
  const getSticky = reGetter("sticky");
  const getUnicode = reGetter("unicode");

  // NOTE(review): `identity` is presumably a constructor which just
  // returns its argument, so that `super(fn)` makes `this` the
  // provided function — confirm against `./function.js`.
  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new `Matcher` from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the
     * `Matcher` object itself. If the return value of this call is
     * falsey, then the match will be considered a failure.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     *
     * ☡ This constructor throws a `TypeError` if the provided source
     * is a regular expression without the unicode flag, or if the
     * provided constraint is neither null nor a function.
     */
    constructor(source, name = UNDEFINED, constraint = null) {
      super(
        // This function closes over `regExp`, which is declared below.
        // That is fine because the binding is only read when the
        // matcher is actually called, after construction.
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `.lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $ &&
              (constraint === null || constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        try {
          call(reExec, source, [""]); // throws if source not a RegExp
        } catch {
          // The provided source is not a regular expression; treat
          // its string representation as a pattern.
          return new RE(`${source}`, "u");
        }
        const unicode = call(getUnicode, source, []);
        if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag. Copy
          // it so that the original is not mutated by matching.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            // `lastIndex` is pinned to a nonwritable 0 on the matcher
            // itself; the internal regular expression keeps its own.
            lastIndex: setPropertyValues(objectCreate(null), {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            }),
            name: defineOwnDataProperty(
              objectCreate(null),
              "value",
              name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            ),
          },
        );
      }
    }

    /** Gets whether the dot‐all flag is present on this `Matcher`. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this `Matcher` on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /**
     * Gets the flags present on this `Matcher`.
     *
     * ※ This needs to be defined because the internal `RegExp` object
     * may have flags which are not yet recognized by ♓🌟 Piscēs.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether the global flag is present on this `Matcher`. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /**
     * Gets whether the has‐indices flag is present on this `Matcher`.
     */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /**
     * Gets whether the ignore‐case flag is present on this `Matcher`.
     */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /**
     * Gets whether the multiline flag is present on this `Matcher`.
     */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this `Matcher`. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this `Matcher`. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this `Matcher`.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exported constructor extends `RegExp` (so its prototype chain
  // includes `RegExp.prototype`) but delegates all construction to the
  // internal `Matcher` class above.
  const matcherConstructor = Object.defineProperties(
    class extends RE {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: defineOwnDataProperty(
        objectCreate(null),
        "value",
        "Matcher",
      ),
      length: defineOwnDataProperty(objectCreate(null), "value", 1),
    },
  );

  // The prototype given to every matcher instance: the exported
  // constructor’s prototype with the internal class’s own properties
  // copied onto it and `constructor` pointed at the exported
  // constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    {
      constructor: defineOwnDataProperty(
        objectCreate(null),
        "value",
        matcherConstructor,
      ),
    },
  );

  return { Matcher: matcherConstructor };
})();

export const {
  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * lowercase.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * uppercase.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = stringPrototype;
  return {
    asciiLowercase: ($) =>
      stringReplaceAll(
        `${$}`,
        /[A-Z]/gu,
        createCallableFunction(stringToLowercase),
      ),
    asciiUppercase: ($) =>
      stringReplaceAll(
        `${$}`,
        /[a-z]/gu,
        createCallableFunction(stringToUppercase),
      ),
  };
})();

/**
 * Returns −0 if the provided argument is "-0"; returns a number
 * representing the index if the provided argument is a canonical
 * numeric index string; otherwise, returns undefined.
 *
 * There is no clamping of the numeric index, but note that numbers
 * above 2^53 − 1 are neither safe nor valid integer indices.
 */
export const canonicalNumericIndexString = ($) => {
  if (typeof $ !== "string") {
    return UNDEFINED;
  } else if ($ === "-0") {
    return -0;
  } else {
    // A string is canonical exactly when converting it to a number
    // and back yields the same string.
    const n = +$;
    return $ === `${n}` ? n : UNDEFINED;
  }
};

export const {
  /**
   * Returns an iterator over the characters (codepoints, as strings)
   * in the string representation of the provided value according to
   * the algorithm of `String::[Symbol.iterator]`.
   */
  characters,

  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,
} = (() => {
  // NOTE(review): each generator below is presumably invoked once per
  // item of the underlying string/array iterator — confirm against
  // `./iterable.js`.
  const generateCharacters = function* (character) {
    yield character;
  };
  const generateCodeUnits = function* (ucsCharacter) {
    yield getCodeUnit(ucsCharacter, 0);
  };
  const generateCodepoints = function* (character) {
    // `this` is the options object bound below.
    const { allowSurrogates } = this;
    const codepoint = getCodepoint(character, 0);
    yield allowSurrogates || codepoint <= 0xD7FF || codepoint >= 0xE000
      ? codepoint
      : 0xFFFD;
  };

  const charactersIterator = stringIteratorFunction(
    generateCharacters,
    "String Character Iterator",
  );
  const codeUnitsIterator = arrayIteratorFunction(
    generateCodeUnits,
    "String Code Unit Iterator",
  );
  const codepointsIterator = stringIteratorFunction(
    bind(generateCodepoints, { allowSurrogates: true }, []),
    "String Codepoint Iterator",
  );
  const scalarValuesIterator = stringIteratorFunction(
    bind(generateCodepoints, { allowSurrogates: false }, []),
    "String Scalar Value Iterator",
  );

  return {
    characters: ($) => charactersIterator(`${$}`),
    codeUnits: ($) => codeUnitsIterator(`${$}`),
    codepoints: ($) => codepointsIterator(`${$}`),
    scalarValues: ($) => scalarValuesIterator(`${$}`),
  };
})();

/**
 * Returns the character at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::codePointAt`.
 *
 * Returns undefined if there is no codepoint at the provided position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  return codepoint == null
    ? UNDEFINED
    : stringFromCodepoints(codepoint);
};

export const {
  /**
   * Returns the code unit at the provided position in the string
   * representation of the provided value according to the algorithm of
   * `String::charCodeAt`, except that out‐of‐bounds values return
   * undefined in place of nan.
   */
  getCodeUnit,

  /**
   * Returns a string created from the provided code units.
   *
   * ※ This is effectively an alias for `String.fromCharCode`, but
   * with the same error behaviour as `String.fromCodePoint`.
   *
   * ☡ This function throws an error if provided with an argument which
   * is not an integral number from 0 to FFFF₁₆ inclusive.
   */
  stringFromCodeUnits,

  /**
   * Returns the result of catenating the string representations of the
   * provided values, returning a new string according to the algorithm
   * of `String::concat`.
   *
   * ※ If no arguments are given, this function returns the empty
   * string. This is different behaviour than if an explicit undefined
   * first argument is given, in which case the resulting string will
   * begin with `"undefined"`.
   */
  stringCatenate,
} = (() => {
  const { fromCharCode } = String;
  const { charCodeAt, concat } = stringPrototype;
  const {
    isInteger: isIntegralNumber,
    isNaN: isNan,
  } = Number;

  return {
    getCodeUnit: ($, n) => {
      const codeUnit = call(charCodeAt, $, [n]);
      return isNan(codeUnit) ? UNDEFINED : codeUnit;
    },
    stringCatenate: Object.defineProperties(
      (...args) => call(concat, "", args),
      { name: { value: "stringCatenate" }, length: { value: 2 } },
    ),
    stringFromCodeUnits: Object.defineProperties(
      (...codeUnits) => {
        for (let index = 0; index < codeUnits.length; ++index) {
          // Iterate over each provided code unit and throw if it is
          // out of range.
          const nextCU = +codeUnits[index];
          if (
            !isIntegralNumber(nextCU) || nextCU < 0 || nextCU > 0xFFFF
          ) {
            // The code unit is not an integral number between 0 and
            // 0xFFFF.
            throw new RangeError(
              `Piscēs: Code unit out of range: ${nextCU}.`,
            );
          } else {
            // The code unit is acceptable.
            /* do nothing */
          }
        }
        return call(fromCharCode, UNDEFINED, codeUnits);
      },
      { name: { value: "stringFromCodeUnits" }, length: { value: 1 } },
    ),
  };
})();

/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::codePointAt`.
 */
export const getCodepoint = createCallableFunction(
  stringPrototype.codePointAt,
  { name: "getCodepoint" },
);

/**
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value according to the
 * algorithm of `String::indexOf`.
 */
export const getFirstSubstringIndex = createCallableFunction(
  stringPrototype.indexOf,
  { name: "getFirstSubstringIndex" },
);

/**
 * Returns the index of the last occurrence of the search string in the
 * string representation of the provided value according to the
 * algorithm of `String::lastIndexOf`.
 */
export const getLastSubstringIndex = createCallableFunction(
  stringPrototype.lastIndexOf,
  { name: "getLastSubstringIndex" },
);

/** Returns whether the provided value is an array index string. */
export const isArrayIndexString = ($) => {
  const value = canonicalNumericIndexString($);
  if (value !== UNDEFINED) {
    // The provided value is a canonical numeric index string; return
    // whether it is in range for array indices.
    //
    // `-1 >>> 0` is 2^32 − 1, so the upper bound below admits indices
    // up to and including 2^32 − 2. `sameValue` is used for zero so
    // that −0 is rejected.
    return sameValue(value, 0) ||
      value === toLength(value) && value > 0 && value < -1 >>> 0;
  } else {
    // The provided value is not a canonical numeric index string.
    return false;
  }
};

/** Returns whether the provided value is an integer index string. */
export const isIntegerIndexString = ($) => {
  const value = canonicalNumericIndexString($);
  if (value !== UNDEFINED) {
    // The provided value is a canonical numeric index string; return
    // whether it is in range for integer indices. `sameValue` is used
    // for zero so that −0 is rejected.
    return sameValue(value, 0) ||
      value === toLength(value) && value > 0;
  } else {
    // The provided value is not a canonical numeric index string.
    return false;
  }
};

/**
 * Returns the result of joining the provided iterable.
 *
 * If no separator is provided, it defaults to ",".
 *
 * If a value is nullish, it will be stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = arrayPrototype;
  const join = ($, separator) =>
    call(
      arrayJoin,
      [...$],
      [separator === UNDEFINED ? "," : `${separator}`],
    );
  return join;
})();

/**
 * Returns a string created from the raw value of the tagged template
 * literal.
 *
 * ※ This is effectively an alias for `String.raw`.
 */
export const rawString = createArrowFunction(String.raw, {
  name: "rawString",
});

/**
 * Returns a string created from the provided codepoints.
 *
 * ※ This is effectively an alias for `String.fromCodePoint`.
 *
 * ☡ This function throws an error if provided with an argument which
 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
 */
export const stringFromCodepoints = createArrowFunction(
  String.fromCodePoint,
  { name: "stringFromCodepoints" },
);

/**
 * Returns the result of splitting the provided value on Ascii
 * whitespace.
 */
export const splitOnAsciiWhitespace = ($) =>
  stringSplit(stripAndCollapseAsciiWhitespace($), " ");

/**
 * Returns the result of splitting the provided value on commas,
 * trimming Ascii whitespace from the resulting tokens.
 */
export const splitOnCommas = ($) =>
  stringSplit(
    stripLeadingAndTrailingAsciiWhitespace(
      stringReplaceAll(
        `${$}`,
        /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
        ",",
      ),
    ),
    ",",
  );

/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string according to the algorithm of
 * `String::endsWith`.
 */
export const stringEndsWith = createCallableFunction(
  stringPrototype.endsWith,
  { name: "stringEndsWith" },
);

/**
 * Returns whether the string representation of the provided value
 * contains the provided search string according to the algorithm of
 * `String::includes`.
 */
export const stringIncludes = createCallableFunction(
  stringPrototype.includes,
  { name: "stringIncludes" },
);

/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::match`.
 */
export const stringMatch = createCallableFunction(
  stringPrototype.match,
  { name: "stringMatch" },
);

/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::matchAll`.
 */
export const stringMatchAll = createCallableFunction(
  stringPrototype.matchAll,
  { name: "stringMatchAll" },
);

/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of `String::normalize`.
 */
export const stringNormalize = createCallableFunction(
  stringPrototype.normalize,
  { name: "stringNormalize" },
);

/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padEnd`.
 */
export const stringPadEnd = createCallableFunction(
  stringPrototype.padEnd,
  { name: "stringPadEnd" },
);

/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padStart`.
 */
export const stringPadStart = createCallableFunction(
  stringPrototype.padStart,
  { name: "stringPadStart" },
);

/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times according to the
 * algorithm of `String::repeat`.
 */
export const stringRepeat = createCallableFunction(
  stringPrototype.repeat,
  { name: "stringRepeat" },
);

/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of `String::replace`.
 */
export const stringReplace = createCallableFunction(
  stringPrototype.replace,
  { name: "stringReplace" },
);

/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of `String::replaceAll`.
 */
export const stringReplaceAll = createCallableFunction(
  stringPrototype.replaceAll,
  { name: "stringReplaceAll" },
);

/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher and according to the
 * algorithm of `String::search`.
 */
export const stringSearch = createCallableFunction(
  stringPrototype.search,
  { name: "stringSearch" },
);

/**
 * Returns a slice of the string representation of the provided value
 * according to the algorithm of `String::slice`.
 */
export const stringSlice = createCallableFunction(
  stringPrototype.slice,
  { name: "stringSlice" },
);

/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of `String::split`.
 */
export const stringSplit = createCallableFunction(
  stringPrototype.split,
  { name: "stringSplit" },
);

/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string according to the algorithm of
 * `String::startsWith`.
 */
export const stringStartsWith = createCallableFunction(
  stringPrototype.startsWith,
  { name: "stringStartsWith" },
);

/**
 * Returns the value of the provided string.
 *
 * ※ This is effectively an alias for `String::valueOf`.
 *
 * ☡ This function throws if the provided argument is not a string and
 * does not have a `[[StringData]]` slot.
 */
export const stringValue = createCallableFunction(
  stringPrototype.valueOf,
  { name: "stringValue" },
);

/**
 * Returns the result of stripping leading and trailing Ascii
 * whitespace from the provided value and collapsing other Ascii
 * whitespace in the string representation of the provided value.
 */
export const stripAndCollapseAsciiWhitespace = ($) =>
  stripLeadingAndTrailingAsciiWhitespace(
    stringReplaceAll(
      `${$}`,
      /[\n\r\t\f ]+/gu,
      " ",
    ),
  );

/**
 * Returns the result of stripping leading and trailing Ascii
 * whitespace from the string representation of the provided value.
 *
 * ※ The regular expression used here matches any string, so the
 * match result is never null.
 */
export const stripLeadingAndTrailingAsciiWhitespace = ($) =>
  call(reExec, /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u, [$])[1];

/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of `String::substring`.
 */
export const substring = createCallableFunction(
  stringPrototype.substring,
  { name: "substring" },
);

/**
 * Returns the result of converting the provided value to a string of
 * scalar values by replacing (unpaired) surrogate values with
 * U+FFFD.
 *
 * ※ This is effectively an alias for `String::toWellFormed`.
 */
export const toScalarValueString = createCallableFunction(
  stringPrototype.toWellFormed,
  { name: "toScalarValueString" },
);

/**
 * Returns the result of converting the provided value to a string.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => `${$}`;