X-Git-Url: https://git.ladys.computer/Pisces/blobdiff_plain/58d78d7c0602b17a9599e28232cc8a2ff1d8fc65..refs/heads/current:/string.js?ds=sidebyside diff --git a/string.js b/string.js index e697a1f..e8126d1 100644 --- a/string.js +++ b/string.js @@ -7,15 +7,26 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at . -import { bind, call, identity, makeCallable } from "./function.js"; import { + bind, + call, + createArrowFunction, + createCallableFunction, + identity, +} from "./function.js"; +import { + arrayIteratorFunction, + stringIteratorFunction, +} from "./iterable.js"; +import { + defineOwnDataProperty, defineOwnProperties, getOwnPropertyDescriptors, - getPrototype, objectCreate, + setPropertyValues, setPrototype, } from "./object.js"; -import { ITERATOR, TO_STRING_TAG } from "./value.js"; +import { sameValue, toLength, UNDEFINED } from "./value.js"; const RE = RegExp; const { prototype: rePrototype } = RE; @@ -86,7 +97,7 @@ export const { * expression with `^(?:` and `)$` if you don’t want nongreedy * regular expressions to fail when shorter matches are possible. */ - constructor(source, name = undefined, constraint = null) { + constructor(source, name = UNDEFINED, constraint = null) { super( ($) => { if (typeof $ !== "string") { @@ -131,17 +142,19 @@ export const { return defineOwnProperties( setPrototype(this, matcherPrototype), { - lastIndex: { + lastIndex: setPropertyValues(objectCreate(null), { configurable: false, enumerable: false, value: 0, writable: false, - }, - name: { - value: name != null + }), + name: defineOwnDataProperty( + objectCreate(null), + "value", + name != null ? `${name}` : `Matcher(${call(reToString, regExp, [])})`, - }, + ), }, ); } @@ -239,21 +252,31 @@ export const { } }; - const matcherConstructor = defineOwnProperties( + const matcherConstructor = Object.defineProperties( class extends RegExp { constructor(...args) { return new Matcher(...args); } }, { - name: { value: "Matcher" }, - length: { value: 1 }, + name: defineOwnDataProperty( + Object.create(null), + "value", + "Matcher", + ), + length: defineOwnDataProperty(Object.create(null), "value", 1), }, ); const matcherPrototype = defineOwnProperties( matcherConstructor.prototype, getOwnPropertyDescriptors(Matcher.prototype), - { constructor: { value: matcherConstructor } }, + { + constructor: defineOwnDataProperty( + Object.create(null), + "value", + matcherConstructor, + ), + }, ); return { Matcher: matcherConstructor }; @@ -281,18 +304,44 @@ export const { stringReplaceAll( `${$}`, /[A-Z]/gu, - makeCallable(stringToLowercase), + createCallableFunction(stringToLowercase), ), asciiUppercase: ($) => stringReplaceAll( `${$}`, /[a-z]/gu, - makeCallable(stringToUppercase), + createCallableFunction(stringToUppercase), ), }; })(); +/** + * Returns −0 if the provided argument is "-0"; returns a number + * representing the index if the provided argument is a canonical + * numeric index string; otherwise, returns undefined. + * + * There is no clamping of the numeric index, but note that numbers + * above 2^53 − 1 are not safe nor valid integer indices. + */ +export const canonicalNumericIndexString = ($) => { + if (typeof $ !== "string") { + return UNDEFINED; + } else if ($ === "-0") { + return -0; + } else { + const n = +$; + return $ === `${n}` ? n : UNDEFINED; + } +}; + export const { + /** + * Returns an iterator over the codepoints in the string representation + * of the provided value according to the algorithm of + * `String::[Symbol.iterator]`. + */ + characters, + /** * Returns an iterator over the code units in the string * representation of the provided value. @@ -310,169 +359,49 @@ export const { * representation of the provided value. * * Codepoints which are not valid Unicode scalar values are replaced - * with U+FFFF. + * with U+FFFD. */ scalarValues, - - /** - * Returns the result of converting the provided value to a string of - * scalar values by replacing (unpaired) surrogate values with - * U+FFFD. - */ - scalarValueString, } = (() => { - const { [ITERATOR]: arrayIterator } = arrayPrototype; - const arrayIteratorPrototype = Object.getPrototypeOf( - [][ITERATOR](), + const generateCharacters = function* (character) { + yield character; + }; + const generateCodeUnits = function* (ucsCharacter) { + yield getCodeUnit(ucsCharacter, 0); + }; + const generateCodepoints = function* (character) { + const { allowSurrogates } = this; + const codepoint = getCodepoint(character, 0); + yield allowSurrogates || codepoint <= 0xD7FF || codepoint >= 0xE000 + ? codepoint + : 0xFFFD; + }; + + const charactersIterator = stringIteratorFunction( + generateCharacters, + "String Character Iterator", ); - const { next: arrayIteratorNext } = arrayIteratorPrototype; - const iteratorPrototype = Object.getPrototypeOf( - arrayIteratorPrototype, + const codeUnitsIterator = arrayIteratorFunction( + generateCodeUnits, + "String Code Unit Iterator", ); - const { [ITERATOR]: stringIterator } = stringPrototype; - const stringIteratorPrototype = Object.getPrototypeOf( - ""[ITERATOR](), + const codepointsIterator = stringIteratorFunction( + bind(generateCodepoints, { allowSurrogates: true }, []), + "String Codepoint Iterator", ); - const { next: stringIteratorNext } = stringIteratorPrototype; - - /** - * An iterator object for iterating over code values (either code - * units or codepoints) in a string. - * - * ※ This class is not exposed, although its methods are (through - * the prototypes of string code value iterator objects). - */ - const StringCodeValueIterator = class extends identity { - #allowSurrogates; - #baseIterator; - - /** - * Constructs a new string code value iterator from the provided - * base iterator. - * - * If the provided base iterator is an array iterator, this is a - * code unit iterator. If the provided iterator is a string - * iterator and surrogates are allowed, this is a codepoint - * iterator. If the provided iterator is a string iterator and - * surrogates are not allowed, this is a scalar value iterator. - */ - constructor(baseIterator, allowSurrogates = true) { - super(objectCreate(stringCodeValueIteratorPrototype)); - this.#allowSurrogates = !!allowSurrogates; - this.#baseIterator = baseIterator; - } - - /** Provides the next code value in the iterator. */ - next() { - const baseIterator = this.#baseIterator; - switch (getPrototype(baseIterator)) { - case arrayIteratorPrototype: { - // The base iterator is iterating over U·C·S characters. - const { - value: ucsCharacter, - done, - } = call(arrayIteratorNext, baseIterator, []); - return done - ? { value: undefined, done: true } - : { value: getCodeUnit(ucsCharacter, 0), done: false }; - } - case stringIteratorPrototype: { - // The base iterator is iterating over Unicode characters. - const { - value: character, - done, - } = call(stringIteratorNext, baseIterator, []); - if (done) { - // The base iterator has been exhausted. - return { value: undefined, done: true }; - } else { - // The base iterator provided a character; yield the - // codepoint. - const codepoint = getCodepoint(character, 0); - return { - value: this.#allowSurrogates || codepoint <= 0xD7FF || - codepoint >= 0xE000 - ? codepoint - : 0xFFFD, - done: false, - }; - } - } - default: { - // Should not be possible! - throw new TypeError( - "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.", - ); - } - } - } - }; - - const { - next: stringCodeValueIteratorNext, - } = StringCodeValueIterator.prototype; - const stringCodeValueIteratorPrototype = objectCreate( - iteratorPrototype, - { - next: { - configurable: true, - enumerable: false, - value: stringCodeValueIteratorNext, - writable: true, - }, - [TO_STRING_TAG]: { - configurable: true, - enumerable: false, - value: "String Code Value Iterator", - writable: false, - }, - }, + const scalarValuesIterator = stringIteratorFunction( + bind(generateCodepoints, { allowSurrogates: false }, []), + "String Scalar Value Iterator", ); - const scalarValueIterablePrototype = { - [ITERATOR]() { - return { - next: bind( - stringCodeValueIteratorNext, - new StringCodeValueIterator( - call(stringIterator, this.source, []), - false, - ), - [], - ), - }; - }, - }; return { - codeUnits: ($) => - new StringCodeValueIterator(call(arrayIterator, `${$}`, [])), - codepoints: ($) => - new StringCodeValueIterator( - call(stringIterator, `${$}`, []), - true, - ), - scalarValues: ($) => - new StringCodeValueIterator( - call(stringIterator, `${$}`, []), - false, - ), - scalarValueString: ($) => - stringFromCodepoints(...objectCreate( - scalarValueIterablePrototype, - { source: { value: `${$}` } }, - )), + characters: ($) => charactersIterator(`${$}`), + codeUnits: ($) => codeUnitsIterator(`${$}`), + codepoints: ($) => codepointsIterator(`${$}`), + scalarValues: ($) => scalarValuesIterator(`${$}`), }; })(); -/** - * Returns an iterator over the codepoints in the string representation - * of the provided value according to the algorithm of - * `String::[Symbol.iterator]`. - */ -export const characters = makeCallable( - stringPrototype[ITERATOR], -); - /** * Returns the character at the provided position in the string * representation of the provided value according to the algorithm of @@ -481,31 +410,102 @@ export const characters = makeCallable( export const getCharacter = ($, pos) => { const codepoint = getCodepoint($, pos); return codepoint == null - ? undefined + ? UNDEFINED : stringFromCodepoints(codepoint); }; -/** - * Returns the code unit at the provided position in the string - * representation of the provided value according to the algorithm of - * `String::charAt`. - */ -export const getCodeUnit = makeCallable(stringPrototype.charCodeAt); +export const { + /** + * Returns the code unit at the provided position in the string + * representation of the provided value according to the algorithm of + * `String::charAt`, except that out‐of‐bounds values return undefined + * in place of nan. + */ + getCodeUnit, + + /** + * Returns a string created from the provided code units. + * + * ※ This is effectively an alias for `String.fromCharCode`, but + * with the same error behaviour as `String.fromCodePoint`. + * + * ☡ This function throws an error if provided with an argument which + * is not an integral number from 0 to FFFF₁₆ inclusive. + */ + stringFromCodeUnits, + + /** + * Returns the result of catenating the string representations of the + * provided values, returning a new string according to the algorithm + * of `String::concat`. + * + * ※ If no arguments are given, this function returns the empty + * string. This is different behaviour than if an explicit undefined + * first argument is given, in which case the resulting string will + * begin with `"undefined"`. + */ + stringCatenate, +} = (() => { + const { fromCharCode } = String; + const { charCodeAt, concat } = String.prototype; + const { + isInteger: isIntegralNumber, + isNaN: isNan, + } = Number; + + return { + getCodeUnit: ($, n) => { + const codeUnit = call(charCodeAt, $, [n]); + return isNan(codeUnit) ? UNDEFINED : codeUnit; + }, + stringCatenate: Object.defineProperties( + (...args) => call(concat, "", args), + { name: { value: "stringCatenate" }, length: { value: 2 } }, + ), + stringFromCodeUnits: Object.defineProperties( + (...codeUnits) => { + for (let index = 0; index < codeUnits.length; ++index) { + // Iterate over each provided code unit and throw if it is + // out of range. + const nextCU = +codeUnits[index]; + if ( + !isIntegralNumber(nextCU) || nextCU < 0 || nextCU > 0xFFFF + ) { + // The code unit is not an integral number between 0 and + // 0xFFFF. + throw new RangeError( + `Piscēs: Code unit out of range: ${nextCU}.`, + ); + } else { + // The code unit is acceptable. + /* do nothing */ + } + } + return call(fromCharCode, UNDEFINED, codeUnits); + }, + { name: { value: "stringFromCodeUnits" }, length: { value: 1 } }, + ), + }; +})(); /** * Returns the codepoint at the provided position in the string * representation of the provided value according to the algorithm of * `String::codePointAt`. */ -export const getCodepoint = makeCallable(stringPrototype.codePointAt); +export const getCodepoint = createCallableFunction( + stringPrototype.codePointAt, + { name: "getCodepoint" }, +); /** * Returns the index of the first occurrence of the search string in * the string representation of the provided value according to the * algorithm of `String::indexOf`. */ -export const getFirstSubstringIndex = makeCallable( +export const getFirstSubstringIndex = createCallableFunction( stringPrototype.indexOf, + { name: "getFirstSubstringIndex" }, ); /** @@ -513,10 +513,39 @@ export const getFirstSubstringIndex = makeCallable( * string representation of the provided value according to the * algorithm of `String::lastIndexOf`. */ -export const getLastSubstringIndex = makeCallable( +export const getLastSubstringIndex = createCallableFunction( stringPrototype.lastIndexOf, + { name: "getLastSubstringIndex" }, ); +/** Returns whether the provided value is an array index. */ +export const isArrayIndexString = ($) => { + const value = canonicalNumericIndexString($); + if (value !== UNDEFINED) { + // The provided value is a canonical numeric index string; return + // whether it is in range for array indices. + return sameValue(value, 0) || + value === toLength(value) && value > 0 && value < -1 >>> 0; + } else { + // The provided value is not a canonical numeric index string. + return false; + } +}; + +/** Returns whether the provided value is an integer index string. */ +export const isIntegerIndexString = ($) => { + const value = canonicalNumericIndexString($); + if (value !== UNDEFINED) { + // The provided value is a canonical numeric index string; return + // whether it is in range for integer indices. + return sameValue(value, 0) || + value === toLength(value) && value > 0; + } else { + // The provided value is not a canonical numeric index string. + return false; + } +}; + /** * Returns the result of joining the provided iterable. * @@ -526,49 +555,52 @@ export const getLastSubstringIndex = makeCallable( */ export const join = (() => { const { join: arrayJoin } = arrayPrototype; - const join = ($, separator = ",") => - call(arrayJoin, [...$], [`${separator}`]); + const join = ($, separator) => + call( + arrayJoin, + [...$], + [separator === UNDEFINED ? "," : `${separator}`], + ); return join; })(); -export const { - /** - * Returns a string created from the raw value of the tagged template - * literal. - * - * ※ This is an alias for `String.raw`. - */ - raw: rawString, - - /** - * Returns a string created from the provided code units. - * - * ※ This is an alias for `String.fromCharCode`. - */ - fromCharCode: stringFromCodeUnits, +/** + * Returns a string created from the raw value of the tagged template + * literal. + * + * ※ This is effectively an alias for `String.raw`. + */ +export const rawString = createArrowFunction(String.raw, { + name: "rawString", +}); - /** - * Returns a string created from the provided codepoints. - * - * ※ This is an alias for `String.fromCodePoint`. - */ - fromCodePoint: stringFromCodepoints, -} = String; +/** + * Returns a string created from the provided codepoints. + * + * ※ This is effectively an alias for `String.fromCodePoint`. + * + * ☡ This function throws an error if provided with an argument which + * is not an integral number from 0 to 10FFFF₁₆ inclusive. + */ +export const stringFromCodepoints = createArrowFunction( + String.fromCodePoint, + { name: "stringFromCodepoints" }, +); /** - * Returns the result of splitting the provided value on A·S·C·I·I + * Returns the result of splitting the provided value on Ascii * whitespace. */ -export const splitOnASCIIWhitespace = ($) => - stringSplit(stripAndCollapseASCIIWhitespace($), " "); +export const splitOnAsciiWhitespace = ($) => + stringSplit(stripAndCollapseAsciiWhitespace($), " "); /** * Returns the result of splitting the provided value on commas, - * trimming A·S·C·I·I whitespace from the resulting tokens. + * trimming Ascii whitespace from the resulting tokens. */ export const splitOnCommas = ($) => stringSplit( - stripLeadingAndTrailingASCIIWhitespace( + stripLeadingAndTrailingAsciiWhitespace( stringReplaceAll( `${$}`, /[\n\r\t\f ]*,[\n\r\t\f ]*/gu, @@ -578,47 +610,53 @@ export const splitOnCommas = ($) => ",", ); -/** - * Returns the result of catenating the string representations of the - * provided values, returning a new string according to the algorithm - * of `String::concat`. - */ -export const stringCatenate = makeCallable(stringPrototype.concat); - /** * Returns whether the string representation of the provided value ends * with the provided search string according to the algorithm of * `String::endsWith`. */ -export const stringEndsWith = makeCallable(stringPrototype.endsWith); +export const stringEndsWith = createCallableFunction( + stringPrototype.endsWith, + { name: "stringEndsWith" }, +); /** * Returns whether the string representation of the provided value * contains the provided search string according to the algorithm of * `String::includes`. */ -export const stringIncludes = makeCallable(stringPrototype.includes); +export const stringIncludes = createCallableFunction( + stringPrototype.includes, + { name: "stringIncludes" }, +); /** * Returns the result of matching the string representation of the * provided value with the provided matcher according to the algorithm * of `String::match`. */ -export const stringMatch = makeCallable(stringPrototype.match); +export const stringMatch = createCallableFunction( + stringPrototype.match, + { name: "stringMatch" }, +); /** * Returns the result of matching the string representation of the * provided value with the provided matcher according to the algorithm * of `String::matchAll`. */ -export const stringMatchAll = makeCallable(stringPrototype.matchAll); +export const stringMatchAll = createCallableFunction( + stringPrototype.matchAll, + { name: "stringMatchAll" }, +); /** * Returns the normalized form of the string representation of the - * provided value according to the algorithm of `String::matchAll`. + * provided value according to the algorithm of `String::normalize`. */ -export const stringNormalize = makeCallable( +export const stringNormalize = createCallableFunction( stringPrototype.normalize, + { name: "stringNormalize" }, ); /** @@ -626,36 +664,49 @@ export const stringNormalize = makeCallable( * of the provided value padded until it is the desired length * according to the algorithm of `String::padEnd`. */ -export const stringPadEnd = makeCallable(stringPrototype.padEnd); +export const stringPadEnd = createCallableFunction( + stringPrototype.padEnd, + { name: "stringPadEnd" }, +); /** * Returns the result of padding the start of the string representation * of the provided value padded until it is the desired length * according to the algorithm of `String::padStart`. */ -export const stringPadStart = makeCallable(stringPrototype.padStart); +export const stringPadStart = createCallableFunction( + stringPrototype.padStart, + { name: "stringPadStart" }, +); /** * Returns the result of repeating the string representation of the * provided value the provided number of times according to the * algorithm of `String::repeat`. */ -export const stringRepeat = makeCallable(stringPrototype.repeat); +export const stringRepeat = createCallableFunction( + stringPrototype.repeat, + { name: "stringRepeat" }, +); /** * Returns the result of replacing the string representation of the * provided value with the provided replacement, using the provided * matcher and according to the algorithm of `String::replace`. */ -export const stringReplace = makeCallable(stringPrototype.replace); +export const stringReplace = createCallableFunction( + stringPrototype.replace, + { name: "stringReplace" }, +); /** * Returns the result of replacing the string representation of the * provided value with the provided replacement, using the provided * matcher and according to the algorithm of `String::replaceAll`. */ -export const stringReplaceAll = makeCallable( +export const stringReplaceAll = createCallableFunction( stringPrototype.replaceAll, + { name: "stringReplaceAll" }, ); /** @@ -663,45 +714,60 @@ export const stringReplaceAll = makeCallable( * provided value using the provided matcher and according to the * algorithm of `String::search`. */ -export const stringSearch = makeCallable(stringPrototype.search); +export const stringSearch = createCallableFunction( + stringPrototype.search, + { name: "stringSearch" }, +); /** * Returns a slice of the string representation of the provided value * according to the algorithm of `String::slice`. */ -export const stringSlice = makeCallable(stringPrototype.slice); +export const stringSlice = createCallableFunction( + stringPrototype.slice, + { name: "stringSlice" }, +); /** * Returns the result of splitting of the string representation of the * provided value on the provided separator according to the algorithm * of `String::split`. */ -export const stringSplit = makeCallable(stringPrototype.split); +export const stringSplit = createCallableFunction( + stringPrototype.split, + { name: "stringSplit" }, +); /** * Returns whether the string representation of the provided value * starts with the provided search string according to the algorithm of * `String::startsWith`. */ -export const stringStartsWith = makeCallable( +export const stringStartsWith = createCallableFunction( stringPrototype.startsWith, + { name: "stringStartsWith" }, ); /** - * Returns the `[[StringData]]` of the provided value. + * Returns the value of the provided string. + * + * ※ This is effectively an alias for the `String::valueOf`. * - * ☡ This function will throw if the provided object does not have a - * `[[StringData]]` internal slot. + * ☡ This function throws if the provided argument is not a string and + * does not have a `[[StringData]]` slot. */ -export const stringValue = makeCallable(stringPrototype.valueOf); +export const stringValue = createCallableFunction( + stringPrototype.valueOf, + { name: "stringValue" }, +); /** - * Returns the result of stripping leading and trailing A·S·C·I·I - * whitespace from the provided value and collapsing other A·S·C·I·I + * Returns the result of stripping leading and trailing Ascii + * whitespace from the provided value and collapsing other Ascii * whitespace in the string representation of the provided value. */ -export const stripAndCollapseASCIIWhitespace = ($) => - stripLeadingAndTrailingASCIIWhitespace( +export const stripAndCollapseAsciiWhitespace = ($) => + stripLeadingAndTrailingAsciiWhitespace( stringReplaceAll( `${$}`, /[\n\r\t\f ]+/gu, @@ -710,17 +776,29 @@ export const stripAndCollapseASCIIWhitespace = ($) => ); /** - * Returns the result of stripping leading and trailing A·S·C·I·I + * Returns the result of stripping leading and trailing Ascii * whitespace from the string representation of the provided value. */ -export const stripLeadingAndTrailingASCIIWhitespace = ($) => +export const stripLeadingAndTrailingAsciiWhitespace = ($) => call(reExec, /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u, [$])[1]; /** * Returns a substring of the string representation of the provided * value according to the algorithm of `String::substring`. */ -export const substring = makeCallable(stringPrototype.substring); +export const substring = createCallableFunction( + stringPrototype.substring, +); + +/** + * Returns the result of converting the provided value to a string of + * scalar values by replacing (unpaired) surrogate values with + * U+FFFD. + */ +export const toScalarValueString = createCallableFunction( + String.prototype.toWellFormed, + { name: "toScalarValueString" }, +); /** * Returns the result of converting the provided value to a string.