// From string.js as of the commit “Add Matcher class for whole‐string
// matching” (Lady, Sat, 23 Jul 2022 04:27:35 +0000 (-0700)). The start
// of the file, including most of its license header, lies outside
// this excerpt, as does everything after the Matcher definition.

import { bind, call, identity, makeCallable } from "./function.js";
import {
  defineOwnProperties,
  getPrototype,
  objectCreate,
  setPrototype,
} from "./object.js";

export const {
  /**
   * A RegExp·like object which only matches entire strings.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although `exec` converts its argument to a string first and so
   * may still produce a match for a nonstring value.
   */
  Matcher,
} = (() => {
  // Built·in RegExp functionality is captured once, up front, and
  // invoked through `call` rather than looked up on instances.
  const RE = RegExp;
  const { prototype: rePrototype } = RE;
  const { exec: reExec, toString: reToString } = rePrototype;

  /** Returns the getter which RegExp.prototype defines for `key`. */
  const getterOf = (key) =>
    Object.getOwnPropertyDescriptor(rePrototype, key).get;

  const getDotAll = getterOf("dotAll");
  const getGlobal = getterOf("global");
  const getHasIndices = getterOf("hasIndices");
  const getIgnoreCase = getterOf("ignoreCase");
  const getMultiline = getterOf("multiline");
  const getSource = getterOf("source");
  const getSticky = getterOf("sticky");
  const getUnicode = getterOf("unicode");

  // NOTE(review): `identity` comes from ./function.js and is not
  // visible here. Presumably its constructor returns the function it
  // is given, so that each Matcher *is* the arrow function passed to
  // `super` (with `#regExp` installed on it) — confirm.
  const Matcher = class extends identity {
    #regExp;

    /**
     * Constructs a new Matcher from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     *
     * The provided regular expression is copied, and is not itself
     * modified; in particular, its `lastIndex` is left untouched.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     *
     * Throws a TypeError if the provided source is a regular
     * expression without the unicode flag.
     */
    constructor(source, name = undefined) {
      super(
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string.
            //
            // `regExp` is declared below; this function is only ever
            // called after construction has finished.
            regExp.lastIndex = 0;
            return call(reExec, regExp, [$])?.[0] === $;
          }
        },
      );
      const regExp = this.#regExp = (() => {
        let unicode;
        try {
          // The built·in flag getters throw when their receiver is
          // not a regular expression. Unlike calling `exec`, they
          // never write to the `lastIndex` of the provided object, so
          // a global or sticky source which is in the middle of being
          // iterated is not disturbed.
          unicode = call(getUnicode, source, []);
        } catch {
          unicode = undefined;
        }
        if (unicode === undefined) {
          // The provided source is not a regular expression (the
          // getter threw), or it is `RegExp.prototype` itself (for
          // which the getter gives undefined). Treat it as the string
          // source of a unicode regular expression.
          return new RE(`${source}`, "u");
        } else if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag. Copy
          // it (pattern and flags) so that this Matcher has its own
          // `lastIndex` state.
          return new RE(source);
        }
      })();
      return defineOwnProperties(
        // The prototype is set explicitly on the object being
        // returned, as it needs to be `matcherPrototype` regardless
        // of what `super` produced.
        setPrototype(this, matcherPrototype),
        {
          // Matchers always match from the start of the string, so
          // they expose a fixed, read·only `lastIndex` of zero.
          lastIndex: {
            configurable: false,
            enumerable: false,
            value: 0,
            writable: false,
          },
          name: {
            value: name != null
              ? `${name}`
              : `Matcher(${call(reToString, regExp, [])})`,
          },
        },
      );
    }

    /** Gets whether the dotAll flag is present on this Matcher. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this Matcher on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * The provided value is converted to a string before matching.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     */
    exec($) {
      const regExp = this.#regExp;
      const string = `${$}`;
      regExp.lastIndex = 0; // always start matching from the beginning
      const result = call(reExec, regExp, [string]);
      if (result?.[0] === string) {
        // The entire string was matched.
        return result;
      } else {
        // The entire string was not matched.
        return null;
      }
    }

    /** Gets whether the global flag is present on this Matcher. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /** Gets whether the hasIndices flag is present on this Matcher. */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /** Gets whether the ignoreCase flag is present on this Matcher. */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /** Gets whether the multiline flag is present on this Matcher. */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this Matcher. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this Matcher. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this Matcher.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // Matchers inherit from RegExp.prototype, so `instanceof RegExp`
  // holds for them.
  const matcherPrototype = setPrototype(
    Matcher.prototype,
    rePrototype,
  );

  return { Matcher };
})();
// Tests for Matcher, from string.test.js as of the commit “Add
// Matcher class for whole‐string matching”.

import {
  assert,
  assertEquals,
  assertStrictEquals,
  assertThrows,
  describe,
  it,
} from "./dev-deps.js";
// NOTE(review): only part of this import list is visible in the
// excerpt; further names precede `codeUnits` and sit between
// `splitOnASCIIWhitespace` and
// `stripLeadingAndTrailingASCIIWhitespace`. Restore them from the
// full file.
import {
  codeUnits,
  getCharacter,
  join,
  Matcher,
  scalarValues,
  scalarValueString,
  splitOnASCIIWhitespace,
  stripLeadingAndTrailingASCIIWhitespace,
} from "./string.js";

describe("Matcher", () => {
  // Construction: accepted sources and the shape of the result.
  it("[[Construct]] accepts a string first argument", () => {
    assert(new Matcher(""));
  });

  it("[[Construct]] accepts a unicode regular expression first argument", () => {
    assert(new Matcher(/(?:)/u));
  });

  it("[[Construct]] throws with a non·unicode regular expression first argument", () => {
    assertThrows(() => new Matcher(/(?:)/));
  });

  it("[[Construct]] creates a callable object", () => {
    assertStrictEquals(typeof new Matcher(""), "function");
  });

  it("[[Construct]] creates a new Matcher", () => {
    assertStrictEquals(
      Object.getPrototypeOf(new Matcher("")),
      Matcher.prototype,
    );
  });

  it("[[Construct]] creates an object which inherits from RegExp", () => {
    assert(new Matcher("") instanceof RegExp);
  });

  describe("::dotAll", () => {
    it("[[Get]] returns true when the dotAll flag is present", () => {
      assertStrictEquals(new Matcher(/(?:)/su).dotAll, true);
    });

    it("[[Get]] returns false when the dotAll flag is not present", () => {
      assertStrictEquals(new Matcher(/(?:)/u).dotAll, false);
    });
  });

  describe("::exec", () => {
    it("[[Call]] returns the match object given a complete match", () => {
      // NOTE(review): the name of the first capture group was lost
      // from the excerpt (the pattern read `(?(?:…`, which is not
      // valid syntax). `middle` is a stand‐in. Only the indexed
      // captures are compared, so the name does not affect the
      // result.
      assertEquals(
        [
          ...new Matcher(/.(?<middle>(?:.(?=.))*)(.)?/u).exec(
            "success",
          ),
        ],
        ["success", "ucces", "s"],
      );
    });

    it("[[Call]] returns null given a partial match", () => {
      assertEquals(new Matcher("").exec("failure"), null);
    });
  });

  describe("::global", () => {
    it("[[Get]] returns true when the global flag is present", () => {
      assertStrictEquals(new Matcher(/(?:)/gu).global, true);
    });

    it("[[Get]] returns false when the global flag is not present", () => {
      assertStrictEquals(new Matcher(/(?:)/u).global, false);
    });
  });

  describe("::hasIndices", () => {
    it("[[Get]] returns true when the hasIndices flag is present", () => {
      assertStrictEquals(new Matcher(/(?:)/du).hasIndices, true);
    });

    it("[[Get]] returns false when the hasIndices flag is not present", () => {
      assertStrictEquals(new Matcher(/(?:)/u).hasIndices, false);
    });
  });

  describe("::ignoreCase", () => {
    it("[[Get]] returns true when the ignoreCase flag is present", () => {
      assertStrictEquals(new Matcher(/(?:)/iu).ignoreCase, true);
    });

    it("[[Get]] returns false when the ignoreCase flag is not present", () => {
      assertStrictEquals(new Matcher(/(?:)/u).ignoreCase, false);
    });
  });

  describe("::multiline", () => {
    it("[[Get]] returns true when the multiline flag is present", () => {
      assertStrictEquals(new Matcher(/(?:)/mu).multiline, true);
    });

    it("[[Get]] returns false when the multiline flag is not present", () => {
      assertStrictEquals(new Matcher(/(?:)/u).multiline, false);
    });
  });

  describe("::source", () => {
    it("[[Get]] returns the RegExp source", () => {
      assertStrictEquals(new Matcher("").source, "(?:)");
      assertStrictEquals(new Matcher(/.*/su).source, ".*");
    });
  });

  describe("::sticky", () => {
    it("[[Get]] returns true when the sticky flag is present", () => {
      assertStrictEquals(new Matcher(/(?:)/uy).sticky, true);
    });

    it("[[Get]] returns false when the sticky flag is not present", () => {
      assertStrictEquals(new Matcher(/(?:)/u).sticky, false);
    });
  });

  describe("::unicode", () => {
    it("[[Get]] returns true when the unicode flag is present", () => {
      assertStrictEquals(new Matcher(/(?:)/u).unicode, true);
    });
  });

  // `~` covers calling the Matcher itself; `~lastIndex`, `~length`
  // and `~name` read properties directly off the Matcher object.
  describe("~", () => {
    it("[[Call]] returns true for a complete match", () => {
      assertStrictEquals(new Matcher("")(""), true);
      assertStrictEquals(new Matcher(/.*/su)("success\nyay"), true);
    });

    it("[[Call]] returns false for a partial match", () => {
      assertStrictEquals(new Matcher("")("failure"), false);
      // Without the dotAll flag, `.` stops at the newline.
      assertStrictEquals(new Matcher(/.*/u)("failure\nno"), false);
    });
  });

  describe("~lastIndex", () => {
    it("[[Get]] returns zero", () => {
      assertStrictEquals(new Matcher("").lastIndex, 0);
    });

    it("[[Set]] fails", () => {
      assertThrows(() => (new Matcher("").lastIndex = 1));
    });
  });

  describe("~length", () => {
    it("[[Get]] returns one", () => {
      assertStrictEquals(new Matcher("").length, 1);
    });
  });

  describe("~name", () => {
    it("[[Get]] wraps the stringified regular expression if no name was provided", () => {
      assertStrictEquals(new Matcher("").name, "Matcher(/(?:)/u)");
      assertStrictEquals(
        new Matcher(/.*/gsu).name,
        "Matcher(/.*/gsu)",
      );
    });

    it("[[Get]] uses the provided name if one was provided", () => {
      assertStrictEquals(new Matcher("", "success").name, "success");
    });
  });
});

// ※ The `asciiLowercase` tests which follow in string.test.js are cut
// off in this excerpt and are not reproduced here.