// Retrieved from Lady’s Gitweb — Pisces / blob / string.js
// Commit: Add iterator function builders; use in string.js
// Path: [Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import { bind, call, identity, makeCallable } from "./function.js";
11 import {
12 arrayIteratorFunction,
13 stringIteratorFunction,
14 } from "./iterable.js";
15 import {
16 defineOwnProperties,
17 getOwnPropertyDescriptors,
18 getPrototype,
19 objectCreate,
20 setPrototype,
21 } from "./object.js";
22 import { ITERATOR } from "./value.js";
23
// Local aliases for the intrinsic constructors and prototypes which are
// used by the definitions in this module.
const RE = RegExp;
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

// The intrinsic `RegExp::exec`, for use with an explicit this value.
const reExec = rePrototype.exec;
30
export const {
  /**
   * A `RegExp`like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although other methods like `::exec` coerce their arguments and
   * may still produce a match.
   */
  Matcher,
} = (() => {
  const { toString: reToString } = rePrototype;

  /**
   * Returns the getter function of the accessor property with the
   * provided name on `RegExp.prototype`.
   */
  const getter = (name) =>
    Object.getOwnPropertyDescriptor(rePrototype, name).get;

  const getDotAll = getter("dotAll");
  const getFlags = getter("flags");
  const getGlobal = getter("global");
  const getHasIndices = getter("hasIndices");
  const getIgnoreCase = getter("ignoreCase");
  const getMultiline = getter("multiline");
  const getSource = getter("source");
  const getSticky = getter("sticky");
  const getUnicode = getter("unicode");

  // The class extends `identity` so that the value passed to `super`
  // (the callable below) becomes `this`, and consequently receives
  // the private fields declared here.
  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new `Matcher` from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the
     * `Matcher` object itself. If the return value of this call is
     * falsey, then the match will be considered a failure.
     *
     * ☡ This constructor throws a `TypeError` if the source is a
     * regular expression without the unicode flag, or if the
     * constraint is neither null nor callable.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     */
    constructor(source, name = undefined, constraint = null) {
      super(
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `.lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            //
            // The result of the constraint is coerced to a boolean so
            // that calling a matcher always gives true or false, as
            // documented, and never a raw constraint return value.
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $ &&
              (constraint === null || !!constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        try {
          // This throws if the source is not a RegExp.
          //
          // ※ For a global or sticky source, this call may reset the
          // `.lastIndex` of the source.
          call(reExec, source, [""]);
        } catch {
          // The source is not a regular expression; treat its string
          // representation as the source of a unicode one.
          return new RE(`${source}`, "u");
        }
        const unicode = call(getUnicode, source, []);
        if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag. Copy
          // it so that later changes to the source are not observed.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            lastIndex: {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            },
            name: {
              value: name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            },
          },
        );
      }
    }

    /** Gets whether the dot‐all flag is present on this `Matcher`. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this `Matcher` on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /**
     * Gets the flags present on this `Matcher`.
     *
     * ※ This needs to be defined because the internal `RegExp` object
     * may have flags which are not yet recognized by ♓🌟 Piscēs.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether the global flag is present on this `Matcher`. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /**
     * Gets whether the has‐indices flag is present on this `Matcher`.
     */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /**
     * Gets whether the ignore‐case flag is present on this `Matcher`.
     */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /**
     * Gets whether the multiline flag is present on this `Matcher`.
     */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this `Matcher`. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this `Matcher`. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this `Matcher`.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exported constructor extends `RegExp` (so that its prototype
  // chain includes `RegExp.prototype`) but simply defers to the
  // internal class above when constructing.
  const matcherConstructor = defineOwnProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: { value: "Matcher" },
      length: { value: 1 },
    },
  );

  // The exported prototype receives the accessors and methods of the
  // internal class, with `constructor` pointing at the exported
  // constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    { constructor: { value: matcherConstructor } },
  );

  return { Matcher: matcherConstructor };
})();
265
export const {
  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * lowercase.
   *
   * Only the characters `A` through `Z` in the string representation
   * of the provided value are changed.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * uppercase.
   *
   * Only the characters `a` through `z` in the string representation
   * of the provided value are changed.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = stringPrototype;

  // Build the replacement callbacks once, rather than on every call.
  // Each is invoked with the matched substring as its first argument.
  const toLowercase = makeCallable(stringToLowercase);
  const toUppercase = makeCallable(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, toLowercase),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, toUppercase),
  };
})();
298
export const {
  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,

  /**
   * Returns the result of converting the provided value to a string of
   * scalar values by replacing (unpaired) surrogate values with
   * U+FFFD.
   */
  scalarValueString,
} = (() => {
  /** Yields the code unit at index 0 of the provided value. */
  const generateCodeUnits = function* (ucsCharacter) {
    yield getCodeUnit(ucsCharacter, 0);
  };

  /**
   * Yields the codepoint at index 0 of the provided value.
   *
   * The this value must have an `allowSurrogates` property; when it
   * is falsey, surrogate codepoints (U+D800 through U+DFFF) are
   * yielded as U+FFFD instead.
   */
  const generateCodepoints = function* (character) {
    const { allowSurrogates } = this;
    const codepoint = getCodepoint(character, 0);
    if (allowSurrogates || codepoint <= 0xD7FF || codepoint >= 0xE000) {
      // Surrogates are allowed or the codepoint is a scalar value.
      yield codepoint;
    } else {
      // The codepoint is a disallowed surrogate.
      yield 0xFFFD;
    }
  };

  const iterateCodeUnits = arrayIteratorFunction(
    generateCodeUnits,
    "String Code Unit Iterator",
  );
  const iterateCodepoints = stringIteratorFunction(
    bind(generateCodepoints, { allowSurrogates: true }, []),
    "String Codepoint Iterator",
  );
  const iterateScalarValues = stringIteratorFunction(
    bind(generateCodepoints, { allowSurrogates: false }, []),
    "String Scalar Value Iterator",
  );

  // The `next` method shared by scalar value iterators, taken from
  // the prototype of a throwaway iterator.
  const scalarValuesNext = getPrototype(iterateScalarValues("")).next;

  // Objects with this prototype are iterable over the scalar values
  // of their `source` property.
  const scalarValueIterablePrototype = {
    [ITERATOR]() {
      const iterator = iterateScalarValues(this.source);
      return { next: bind(scalarValuesNext, iterator, []) };
    },
  };

  return {
    codeUnits: ($) => {
      return iterateCodeUnits(`${$}`);
    },
    codepoints: ($) => {
      return iterateCodepoints(`${$}`);
    },
    scalarValues: ($) => {
      return iterateScalarValues(`${$}`);
    },
    scalarValueString: ($) => {
      const source = `${$}`;
      const iterable = objectCreate(
        scalarValueIterablePrototype,
        { source: { value: source } },
      );
      return stringFromCodepoints(...iterable);
    },
  };
})();
377
/**
 * Returns an iterator over the codepoints in the string representation
 * of the provided value according to the algorithm of
 * `String::[Symbol.iterator]`.
 *
 * ※ This is a callable form of the string iterator method; the value
 * to iterate over is passed as the first argument.
 */
export const characters = (() => {
  const { [ITERATOR]: stringIterator } = stringPrototype;
  return makeCallable(stringIterator);
})();
386
/**
 * Returns the character at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::codePointAt`.
 *
 * If there is no codepoint at the provided position, this function
 * returns undefined.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return undefined;
  } else {
    // Convert the codepoint back into a string.
    return stringFromCodepoints(codepoint);
  }
};
398
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::charCodeAt`.
 *
 * ※ This is a callable form of `String::charCodeAt`; the value to
 * read from is passed as the first argument and the position as the
 * second.
 */
export const getCodeUnit = (() => {
  const { charCodeAt } = stringPrototype;
  return makeCallable(charCodeAt);
})();
405
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::codePointAt`.
 *
 * ※ This is a callable form of `String::codePointAt`; the value to
 * read from is passed as the first argument and the position as the
 * second.
 */
export const getCodepoint = (() => {
  const { codePointAt } = stringPrototype;
  return makeCallable(codePointAt);
})();
412
/**
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value according to the
 * algorithm of `String::indexOf`.
 *
 * ※ This is a callable form of `String::indexOf`; the value to search
 * is passed as the first argument.
 */
export const getFirstSubstringIndex = (() => {
  const { indexOf } = stringPrototype;
  return makeCallable(indexOf);
})();
421
/**
 * Returns the index of the last occurrence of the search string in the
 * string representation of the provided value according to the
 * algorithm of `String::lastIndexOf`.
 *
 * ※ This is a callable form of `String::lastIndexOf`; the value to
 * search is passed as the first argument.
 */
export const getLastSubstringIndex = (() => {
  const { lastIndexOf } = stringPrototype;
  return makeCallable(lastIndexOf);
})();
430
/**
 * Returns the result of joining the provided iterable.
 *
 * The iterable is first collected into an array, which is then joined
 * according to the algorithm of `Array::join`.
 *
 * If no separator is provided, it defaults to ",".
 *
 * If a value is nullish, it will be stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = arrayPrototype;
  const join = ($, separator = ",") => {
    // Collect the items before stringifying the separator, so that
    // the iterable is consumed first.
    const items = [...$];
    const glue = `${separator}`;
    return call(arrayJoin, items, [glue]);
  };
  return join;
})();
444
/**
 * Returns a string created from the raw value of the tagged template
 * literal.
 *
 * ※ This is an alias for `String.raw`.
 */
export const rawString = String.raw;

/**
 * Returns a string created from the provided code units.
 *
 * ※ This is an alias for `String.fromCharCode`.
 */
export const stringFromCodeUnits = String.fromCharCode;

/**
 * Returns a string created from the provided codepoints.
 *
 * ※ This is an alias for `String.fromCodePoint`.
 */
export const stringFromCodepoints = String.fromCodePoint;
468
/**
 * Returns the result of splitting the provided value on A·S·C·I·I
 * whitespace.
 *
 * The value is first stripped and collapsed, and then split on the
 * single spaces which remain.
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
475
/**
 * Returns the result of splitting the provided value on commas,
 * trimming A·S·C·I·I whitespace from the resulting tokens.
 */
export const splitOnCommas = ($) => {
  // Remove any A·S·C·I·I whitespace which surrounds a comma.
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );

  // Remove the whitespace before the first token and after the last
  // one, then split on the commas themselves.
  const trimmed = stripLeadingAndTrailingASCIIWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
491
/**
 * Returns the result of catenating the string representations of the
 * provided values, returning a new string according to the algorithm
 * of `String::concat`.
 *
 * ※ This is a callable form of `String::concat`.
 */
export const stringCatenate = (() => {
  const { concat } = stringPrototype;
  return makeCallable(concat);
})();
498
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string according to the algorithm of
 * `String::endsWith`.
 *
 * ※ This is a callable form of `String::endsWith`.
 */
export const stringEndsWith = (() => {
  const { endsWith } = stringPrototype;
  return makeCallable(endsWith);
})();
505
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string according to the algorithm of
 * `String::includes`.
 *
 * ※ This is a callable form of `String::includes`.
 */
export const stringIncludes = (() => {
  const { includes } = stringPrototype;
  return makeCallable(includes);
})();
512
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::match`.
 *
 * ※ This is a callable form of `String::match`.
 */
export const stringMatch = (() => {
  const { match } = stringPrototype;
  return makeCallable(match);
})();
519
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::matchAll`.
 *
 * ※ This is a callable form of `String::matchAll`.
 */
export const stringMatchAll = (() => {
  const { matchAll } = stringPrototype;
  return makeCallable(matchAll);
})();
526
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of `String::normalize`.
 *
 * ※ This is a callable form of `String::normalize`; the value to
 * normalize is passed as the first argument.
 */
export const stringNormalize = (() => {
  const { normalize } = stringPrototype;
  return makeCallable(normalize);
})();
534
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padEnd`.
 *
 * ※ This is a callable form of `String::padEnd`.
 */
export const stringPadEnd = (() => {
  const { padEnd } = stringPrototype;
  return makeCallable(padEnd);
})();
541
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padStart`.
 *
 * ※ This is a callable form of `String::padStart`.
 */
export const stringPadStart = (() => {
  const { padStart } = stringPrototype;
  return makeCallable(padStart);
})();
548
/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times according to the
 * algorithm of `String::repeat`.
 *
 * ※ This is a callable form of `String::repeat`.
 */
export const stringRepeat = (() => {
  const { repeat } = stringPrototype;
  return makeCallable(repeat);
})();
555
/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of `String::replace`.
 *
 * ※ This is a callable form of `String::replace`.
 */
export const stringReplace = (() => {
  const { replace } = stringPrototype;
  return makeCallable(replace);
})();
562
/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of `String::replaceAll`.
 *
 * ※ This is a callable form of `String::replaceAll`.
 */
export const stringReplaceAll = (() => {
  const { replaceAll } = stringPrototype;
  return makeCallable(replaceAll);
})();
571
/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher and according to the
 * algorithm of `String::search`.
 *
 * ※ This is a callable form of `String::search`.
 */
export const stringSearch = (() => {
  const { search } = stringPrototype;
  return makeCallable(search);
})();
578
/**
 * Returns a slice of the string representation of the provided value
 * according to the algorithm of `String::slice`.
 *
 * ※ This is a callable form of `String::slice`.
 */
export const stringSlice = (() => {
  const { slice } = stringPrototype;
  return makeCallable(slice);
})();
584
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of `String::split`.
 *
 * ※ This is a callable form of `String::split`.
 */
export const stringSplit = (() => {
  const { split } = stringPrototype;
  return makeCallable(split);
})();
591
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string according to the algorithm of
 * `String::startsWith`.
 *
 * ※ This is a callable form of `String::startsWith`.
 */
export const stringStartsWith = (() => {
  const { startsWith } = stringPrototype;
  return makeCallable(startsWith);
})();
600
/**
 * Returns the `[[StringData]]` of the provided value.
 *
 * ※ This is a callable form of `String::valueOf`.
 *
 * ☡ This function will throw if the provided object does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = (() => {
  const { valueOf } = stringPrototype;
  return makeCallable(valueOf);
})();
608
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the provided value and collapsing other A·S·C·I·I
 * whitespace in the string representation of the provided value.
 */
export const stripAndCollapseASCIIWhitespace = ($) => {
  // Replace every run of A·S·C·I·I whitespace with a single space.
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");

  // At most one space can now remain at either end; remove it.
  return stripLeadingAndTrailingASCIIWhitespace(collapsed);
};
622
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the string representation of the provided value.
 */
export const stripLeadingAndTrailingASCIIWhitespace = ($) => {
  // Every part of this expression may match nothing, so it matches
  // any string. The nongreedy capture leaves as much trailing
  // whitespace as possible to the final character class.
  const match = call(
    reExec,
    /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u,
    [$],
  );
  return match[1];
};
629
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of `String::substring`.
 *
 * ※ This is a callable form of `String::substring`.
 */
export const substring = (() => {
  const { substring: stringSubstring } = stringPrototype;
  return makeCallable(stringSubstring);
})();
635
/**
 * Returns the result of converting the provided value to a string.
 *
 * The conversion is the one performed by template literals.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => {
  return `${$}`;
};
// (Gitweb footer: this page took 0.105747 seconds and 5 git commands to generate.)