]> Lady’s Gitweb - Pisces/blob - string.js
774261dbdf90f8a8e49e38e51f4e61069d318a6a
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import {
11 bind,
12 call,
13 createArrowFunction,
14 createCallableFunction,
15 identity,
16 } from "./function.js";
17 import {
18 arrayIteratorFunction,
19 stringIteratorFunction,
20 } from "./iterable.js";
21 import {
22 defineOwnProperties,
23 getOwnPropertyDescriptors,
24 getPrototype,
25 objectCreate,
26 setPrototype,
27 } from "./object.js";
28 import { ITERATOR } from "./value.js";
29
// Capture references to the built‐in constructors, prototypes, and
// methods used below once, at module evaluation time.
const RE = RegExp;
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

const reExec = rePrototype.exec;
36
export const {
  /**
   * A `RegExp`like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise. The
   * result of calling a matcher is always a boolean, even when the
   * constraint function returns some other truthy or falsey value.
   *
   * Matchers will always return false if called with nonstrings,
   * although other methods like `::exec` coerce their arguments and
   * may still produce a match.
   */
  Matcher,
} = (() => {
  const { toString: reToString } = rePrototype;
  const getDotAll =
    Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get;
  const getFlags =
    Object.getOwnPropertyDescriptor(rePrototype, "flags").get;
  const getGlobal =
    Object.getOwnPropertyDescriptor(rePrototype, "global").get;
  const getHasIndices =
    Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get;
  const getIgnoreCase =
    Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get;
  const getMultiline =
    Object.getOwnPropertyDescriptor(rePrototype, "multiline").get;
  const getSource =
    Object.getOwnPropertyDescriptor(rePrototype, "source").get;
  const getSticky =
    Object.getOwnPropertyDescriptor(rePrototype, "sticky").get;
  const getUnicode =
    Object.getOwnPropertyDescriptor(rePrototype, "unicode").get;

  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new `Matcher` from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     * If it is nullish, a name of the form `Matcher(/…/u)` is derived
     * from the regular expression.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the
     * `Matcher` object itself. If the return value of this call is
     * falsey, then the match will be considered a failure.
     *
     * ☡ This constructor throws a `TypeError` if the source is a
     * regular expression without the unicode flag, or if the
     * constraint is neither null nor callable.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     */
    constructor(source, name = undefined, constraint = null) {
      super(
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `.lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            //
            // The constraint result is coerced so that calling a
            // matcher always yields a boolean.
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $ &&
              (constraint === null || !!constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        try {
          call(reExec, source, [""]); // throws if source not a RegExp
        } catch {
          // The source is not a regular expression; treat it as the
          // string source of one.
          return new RE(`${source}`, "u");
        }
        const unicode = call(getUnicode, source, []);
        if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag. Copy
          // it so that this matcher owns its own `.lastIndex`.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            lastIndex: {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            },
            name: {
              value: name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            },
          },
        );
      }
    }

    /** Gets whether the dot‐all flag is present on this `Matcher`. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this `Matcher` on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * The provided value is converted to a string before matching.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /**
     * Gets the flags present on this `Matcher`.
     *
     * ※ This needs to be defined because the internal `RegExp` object
     * may have flags which are not yet recognized by ♓🌟 Piscēs.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether the global flag is present on this `Matcher`. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /**
     * Gets whether the has‐indices flag is present on this `Matcher`.
     */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /**
     * Gets whether the ignore‐case flag is present on this `Matcher`.
     */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /**
     * Gets whether the multiline flag is present on this `Matcher`.
     */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this `Matcher`. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this `Matcher`. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this `Matcher`.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exported constructor extends `RegExp` but hands construction
  // over to the private class above by returning its result.
  const matcherConstructor = defineOwnProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: { value: "Matcher" },
      length: { value: 1 },
    },
  );

  // Copy the accessors and methods of the private class onto the
  // exported prototype, which is the one instances are given by
  // `setPrototype` in the constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    { constructor: { value: matcherConstructor } },
  );

  return { Matcher: matcherConstructor };
})();
271
export const {
  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * lowercase.
   *
   * The value is converted to a string, and only the letters `A`
   * through `Z` are changed.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * uppercase.
   *
   * The value is converted to a string, and only the letters `a`
   * through `z` are changed.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = stringPrototype;

  // Build the replacer callbacks once, rather than on every call.
  // `String::replaceAll` invokes its replacer with the matched
  // substring as the first argument.
  const lowercaseMatch = createCallableFunction(stringToLowercase);
  const uppercaseMatch = createCallableFunction(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, lowercaseMatch),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, uppercaseMatch),
  };
})();
304
export const {
  /**
   * Returns an iterator over the characters (codepoints, as strings)
   * in the string representation of the provided value, following the
   * algorithm of `String::[Symbol.iterator]`.
   */
  characters,

  /**
   * Returns an iterator over the code units (as numbers) in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints (as numbers) in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values (as numbers) in the
   * string representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,

  /**
   * Returns the string which results from replacing every (unpaired)
   * surrogate in the string representation of the provided value with
   * U+FFFD.
   */
  scalarValueString,
} = (() => {
  // Each generator receives a single item from the underlying
  // iteration and yields the value to produce for it.
  const generateCharacters = function* (character) {
    yield character;
  };
  const generateCodeUnits = function* (ucsCharacter) {
    yield getCodeUnit(ucsCharacter, 0);
  };
  const generateCodepoints = function* (character) {
    // `this` is the options object supplied through `bind` below.
    const { allowSurrogates } = this;
    const codepoint = getCodepoint(character, 0);
    if (allowSurrogates || codepoint <= 0xD7FF || codepoint >= 0xE000) {
      // Surrogates are permitted, or the codepoint is not in the
      // surrogate range.
      yield codepoint;
    } else {
      // The codepoint is not an acceptable scalar value.
      yield 0xFFFD;
    }
  };

  const charactersIterator = stringIteratorFunction(
    generateCharacters,
    "String Character Iterator",
  );
  const codeUnitsIterator = arrayIteratorFunction(
    generateCodeUnits,
    "String Code Unit Iterator",
  );
  const codepointsIterator = stringIteratorFunction(
    bind(generateCodepoints, { allowSurrogates: true }, []),
    "String Codepoint Iterator",
  );
  const scalarValuesIterator = stringIteratorFunction(
    bind(generateCodepoints, { allowSurrogates: false }, []),
    "String Scalar Value Iterator",
  );

  // Take `next` from the prototype of a scalar value iterator so that
  // it can be bound to fresh iterators below.
  const scalarValuesPrototype = getPrototype(scalarValuesIterator(""));
  const scalarValuesNext = scalarValuesPrototype.next;

  // Objects created from this prototype carry a `source` string and
  // can be spread to produce the scalar values of that string.
  const scalarValueIterablePrototype = {
    [ITERATOR]() {
      const iterator = scalarValuesIterator(this.source);
      return { next: bind(scalarValuesNext, iterator, []) };
    },
  };

  return {
    characters: ($) => charactersIterator(`${$}`),
    codeUnits: ($) => codeUnitsIterator(`${$}`),
    codepoints: ($) => codepointsIterator(`${$}`),
    scalarValues: ($) => scalarValuesIterator(`${$}`),
    scalarValueString: ($) => {
      const iterable = objectCreate(
        scalarValueIterablePrototype,
        { source: { value: `${$}` } },
      );
      return stringFromCodepoints(...iterable);
    },
  };
})();
398
/**
 * Returns the character (as a string) which begins at the provided
 * position in the string representation of the provided value, or
 * undefined if there is none.
 *
 * ※ The codepoint is found using the algorithm of
 * `String::codePointAt`.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return undefined;
  } else {
    // There is a codepoint; convert it back into a string.
    return stringFromCodepoints(codepoint);
  }
};
410
export const {
  /**
   * Returns the code unit at the provided position in the string
   * representation of the provided value according to the algorithm of
   * `String::charCodeAt`, except that out‐of‐bounds values return
   * undefined in place of nan.
   */
  getCodeUnit,

  /**
   * Returns a new string formed by catenating the string
   * representations of the provided values according to the algorithm
   * of `String::concat`.
   *
   * ※ If no arguments are given, this function returns the empty
   * string. This is different behaviour than if an explicit undefined
   * first argument is given, in which case the resulting string will
   * begin with `"undefined"`.
   */
  stringCatenate,
} = (() => {
  const { charCodeAt, concat } = String.prototype;
  const { isNaN: isNan } = Number;

  const getCodeUnit = ($, n) => {
    const codeUnit = call(charCodeAt, $, [n]);
    if (isNan(codeUnit)) {
      // The position was out of bounds.
      return undefined;
    } else {
      // The position was in bounds.
      return codeUnit;
    }
  };

  // Catenation starts from the empty string, so every argument is
  // treated the same way.
  const catenate = (...args) => call(concat, "", args);

  return {
    getCodeUnit,
    stringCatenate: defineOwnProperties(
      catenate,
      { name: { value: "stringCatenate" }, length: { value: 2 } },
    ),
  };
})();
446
/**
 * Returns the codepoint which begins at the provided position in the
 * string representation of the provided value.
 *
 * ※ This is a callable form of `String::codePointAt`, taking the
 * value as its first argument and the position as its second.
 */
export const getCodepoint = createCallableFunction(
  stringPrototype.codePointAt,
  { name: "getCodepoint" },
);
456
/**
 * Returns the index at which the search string first occurs in the
 * string representation of the provided value.
 *
 * ※ This is a callable form of `String::indexOf`, taking the value as
 * its first argument.
 */
export const getFirstSubstringIndex = createCallableFunction(
  stringPrototype.indexOf,
  { name: "getFirstSubstringIndex" },
);
466
/**
 * Returns the index at which the search string last occurs in the
 * string representation of the provided value.
 *
 * ※ This is a callable form of `String::lastIndexOf`, taking the
 * value as its first argument.
 */
export const getLastSubstringIndex = createCallableFunction(
  stringPrototype.lastIndexOf,
  { name: "getLastSubstringIndex" },
);
476
/**
 * Returns the string which results from joining the items of the
 * provided iterable according to the algorithm of `Array::join`.
 *
 * The separator is converted to a string; if it is undefined, `","`
 * is used instead.
 *
 * Nullish items are stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = arrayPrototype;
  const join = ($, separator) => {
    // Collect the items first, and only then stringify the separator.
    const items = [...$];
    const glue = separator === undefined ? "," : `${separator}`;
    return call(arrayJoin, items, [glue]);
  };
  return join;
})();
494
/**
 * Returns the string formed from the raw (unescaped) text of a tagged
 * template literal and its substitutions.
 *
 * ※ This is effectively an alias for `String.raw`.
 */
export const rawString = createArrowFunction(
  String.raw,
  { name: "rawString" },
);
504
export const {
  /**
   * Returns a string created from the provided code units.
   *
   * ※ This is effectively an alias for `String.fromCharCode`, but
   * with the same error behaviour as `String.fromCodePoint`.
   *
   * ※ Each argument is converted to a number exactly once; the
   * converted values are both what is validated and what is used to
   * build the string.
   *
   * ☡ This function throws a `RangeError` if provided with an argument
   * which is not an integral number from 0 to FFFF₁₆ inclusive.
   */
  stringFromCodeUnits,
} = (() => {
  const { fromCharCode } = String;
  const { isInteger: isIntegralNumber } = Number;

  return {
    stringFromCodeUnits: defineOwnProperties(
      (...codeUnits) => {
        for (let index = 0; index < codeUnits.length; ++index) {
          // Iterate over each provided code unit and throw if it is
          // out of range.
          const nextCU = +codeUnits[index];
          if (
            !isIntegralNumber(nextCU) || nextCU < 0 || nextCU > 0xFFFF
          ) {
            // The code unit is not an integral number between 0 and
            // 0xFFFF.
            throw new RangeError(
              `Piscēs: Code unit out of range: ${nextCU}.`,
            );
          } else {
            // The code unit is acceptable. Store the converted number
            // so that `String.fromCharCode` does not convert the
            // original argument a second time (which could produce a
            // different, unvalidated value).
            codeUnits[index] = nextCU;
          }
        }
        return call(fromCharCode, undefined, codeUnits);
      },
      { name: { value: "stringFromCodeUnits" }, length: { value: 1 } },
    ),
  };
})();
546
/**
 * Returns the string formed from the provided codepoints, in order.
 *
 * ※ This is effectively an alias for `String.fromCodePoint`.
 *
 * ☡ This function throws an error if provided with an argument which
 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
 */
export const stringFromCodepoints = createArrowFunction(
  String.fromCodePoint,
  { name: "stringFromCodepoints" },
);
559
/**
 * Returns the result of splitting the provided value on A·S·C·I·I
 * whitespace.
 *
 * Leading and trailing A·S·C·I·I whitespace is ignored, and runs of
 * A·S·C·I·I whitespace count as a single separator.
 *
 * ※ If the string representation of the provided value is empty or
 * consists only of A·S·C·I·I whitespace, the result is an empty array
 * (there are no tokens), not an array containing the empty string.
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);
  if (collapsed === "") {
    // There are no tokens. Splitting the empty string would instead
    // produce a single empty token.
    return [];
  } else {
    // There is at least one token, and tokens are now separated by
    // exactly one space.
    return stringSplit(collapsed, " ");
  }
};
566
/**
 * Returns the result of splitting the string representation of the
 * provided value on commas, with A·S·C·I·I whitespace trimmed from
 * each of the resulting tokens.
 */
export const splitOnCommas = ($) => {
  // Remove the whitespace on either side of each comma.
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );

  // Remove the whitespace before the first token and after the last.
  const trimmed = stripLeadingAndTrailingASCIIWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
582
/**
 * Returns whether the string representation of the provided value
 * ends with the provided search string.
 *
 * ※ This is a callable form of `String::endsWith`, taking the value
 * as its first argument.
 */
export const stringEndsWith = createCallableFunction(
  stringPrototype.endsWith,
  { name: "stringEndsWith" },
);
592
/**
 * Returns whether the provided search string occurs anywhere in the
 * string representation of the provided value.
 *
 * ※ This is a callable form of `String::includes`, taking the value
 * as its first argument.
 */
export const stringIncludes = createCallableFunction(
  stringPrototype.includes,
  { name: "stringIncludes" },
);
602
/**
 * Returns the result of matching the provided matcher against the
 * string representation of the provided value.
 *
 * ※ This is a callable form of `String::match`, taking the value as
 * its first argument.
 */
export const stringMatch = createCallableFunction(
  stringPrototype.match,
  { name: "stringMatch" },
);
612
/**
 * Returns the result of matching the provided matcher against the
 * string representation of the provided value, for every match.
 *
 * ※ This is a callable form of `String::matchAll`, taking the value
 * as its first argument.
 */
export const stringMatchAll = createCallableFunction(
  stringPrototype.matchAll,
  { name: "stringMatchAll" },
);
622
/**
 * Returns the Unicode‐normalized form of the string representation of
 * the provided value.
 *
 * ※ This is a callable form of `String::normalize`, taking the value
 * as its first argument.
 */
export const stringNormalize = createCallableFunction(
  stringPrototype.normalize,
  { name: "stringNormalize" },
);
631
/**
 * Returns the string representation of the provided value, padded at
 * its end until it reaches the desired length.
 *
 * ※ This is a callable form of `String::padEnd`, taking the value as
 * its first argument.
 */
export const stringPadEnd = createCallableFunction(
  stringPrototype.padEnd,
  { name: "stringPadEnd" },
);
641
/**
 * Returns the string representation of the provided value, padded at
 * its start until it reaches the desired length.
 *
 * ※ This is a callable form of `String::padStart`, taking the value
 * as its first argument.
 */
export const stringPadStart = createCallableFunction(
  stringPrototype.padStart,
  { name: "stringPadStart" },
);
651
/**
 * Returns the string representation of the provided value, repeated
 * the provided number of times.
 *
 * ※ This is a callable form of `String::repeat`, taking the value as
 * its first argument.
 */
export const stringRepeat = createCallableFunction(
  stringPrototype.repeat,
  { name: "stringRepeat" },
);
661
/**
 * Returns the string representation of the provided value with the
 * first match of the provided matcher replaced by the provided
 * replacement.
 *
 * ※ This is a callable form of `String::replace`, taking the value as
 * its first argument.
 */
export const stringReplace = createCallableFunction(
  stringPrototype.replace,
  { name: "stringReplace" },
);
671
/**
 * Returns the string representation of the provided value with every
 * match of the provided matcher replaced by the provided replacement.
 *
 * ※ This is a callable form of `String::replaceAll`, taking the value
 * as its first argument.
 */
export const stringReplaceAll = createCallableFunction(
  stringPrototype.replaceAll,
  { name: "stringReplaceAll" },
);
681
/**
 * Returns the result of searching the string representation of the
 * provided value with the provided matcher.
 *
 * ※ This is a callable form of `String::search`, taking the value as
 * its first argument.
 */
export const stringSearch = createCallableFunction(
  stringPrototype.search,
  { name: "stringSearch" },
);
691
/**
 * Returns a slice of the string representation of the provided value.
 *
 * ※ This is a callable form of `String::slice`, taking the value as
 * its first argument.
 */
export const stringSlice = createCallableFunction(
  stringPrototype.slice,
  { name: "stringSlice" },
);
700
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator.
 *
 * ※ This is a callable form of `String::split`, taking the value as
 * its first argument.
 */
export const stringSplit = createCallableFunction(
  stringPrototype.split,
  { name: "stringSplit" },
);
710
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string.
 *
 * ※ This is a callable form of `String::startsWith`, taking the value
 * as its first argument.
 */
export const stringStartsWith = createCallableFunction(
  stringPrototype.startsWith,
  { name: "stringStartsWith" },
);
720
/**
 * Returns the primitive string value of the provided string.
 *
 * ※ This is effectively an alias for `String::valueOf`.
 *
 * ☡ This function throws if the provided argument is not a string and
 * does not have a `[[StringData]]` slot.
 */
export const stringValue = createCallableFunction(
  stringPrototype.valueOf,
  { name: "stringValue" },
);
733
/**
 * Returns the string representation of the provided value with every
 * run of A·S·C·I·I whitespace collapsed into a single space, and with
 * leading and trailing A·S·C·I·I whitespace removed.
 */
export const stripAndCollapseASCIIWhitespace = ($) => {
  // Collapse first, so that at most one space remains at either end.
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");
  return stripLeadingAndTrailingASCIIWhitespace(collapsed);
};
747
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the string representation of the provided value.
 *
 * A·S·C·I·I whitespace is any of U+000A, U+000D, U+0009, U+000C, or
 * U+0020.
 *
 * ※ This scans inward from both ends of the string instead of using a
 * regular expression, so the work done is proportional to the length
 * of the string even when it contains long runs of interior
 * whitespace.
 */
export const stripLeadingAndTrailingASCIIWhitespace = (() => {
  /** Returns whether the provided code unit is A·S·C·I·I whitespace. */
  const isASCIIWhitespace = (character) =>
    character === "\n" || character === "\r" || character === "\t" ||
    character === "\f" || character === " ";

  const stripLeadingAndTrailingASCIIWhitespace = ($) => {
    const string = `${$}`;
    let start = 0;
    let end = string.length;
    while (start < end && isASCIIWhitespace(string[start])) {
      // Skip over leading whitespace.
      ++start;
    }
    while (end > start && isASCIIWhitespace(string[end - 1])) {
      // Skip over trailing whitespace.
      --end;
    }
    return stringSlice(string, start, end);
  };
  return stripLeadingAndTrailingASCIIWhitespace;
})();
754
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of `String::substring`.
 *
 * ※ This is a callable form of `String::substring`, taking the value
 * as its first argument.
 */
export const substring = createCallableFunction(
  stringPrototype.substring,
  // Name the result explicitly, as is done for every other wrapper in
  // this module.
  { name: "substring" },
);
762
/**
 * Returns the string representation of the provided value.
 *
 * ☡ This function throws for symbols and for other values which
 * cannot be converted to a string.
 */
export const toString = ($) => {
  // A template literal performs the string conversion, and (unlike
  // `String()`) throws when given a symbol.
  const string = `${$}`;
  return string;
};
This page took 0.109133 seconds and 3 git commands to generate.