]> Lady’s Gitweb - Pisces/blob - string.js
0bd5f9e03bc4dc0c9b5622a969757dfd2f78890b
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import {
11 bind,
12 call,
13 createArrowFunction,
14 createCallableFunction,
15 identity,
16 } from "./function.js";
17 import {
18 arrayIteratorFunction,
19 stringIteratorFunction,
20 } from "./iterable.js";
21 import {
22 defineOwnProperties,
23 getOwnPropertyDescriptors,
24 setPrototype,
25 } from "./object.js";
26 import { sameValue, toLength } from "./value.js";
27
// Intrinsics are captured once at module evaluation so that later
// changes to the global bindings do not affect this module.
const RE = RegExp;
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

// The original `RegExp::exec` method.
const reExec = rePrototype.exec;
34
export const {
  /**
   * A `RegExp`like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although other methods like `::exec` coerce their arguments and
   * may still return true.
   */
  Matcher,
} = (() => {
  const { toString: reToString } = rePrototype;

  // The original accessor functions for each `RegExp` flag and for the
  // source, so that they can be applied to the private regular
  // expression of a `Matcher`.
  const getDotAll =
    Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get;
  const getFlags =
    Object.getOwnPropertyDescriptor(rePrototype, "flags").get;
  const getGlobal =
    Object.getOwnPropertyDescriptor(rePrototype, "global").get;
  const getHasIndices =
    Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get;
  const getIgnoreCase =
    Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get;
  const getMultiline =
    Object.getOwnPropertyDescriptor(rePrototype, "multiline").get;
  const getSource =
    Object.getOwnPropertyDescriptor(rePrototype, "source").get;
  const getSticky =
    Object.getOwnPropertyDescriptor(rePrototype, "sticky").get;
  const getUnicode =
    Object.getOwnPropertyDescriptor(rePrototype, "unicode").get;

  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new `Matcher` from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the
     * `Matcher` object itself. If the return value of this call is
     * falsey, then the match will be considered a failure.
     *
     * ☡ This constructor throws a `TypeError` if the provided source
     * is a regular expression without the unicode flag, or if the
     * provided constraint is neither null nor callable.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     */
    constructor(source, name = undefined, constraint = null) {
      super(
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `.lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            //
            // The result of the constraint is coerced to a boolean so
            // that calling a `Matcher` only ever produces true or
            // false, as documented.
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $ &&
              (constraint === null ||
                !!constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        try {
          call(reExec, source, [""]); // throws if source not a RegExp
        } catch {
          // The provided source is not a regular expression; use its
          // string representation as a regular expression source.
          return new RE(`${source}`, "u");
        }
        const unicode = call(getUnicode, source, []);
        if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            lastIndex: {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            },
            name: {
              value: name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            },
          },
        );
      }
    }

    /** Gets whether the dot‐all flag is present on this `Matcher`. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this `Matcher` on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /**
     * Gets the flags present on this `Matcher`.
     *
     * ※ This needs to be defined because the internal `RegExp` object
     * may have flags which are not yet recognized by ♓🌟 Piscēs.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether the global flag is present on this `Matcher`. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /**
     * Gets whether the has‐indices flag is present on this `Matcher`.
     */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /**
     * Gets whether the ignore‐case flag is present on this `Matcher`.
     */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /**
     * Gets whether the multiline flag is present on this `Matcher`.
     */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this `Matcher`. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this `Matcher`. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this `Matcher`.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exported constructor extends `RegExp` so that its prototype
  // inherits from `RegExp.prototype`, but construction is delegated to
  // the private `Matcher` class above.
  const matcherConstructor = defineOwnProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: { value: "Matcher" },
      length: { value: 1 },
    },
  );

  // The members of the private class are copied onto the exported
  // prototype, which is the prototype set on every constructed
  // matcher.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    { constructor: { value: matcherConstructor } },
  );

  return { Matcher: matcherConstructor };
})();
269
export const {
  /**
   * Returns the result of converting the string representation of the
   * provided value to A·S·C·I·I lowercase.
   *
   * ※ Only the characters `A` through `Z` are replaced.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the string representation of the
   * provided value to A·S·C·I·I uppercase.
   *
   * ※ Only the characters `a` through `z` are replaced.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = stringPrototype;

  // The replacers do not depend on the input, so they are created
  // once here rather than on every call.
  const lowercaseReplacer = createCallableFunction(stringToLowercase);
  const uppercaseReplacer = createCallableFunction(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, lowercaseReplacer),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, uppercaseReplacer),
  };
})();
302
/**
 * Interprets the provided value as a canonical numeric index string.
 *
 * Returns −0 for the string "-0". For any other string, returns the
 * number it converts to if converting that number back to a string
 * produces the same string. Returns undefined otherwise, including
 * for all nonstring values.
 *
 * There is no clamping of the numeric index, but note that numbers
 * above 2^53 − 1 are not safe nor valid integer indices.
 */
export const canonicalNumericIndexString = ($) => {
  if (typeof $ !== "string") {
    // Only strings can be canonical numeric index strings.
    return undefined;
  }
  if ($ === "-0") {
    // Negative zero stringifies as "0", so it is special‐cased.
    return -0;
  }
  const number = +$;
  if (`${number}` === $) {
    // The string round‐trips through number conversion.
    return number;
  }
  return undefined;
};
321
export const {
  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value according to the algorithm of
   * `String::[Symbol.iterator]`.
   */
  characters,

  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,
} = (() => {
  // Yields the provided character unchanged.
  const generateCharacters = function* (character) {
    yield character;
  };

  // Yields the first code unit of the provided value.
  const generateCodeUnits = function* (ucsCharacter) {
    yield getCodeUnit(ucsCharacter, 0);
  };

  // Yields the first codepoint of the provided character. Surrogates
  // are replaced with U+FFFD unless `allowSurrogates` is set on the
  // this value.
  const generateCodepoints = function* (character) {
    const surrogatesAllowed = this.allowSurrogates;
    const codepoint = getCodepoint(character, 0);
    if (
      surrogatesAllowed || codepoint <= 0xD7FF || codepoint >= 0xE000
    ) {
      // The codepoint is acceptable as‐is.
      yield codepoint;
    } else {
      // The codepoint is a surrogate and surrogates are not allowed.
      yield 0xFFFD;
    }
  };

  const withSurrogates = bind(
    generateCodepoints,
    { allowSurrogates: true },
    [],
  );
  const withoutSurrogates = bind(
    generateCodepoints,
    { allowSurrogates: false },
    [],
  );

  const charactersIterator = stringIteratorFunction(
    generateCharacters,
    "String Character Iterator",
  );
  const codeUnitsIterator = arrayIteratorFunction(
    generateCodeUnits,
    "String Code Unit Iterator",
  );
  const codepointsIterator = stringIteratorFunction(
    withSurrogates,
    "String Codepoint Iterator",
  );
  const scalarValuesIterator = stringIteratorFunction(
    withoutSurrogates,
    "String Scalar Value Iterator",
  );

  return {
    characters: ($) => {
      const string = `${$}`;
      return charactersIterator(string);
    },
    codeUnits: ($) => {
      const string = `${$}`;
      return codeUnitsIterator(string);
    },
    codepoints: ($) => {
      const string = `${$}`;
      return codepointsIterator(string);
    },
    scalarValues: ($) => {
      const string = `${$}`;
      return scalarValuesIterator(string);
    },
  };
})();
389
/**
 * Returns the character at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::codePointAt`.
 *
 * Returns undefined if there is no codepoint at the provided
 * position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return undefined;
  } else {
    // Convert the codepoint back into a string.
    return stringFromCodepoints(codepoint);
  }
};
401
export const {
  /**
   * Returns the code unit at the provided position in the string
   * representation of the provided value according to the algorithm of
   * `String::charCodeAt`, except that out‐of‐bounds values return
   * undefined in place of nan.
   */
  getCodeUnit,

  /** Returns whether the provided value is an integer index string. */
  isIntegerIndexString,

  /**
   * Returns a string created from the provided code units.
   *
   * ※ This is effectively an alias for `String.fromCharCode`, but
   * with the same error behaviour as `String.fromCodePoint`.
   *
   * ※ Each argument is converted to a number exactly once; the
   * converted value is both validated and used to build the string.
   *
   * ☡ This function throws an error if provided with an argument which
   * is not an integral number from 0 to FFFF₁₆ inclusive.
   */
  stringFromCodeUnits,

  /**
   * Returns the result of catenating the string representations of the
   * provided values, returning a new string according to the algorithm
   * of `String::concat`.
   *
   * ※ If no arguments are given, this function returns the empty
   * string. This is different behaviour than if an explicit undefined
   * first argument is given, in which case the resulting string will
   * begin with `"undefined"`.
   */
  stringCatenate,
} = (() => {
  const { fromCharCode } = String;
  const { charCodeAt, concat } = String.prototype;
  const {
    MAX_SAFE_INTEGER: MAXIMUM_SAFE_INTEGRAL_NUMBER,
    isInteger: isIntegralNumber,
    isNaN: isNan,
  } = Number;

  return {
    getCodeUnit: ($, n) => {
      // `String::charCodeAt` yields nan when out of bounds.
      const codeUnit = call(charCodeAt, $, [n]);
      return isNan(codeUnit) ? undefined : codeUnit;
    },
    isIntegerIndexString: ($) => {
      const value = canonicalNumericIndexString($);
      if (value !== undefined && isIntegralNumber(value)) {
        // The provided value is an integral canonical numeric index
        // string.
        return sameValue(value, 0) ||
          value > 0 && value <= MAXIMUM_SAFE_INTEGRAL_NUMBER &&
            value === toLength(value);
      } else {
        // The provided value is not an integral canonical numeric
        // index string.
        return false;
      }
    },
    stringCatenate: defineOwnProperties(
      (...args) => call(concat, "", args),
      { name: { value: "stringCatenate" }, length: { value: 2 } },
    ),
    stringFromCodeUnits: defineOwnProperties(
      (...codeUnits) => {
        for (let index = 0; index < codeUnits.length; ++index) {
          // Iterate over each provided code unit, convert it to a
          // number, and throw if it is out of range.
          //
          // The converted number is stored back into the (local)
          // arguments array so that the value which was validated is
          // the same value which is later passed to
          // `String.fromCharCode`; otherwise an object could convert
          // to a different number the second time around.
          const nextCU = codeUnits[index] = +codeUnits[index];
          if (
            !isIntegralNumber(nextCU) || nextCU < 0 || nextCU > 0xFFFF
          ) {
            // The code unit is not an integral number between 0 and
            // 0xFFFF.
            throw new RangeError(
              `Piscēs: Code unit out of range: ${nextCU}.`,
            );
          } else {
            // The code unit is acceptable.
            /* do nothing */
          }
        }
        return call(fromCharCode, undefined, codeUnits);
      },
      { name: { value: "stringFromCodeUnits" }, length: { value: 1 } },
    ),
  };
})();
493
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::codePointAt`.
 */
export const getCodepoint = (() => {
  const { codePointAt } = stringPrototype;
  return createCallableFunction(codePointAt, { name: "getCodepoint" });
})();
503
/**
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value according to the
 * algorithm of `String::indexOf`.
 */
export const getFirstSubstringIndex = (() => {
  const { indexOf } = stringPrototype;
  return createCallableFunction(
    indexOf,
    { name: "getFirstSubstringIndex" },
  );
})();
513
/**
 * Returns the index of the last occurrence of the search string in the
 * string representation of the provided value according to the
 * algorithm of `String::lastIndexOf`.
 */
export const getLastSubstringIndex = (() => {
  const { lastIndexOf } = stringPrototype;
  return createCallableFunction(
    lastIndexOf,
    { name: "getLastSubstringIndex" },
  );
})();
523
/**
 * Returns whether the provided value is an array index string.
 *
 * ※ An array index string is a canonical numeric index string whose
 * value is positive zero, or is a positive number less than 2^32 − 1
 * which is unchanged by conversion to a length.
 */
export const isArrayIndexString = ($) => {
  const value = canonicalNumericIndexString($);
  if (value === undefined) {
    // The provided value is not a canonical numeric index string.
    return false;
  }
  if (sameValue(value, 0)) {
    // The provided value is (positive) zero.
    return true;
  }
  // The provided value is some other canonical numeric index string;
  // 0xFFFFFFFF is the same number as `-1 >>> 0`.
  return value > 0 && value < 0xFFFFFFFF && value === toLength(value);
};
536
/**
 * Returns the result of joining the values of the provided iterable
 * into a single string.
 *
 * If no separator is provided, it defaults to ",".
 *
 * If a value is nullish, it will be stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = arrayPrototype;
  const join = ($, separator) => {
    // Collect the iterable into an array first, then resolve the
    // separator.
    const items = [...$];
    const glue = separator === undefined ? "," : `${separator}`;
    return call(arrayJoin, items, [glue]);
  };
  return join;
})();
554
/**
 * Returns a string created from the raw value of the tagged template
 * literal.
 *
 * ※ This is effectively an alias for `String.raw`.
 */
export const rawString = (() => {
  const { raw } = String;
  return createArrowFunction(raw, { name: "rawString" });
})();
564
/**
 * Returns a string created from the provided codepoints.
 *
 * ※ This is effectively an alias for `String.fromCodePoint`.
 *
 * ☡ This function throws an error if provided with an argument which
 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
 */
export const stringFromCodepoints = (() => {
  const { fromCodePoint } = String;
  return createArrowFunction(
    fromCodePoint,
    { name: "stringFromCodepoints" },
  );
})();
577
/**
 * Returns the result of splitting the provided value on Ascii
 * whitespace.
 *
 * The value is first stripped and collapsed, and then split on single
 * spaces.
 *
 * ※ Because the split follows `String::split`, a value which is empty
 * after stripping produces a result containing one empty string.
 */
export const splitOnAsciiWhitespace = ($) => {
  const collapsed = stripAndCollapseAsciiWhitespace($);
  return stringSplit(collapsed, " ");
};
584
/**
 * Returns the result of splitting the provided value on commas,
 * trimming Ascii whitespace from the resulting tokens.
 */
export const splitOnCommas = ($) => {
  // Remove any Ascii whitespace which surrounds a comma.
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );

  // Remove Ascii whitespace from the very start and end, which is not
  // adjacent to any comma.
  const trimmed = stripLeadingAndTrailingAsciiWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
600
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string according to the algorithm of
 * `String::endsWith`.
 */
export const stringEndsWith = (() => {
  const { endsWith } = stringPrototype;
  return createCallableFunction(endsWith, { name: "stringEndsWith" });
})();
610
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string according to the algorithm of
 * `String::includes`.
 */
export const stringIncludes = (() => {
  const { includes } = stringPrototype;
  return createCallableFunction(includes, { name: "stringIncludes" });
})();
620
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::match`.
 */
export const stringMatch = (() => {
  const { match } = stringPrototype;
  return createCallableFunction(match, { name: "stringMatch" });
})();
630
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::matchAll`.
 */
export const stringMatchAll = (() => {
  const { matchAll } = stringPrototype;
  return createCallableFunction(matchAll, { name: "stringMatchAll" });
})();
640
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of `String::normalize`.
 */
export const stringNormalize = (() => {
  const { normalize } = stringPrototype;
  return createCallableFunction(
    normalize,
    { name: "stringNormalize" },
  );
})();
649
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padEnd`.
 */
export const stringPadEnd = (() => {
  const { padEnd } = stringPrototype;
  return createCallableFunction(padEnd, { name: "stringPadEnd" });
})();
659
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padStart`.
 */
export const stringPadStart = (() => {
  const { padStart } = stringPrototype;
  return createCallableFunction(padStart, { name: "stringPadStart" });
})();
669
/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times according to the
 * algorithm of `String::repeat`.
 */
export const stringRepeat = (() => {
  const { repeat } = stringPrototype;
  return createCallableFunction(repeat, { name: "stringRepeat" });
})();
679
/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of `String::replace`.
 */
export const stringReplace = (() => {
  const { replace } = stringPrototype;
  return createCallableFunction(replace, { name: "stringReplace" });
})();
689
/**
 * Returns the result of replacing the string representation of the
 * provided value with the provided replacement, using the provided
 * matcher and according to the algorithm of `String::replaceAll`.
 */
export const stringReplaceAll = (() => {
  const { replaceAll } = stringPrototype;
  return createCallableFunction(
    replaceAll,
    { name: "stringReplaceAll" },
  );
})();
699
/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher and according to the
 * algorithm of `String::search`.
 */
export const stringSearch = (() => {
  const { search } = stringPrototype;
  return createCallableFunction(search, { name: "stringSearch" });
})();
709
/**
 * Returns a slice of the string representation of the provided value
 * according to the algorithm of `String::slice`.
 */
export const stringSlice = (() => {
  const { slice } = stringPrototype;
  return createCallableFunction(slice, { name: "stringSlice" });
})();
718
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of `String::split`.
 */
export const stringSplit = (() => {
  const { split } = stringPrototype;
  return createCallableFunction(split, { name: "stringSplit" });
})();
728
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string according to the algorithm of
 * `String::startsWith`.
 */
export const stringStartsWith = (() => {
  const { startsWith } = stringPrototype;
  return createCallableFunction(
    startsWith,
    { name: "stringStartsWith" },
  );
})();
738
/**
 * Returns the value of the provided string.
 *
 * ※ This is effectively an alias for `String::valueOf`.
 *
 * ☡ This function throws if the provided argument is not a string and
 * does not have a `[[StringData]]` slot.
 */
export const stringValue = (() => {
  const { valueOf } = stringPrototype;
  return createCallableFunction(valueOf, { name: "stringValue" });
})();
751
/**
 * Returns the result of stripping leading and trailing Ascii
 * whitespace from the string representation of the provided value and
 * collapsing every other run of Ascii whitespace into a single space.
 */
export const stripAndCollapseAsciiWhitespace = ($) => {
  // Collapse every run of Ascii whitespace into one space first; any
  // leading or trailing run becomes a single space, which is then
  // stripped.
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");
  return stripLeadingAndTrailingAsciiWhitespace(collapsed);
};
765
/**
 * Returns the result of stripping leading and trailing Ascii
 * whitespace from the string representation of the provided value.
 */
export const stripLeadingAndTrailingAsciiWhitespace = ($) => {
  // The first capture group lazily matches everything between the
  // leading and trailing runs of Ascii whitespace. The expression can
  // match any string, so the result is never null.
  const result = call(
    reExec,
    /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u,
    [$],
  );
  return result[1];
};
772
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of `String::substring`.
 */
export const substring = createCallableFunction(
  stringPrototype.substring,
  // Named explicitly, like every other wrapper in this module.
  { name: "substring" },
);
780
/**
 * Returns the result of converting the provided value to a string of
 * scalar values by replacing (unpaired) surrogate values with
 * U+FFFD.
 *
 * ※ This wraps `String::toWellFormed`.
 */
export const toScalarValueString = (() => {
  const { toWellFormed } = stringPrototype;
  return createCallableFunction(
    toWellFormed,
    { name: "toScalarValueString" },
  );
})();
790
/**
 * Returns the result of converting the provided value to a string.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => {
  // Template literal conversion throws for symbols.
  return `${$}`;
};
This page took 0.11459 seconds and 3 git commands to generate.