]> Lady’s Gitweb - Pisces/blob - string.js
bffa6d2ac9198201af7e1b6760a9fe15a3962abe
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import {
11 bind,
12 call,
13 createArrowFunction,
14 createCallableFunction,
15 identity,
16 } from "./function.js";
17 import {
18 arrayIteratorFunction,
19 stringIteratorFunction,
20 } from "./iterable.js";
21 import {
22 defineOwnProperties,
23 getOwnPropertyDescriptors,
24 setPrototype,
25 } from "./object.js";
26 import { sameValue, toLength } from "./value.js";
27
// Intrinsic constructors and prototypes, captured once when this
// module is evaluated and reused by the definitions below.
const RE = RegExp;
const { prototype: rePrototype } = RE;
const { prototype: arrayPrototype } = Array;
const { prototype: stringPrototype } = String;

// The original `RegExp::exec`, for invoking via `call()`.
const { exec: reExec } = rePrototype;
34
export const {
  /**
   * A `RegExp`like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although other methods like `::exec` coerce their arguments and
   * may still return a match.
   */
  Matcher,
} = (() => {
  const { toString: reToString } = rePrototype;

  /** Returns the original getter for a `RegExp.prototype` accessor. */
  const getterOf = (property) =>
    Object.getOwnPropertyDescriptor(rePrototype, property).get;

  const getDotAll = getterOf("dotAll");
  const getFlags = getterOf("flags");
  const getGlobal = getterOf("global");
  const getHasIndices = getterOf("hasIndices");
  const getIgnoreCase = getterOf("ignoreCase");
  const getMultiline = getterOf("multiline");
  const getSource = getterOf("source");
  const getSticky = getterOf("sticky");
  const getUnicode = getterOf("unicode");

  // ※ This class extends `identity` so that the function passed to
  // `super()` becomes the constructed object; this is what lets the
  // private fields below be attached to a callable.
  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new `Matcher` from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the
     * `Matcher` object itself. If the return value of this call is
     * falsey, then the match will be considered a failure.
     *
     * ☡ This constructor throws a `TypeError` if the provided regular
     * expression lacks the unicode flag, or if the provided constraint
     * is neither null nor callable.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     */
    constructor(source, name = undefined, constraint = null) {
      super(
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `.lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            //
            // ※ The constraint result is coerced so that calling a
            // matcher always produces a boolean, as documented.
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $ &&
              (constraint === null || !!constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        try {
          call(reExec, source, [""]); // throws if source not a RegExp
        } catch {
          return new RE(`${source}`, "u");
        }
        const unicode = call(getUnicode, source, []);
        if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            lastIndex: {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            },
            name: {
              value: name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            },
          },
        );
      }
    }

    /** Gets whether the dot‐all flag is present on this `Matcher`. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this `Matcher` on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /**
     * Gets the flags present on this `Matcher`.
     *
     * ※ This needs to be defined because the internal `RegExp` object
     * may have flags which are not yet recognized by ♓🌟 Piscēs.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether the global flag is present on this `Matcher`. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /**
     * Gets whether the has‐indices flag is present on this `Matcher`.
     */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /**
     * Gets whether the ignore‐case flag is present on this `Matcher`.
     */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /**
     * Gets whether the multiline flag is present on this `Matcher`.
     */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this `Matcher`. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this `Matcher`. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this `Matcher`.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exported constructor extends `RegExp` but simply delegates
  // construction to the internal class above.
  const matcherConstructor = defineOwnProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: { value: "Matcher" },
      length: { value: 1 },
    },
  );

  // The public prototype receives every method and accessor of the
  // internal class, with `constructor` pointing at the exported
  // constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    { constructor: { value: matcherConstructor } },
  );

  return { Matcher: matcherConstructor };
})();
269
export const {
  /**
   * Returns the result of converting the string representation of the
   * provided value to A·S·C·I·I lowercase.
   *
   * ※ Only the characters `A` thru `Z` are affected.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the string representation of the
   * provided value to A·S·C·I·I uppercase.
   *
   * ※ Only the characters `a` thru `z` are affected.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = stringPrototype;

  // The replacer callbacks do not depend on the input, so they are
  // created once here rather than on every call.
  const lowercaseMatch = createCallableFunction(stringToLowercase);
  const uppercaseMatch = createCallableFunction(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, lowercaseMatch),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, uppercaseMatch),
  };
})();
302
/**
 * Returns −0 if the provided argument is "-0"; returns a number
 * representing the index if the provided argument is a canonical
 * numeric index string; otherwise, returns undefined.
 *
 * A string is canonical if converting it to a number and back to a
 * string produces the same string. Nonstrings always give undefined.
 *
 * There is no clamping of the numeric index, but note that numbers
 * above 2^53 − 1 are not safe nor valid integer indices.
 */
export const canonicalNumericIndexString = ($) => {
  if (typeof $ !== "string") {
    // The provided value is not a string.
    return undefined;
  } else if ($ === "-0") {
    // `${-0}` is "0", so negative zero needs to be special‐cased.
    return -0;
  } else {
    // The provided value is canonical only if it roundtrips.
    const n = +$;
    return $ === `${n}` ? n : undefined;
  }
};
321
export const {
  /**
   * Returns an iterator over the characters (each one a string) in the
   * string representation of the provided value according to the
   * algorithm of `String::[Symbol.iterator]`.
   */
  characters,

  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   *
   * ※ Surrogate codepoints are yielded as·is.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,
} = (() => {
  const generateCharacters = function* (character) {
    yield character;
  };
  const generateCodeUnits = function* (ucsCharacter) {
    yield getCodeUnit(ucsCharacter, 0);
  };
  const generateCodepoints = function* (character) {
    // The `this` value is supplied via `bind()` below and selects
    // whether surrogates (U+D800 thru U+DFFF) are passed thru or
    // replaced.
    const { allowSurrogates } = this;
    const codepoint = getCodepoint(character, 0);
    yield allowSurrogates || codepoint <= 0xD7FF || codepoint >= 0xE000
      ? codepoint
      : 0xFFFD;
  };

  const charactersIterator = stringIteratorFunction(
    generateCharacters,
    "String Character Iterator",
  );
  const codeUnitsIterator = arrayIteratorFunction(
    generateCodeUnits,
    "String Code Unit Iterator",
  );
  const codepointsIterator = stringIteratorFunction(
    bind(generateCodepoints, { allowSurrogates: true }, []),
    "String Codepoint Iterator",
  );
  const scalarValuesIterator = stringIteratorFunction(
    bind(generateCodepoints, { allowSurrogates: false }, []),
    "String Scalar Value Iterator",
  );

  return {
    characters: ($) => charactersIterator(`${$}`),
    codeUnits: ($) => codeUnitsIterator(`${$}`),
    codepoints: ($) => codepointsIterator(`${$}`),
    scalarValues: ($) => scalarValuesIterator(`${$}`),
  };
})();
389
/**
 * Returns the character at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::codePointAt`.
 *
 * ※ This function returns undefined if `getCodepoint()` gives a
 * nullish result for the provided position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  return codepoint == null
    ? undefined
    : stringFromCodepoints(codepoint);
};
401
export const {
  /**
   * Returns the code unit at the provided position in the string
   * representation of the provided value according to the algorithm of
   * `String::charCodeAt`, except that out‐of‐bounds values return
   * undefined in place of nan.
   */
  getCodeUnit,

  /** Returns whether the provided value is an integer index string. */
  isIntegerIndexString,

  /**
   * Returns a string created from the provided code units.
   *
   * ※ This is effectively an alias for `String.fromCharCode`, but
   * with the same error behaviour as `String.fromCodePoint`.
   *
   * ☡ This function throws an error if provided with an argument which
   * is not an integral number from 0 to FFFF₁₆ inclusive.
   */
  stringFromCodeUnits,

  /**
   * Returns the result of catenating the string representations of the
   * provided values, returning a new string according to the algorithm
   * of `String::concat`.
   *
   * ※ If no arguments are given, this function returns the empty
   * string. This is different behaviour than if an explicit undefined
   * first argument is given, in which case the resulting string will
   * begin with `"undefined"`.
   */
  stringCatenate,
} = (() => {
  const { fromCharCode } = String;
  const { charCodeAt, concat } = String.prototype;
  const {
    MAX_SAFE_INTEGER: MAXIMUM_SAFE_INTEGRAL_NUMBER,
    isInteger: isIntegralNumber,
    isNaN: isNan,
  } = Number;

  return {
    getCodeUnit: ($, n) => {
      // `String::charCodeAt` gives nan when out of bounds.
      const codeUnit = call(charCodeAt, $, [n]);
      return isNan(codeUnit) ? undefined : codeUnit;
    },
    isIntegerIndexString: ($) => {
      const value = canonicalNumericIndexString($);
      if (value !== undefined && isIntegralNumber(value)) {
        // The provided value is an integral canonical numeric index
        // string.
        //
        // ※ `sameValue()` is used for the zero check so that "-0"
        // (which gives negative zero) is rejected.
        return sameValue(value, 0) ||
          value > 0 && value <= MAXIMUM_SAFE_INTEGRAL_NUMBER &&
            value === toLength(value);
      } else {
        // The provided value is not an integral canonical numeric
        // index string.
        return false;
      }
    },
    stringCatenate: defineOwnProperties(
      (...args) => call(concat, "", args),
      { name: { value: "stringCatenate" }, length: { value: 2 } },
    ),
    stringFromCodeUnits: defineOwnProperties(
      (...codeUnits) => {
        for (let index = 0; index < codeUnits.length; ++index) {
          // Iterate over each provided code unit and throw if it is
          // out of range.
          const nextCU = +codeUnits[index];
          if (
            !isIntegralNumber(nextCU) || nextCU < 0 || nextCU > 0xFFFF
          ) {
            // The code unit is not an integral number between 0 and
            // 0xFFFF.
            throw new RangeError(
              `Piscēs: Code unit out of range: ${nextCU}.`,
            );
          } else {
            // The code unit is acceptable.
            /* do nothing */
          }
        }
        return call(fromCharCode, undefined, codeUnits);
      },
      { name: { value: "stringFromCodeUnits" }, length: { value: 1 } },
    ),
  };
})();
493
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value according to the algorithm of
 * `String::codePointAt`.
 *
 * ※ This is `String::codePointAt` wrapped with
 * `createCallableFunction()`.
 */
export const getCodepoint = createCallableFunction(
  stringPrototype.codePointAt,
  { name: "getCodepoint" },
);
503
/**
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value according to the
 * algorithm of `String::indexOf`.
 *
 * ※ This is `String::indexOf` wrapped with
 * `createCallableFunction()`.
 */
export const getFirstSubstringIndex = createCallableFunction(
  stringPrototype.indexOf,
  { name: "getFirstSubstringIndex" },
);
513
/**
 * Returns the index of the last occurrence of the search string in the
 * string representation of the provided value according to the
 * algorithm of `String::lastIndexOf`.
 *
 * ※ This is `String::lastIndexOf` wrapped with
 * `createCallableFunction()`.
 */
export const getLastSubstringIndex = createCallableFunction(
  stringPrototype.lastIndexOf,
  { name: "getLastSubstringIndex" },
);
523
/**
 * Returns the result of joining the provided iterable.
 *
 * The iterable is first collected into a new array, which is then
 * joined according to the algorithm of `Array::join`.
 *
 * If no separator is provided, it defaults to ",". Any other
 * separator is converted to a string.
 *
 * If a value is nullish, it will be stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = arrayPrototype;
  const join = ($, separator) =>
    call(
      arrayJoin,
      [...$],
      [separator === undefined ? "," : `${separator}`],
    );
  return join;
})();
541
/**
 * Returns a string created from the raw value of the tagged template
 * literal.
 *
 * ※ This is effectively an alias for `String.raw`, created with
 * `createArrowFunction()`.
 */
export const rawString = createArrowFunction(String.raw, {
  name: "rawString",
});
551
/**
 * Returns a string created from the provided codepoints.
 *
 * ※ This is effectively an alias for `String.fromCodePoint`, created
 * with `createArrowFunction()`.
 *
 * ☡ This function throws an error if provided with an argument which
 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
 */
export const stringFromCodepoints = createArrowFunction(
  String.fromCodePoint,
  { name: "stringFromCodepoints" },
);
564
/**
 * Returns the result of splitting the string representation of the
 * provided value on Ascii whitespace.
 *
 * The value is first stripped and collapsed with
 * `stripAndCollapseAsciiWhitespace()`, and then split on single
 * spaces with `stringSplit()`.
 *
 * ☡ Because the final step is a `String::split`‐style split, an empty
 * or all‐whitespace value presumably produces an array containing one
 * empty string rather than an empty array.
 */
export const splitOnAsciiWhitespace = ($) =>
  stringSplit(stripAndCollapseAsciiWhitespace($), " ");
571
/**
 * Returns the result of splitting the string representation of the
 * provided value on commas, trimming Ascii whitespace from the
 * resulting tokens.
 *
 * ※ Whitespace around each comma is removed first, then whitespace
 * at the very start and end of the value, and only then is the value
 * split.
 */
export const splitOnCommas = ($) =>
  stringSplit(
    stripLeadingAndTrailingAsciiWhitespace(
      stringReplaceAll(
        `${$}`,
        /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
        ",",
      ),
    ),
    ",",
  );
587
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string according to the algorithm of
 * `String::endsWith`.
 *
 * ※ This is `String::endsWith` wrapped with
 * `createCallableFunction()`.
 */
export const stringEndsWith = createCallableFunction(
  stringPrototype.endsWith,
  { name: "stringEndsWith" },
);
597
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string according to the algorithm of
 * `String::includes`.
 *
 * ※ This is `String::includes` wrapped with
 * `createCallableFunction()`.
 */
export const stringIncludes = createCallableFunction(
  stringPrototype.includes,
  { name: "stringIncludes" },
);
607
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::match`.
 *
 * ※ This is `String::match` wrapped with `createCallableFunction()`.
 */
export const stringMatch = createCallableFunction(
  stringPrototype.match,
  { name: "stringMatch" },
);
617
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of `String::matchAll`.
 *
 * ※ This is `String::matchAll` wrapped with
 * `createCallableFunction()`.
 */
export const stringMatchAll = createCallableFunction(
  stringPrototype.matchAll,
  { name: "stringMatchAll" },
);
627
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of `String::normalize`.
 *
 * ※ This is `String::normalize` wrapped with
 * `createCallableFunction()`.
 */
export const stringNormalize = createCallableFunction(
  stringPrototype.normalize,
  { name: "stringNormalize" },
);
636
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padEnd`.
 *
 * ※ This is `String::padEnd` wrapped with `createCallableFunction()`.
 */
export const stringPadEnd = createCallableFunction(
  stringPrototype.padEnd,
  { name: "stringPadEnd" },
);
646
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of `String::padStart`.
 *
 * ※ This is `String::padStart` wrapped with
 * `createCallableFunction()`.
 */
export const stringPadStart = createCallableFunction(
  stringPrototype.padStart,
  { name: "stringPadStart" },
);
656
/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times according to the
 * algorithm of `String::repeat`.
 *
 * ※ This is `String::repeat` wrapped with `createCallableFunction()`.
 */
export const stringRepeat = createCallableFunction(
  stringPrototype.repeat,
  { name: "stringRepeat" },
);
666
/**
 * Returns the result of replacing matches of the provided matcher in
 * the string representation of the provided value with the provided
 * replacement, according to the algorithm of `String::replace`.
 *
 * ※ This is `String::replace` wrapped with
 * `createCallableFunction()`.
 */
export const stringReplace = createCallableFunction(
  stringPrototype.replace,
  { name: "stringReplace" },
);
676
/**
 * Returns the result of replacing every match of the provided matcher
 * in the string representation of the provided value with the
 * provided replacement, according to the algorithm of
 * `String::replaceAll`.
 *
 * ※ This is `String::replaceAll` wrapped with
 * `createCallableFunction()`.
 */
export const stringReplaceAll = createCallableFunction(
  stringPrototype.replaceAll,
  { name: "stringReplaceAll" },
);
686
/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher and according to the
 * algorithm of `String::search`.
 *
 * ※ This is `String::search` wrapped with `createCallableFunction()`.
 */
export const stringSearch = createCallableFunction(
  stringPrototype.search,
  { name: "stringSearch" },
);
696
/**
 * Returns a slice of the string representation of the provided value
 * according to the algorithm of `String::slice`.
 *
 * ※ This is `String::slice` wrapped with `createCallableFunction()`.
 */
export const stringSlice = createCallableFunction(
  stringPrototype.slice,
  { name: "stringSlice" },
);
705
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of `String::split`.
 *
 * ※ This is `String::split` wrapped with `createCallableFunction()`.
 */
export const stringSplit = createCallableFunction(
  stringPrototype.split,
  { name: "stringSplit" },
);
715
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string according to the algorithm of
 * `String::startsWith`.
 *
 * ※ This is `String::startsWith` wrapped with
 * `createCallableFunction()`.
 */
export const stringStartsWith = createCallableFunction(
  stringPrototype.startsWith,
  { name: "stringStartsWith" },
);
725
/**
 * Returns the value of the provided string.
 *
 * ※ This is effectively an alias for `String::valueOf`, created with
 * `createCallableFunction()`.
 *
 * ☡ This function throws if the provided argument is not a string and
 * does not have a `[[StringData]]` slot.
 */
export const stringValue = createCallableFunction(
  stringPrototype.valueOf,
  { name: "stringValue" },
);
738
/**
 * Returns the result of stripping leading and trailing Ascii
 * whitespace from the provided value and collapsing other Ascii
 * whitespace in the string representation of the provided value.
 *
 * ※ Each run of Ascii whitespace (newline, carriage return, tab, form
 * feed, or space) is first replaced by a single space; leading and
 * trailing whitespace is then removed.
 */
export const stripAndCollapseAsciiWhitespace = ($) =>
  stripLeadingAndTrailingAsciiWhitespace(
    stringReplaceAll(
      `${$}`,
      /[\n\r\t\f ]+/gu,
      " ",
    ),
  );
752
/**
 * Returns the result of stripping leading and trailing Ascii
 * whitespace from the string representation of the provided value.
 *
 * ※ Ascii whitespace here means newline, carriage return, tab, form
 * feed, and space. The pattern can always match, so the first capture
 * group (everything between the whitespace runs) is always present.
 */
export const stripLeadingAndTrailingAsciiWhitespace = ($) =>
  call(reExec, /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u, [$])[1];
759
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of `String::substring`.
 *
 * ※ This is `String::substring` wrapped with
 * `createCallableFunction()`. The name is given explicitly, as it is
 * for every other wrapper in this module.
 */
export const substring = createCallableFunction(
  stringPrototype.substring,
  { name: "substring" },
);
767
/**
 * Returns the result of converting the provided value to a string of
 * scalar values by replacing (unpaired) surrogate values with
 * U+FFFD.
 *
 * ※ This is `String::toWellFormed` wrapped with
 * `createCallableFunction()`.
 */
export const toScalarValueString = createCallableFunction(
  String.prototype.toWellFormed,
  { name: "toScalarValueString" },
);
777
/**
 * Returns the result of converting the provided value to a string.
 *
 * ※ The conversion is that of a template literal.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => `${$}`;
This page took 0.126261 seconds and 3 git commands to generate.