]> Lady’s Gitweb - Pisces/blob - string.js
Add methods for own entries and values to object.js
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import {
11 bind,
12 call,
13 createArrowFunction,
14 createCallableFunction,
15 identity,
16 } from "./function.js";
17 import {
18 arrayIteratorFunction,
19 stringIteratorFunction,
20 } from "./iterable.js";
21 import {
22 defineOwnDataProperty,
23 defineOwnProperties,
24 getOwnPropertyDescriptors,
25 objectCreate,
26 setPropertyValues,
27 setPrototype,
28 } from "./object.js";
29 import { sameValue, toLength, UNDEFINED } from "./value.js";
30
31 const RE = RegExp;
32 const { prototype: rePrototype } = RE;
33 const { prototype: arrayPrototype } = Array;
34 const { prototype: stringPrototype } = String;
35
36 const { exec: reExec } = rePrototype;
37
export const {
  /**
   * A `RegExp`like callable object which matches only whole strings,
   * optionally subject to an additional constraint.
   *
   * Calling a matcher with a string returns true when the string is
   * matched and false otherwise. Calling a matcher with a nonstring
   * always returns false; `::exec`, by contrast, coerces its argument
   * to a string first and so may still produce a match.
   */
  Matcher,
} = (() => {
  const { toString: reToString } = rePrototype;

  /** Returns the builtin getter for the named `RegExp` property. */
  const builtinGetter = (property) =>
    Object.getOwnPropertyDescriptor(rePrototype, property).get;

  const getDotAll = builtinGetter("dotAll");
  const getFlags = builtinGetter("flags");
  const getGlobal = builtinGetter("global");
  const getHasIndices = builtinGetter("hasIndices");
  const getIgnoreCase = builtinGetter("ignoreCase");
  const getMultiline = builtinGetter("multiline");
  const getSource = builtinGetter("source");
  const getSticky = builtinGetter("sticky");
  const getUnicode = builtinGetter("unicode");

  /**
   * Returns a new regular expression object for the provided source.
   *
   * If the builtin `exec` accepts the source as its this value, the
   * source is treated as a regular expression and must have the
   * unicode flag. Otherwise, the source is stringified and compiled
   * with (only) the unicode flag.
   *
   * ☡ This function throws a `TypeError` when given a regular
   * expression which does not have the unicode flag set.
   */
  const toInternalRegExp = (source) => {
    try {
      // The builtin `exec` throws when its this value is not a
      // regular expression object.
      call(reExec, source, [""]);
    } catch {
      return new RE(`${source}`, "u");
    }
    if (call(getUnicode, source, [])) {
      // The provided regular expression has a unicode flag.
      return new RE(source);
    } else {
      // The provided regular expression does not have a unicode flag.
      throw new TypeError(
        `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
      );
    }
  };

  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new `Matcher` from the provided source.
     *
     * The source may be a regular expression, in which case it must
     * have the unicode flag set and its other flags are carried over.
     * Any other source is interpreted as the string source of a
     * regular expression with the unicode flag set.
     *
     * The second argument, if not nullish, provides a name for the
     * matcher.
     *
     * The third argument, if provided, must be callable and acts as a
     * constraint on acceptable inputs. Whenever a match appears
     * successful, it is called with the string being matched, the
     * match result, and the `Matcher` object itself (in that order).
     * A falsey return value turns the match into a failure.
     *
     * ☡ A source regular expression which uses nongreedy quantifiers
     * may fail to match the whole string even when such a match is
     * possible. Surround the regular expression with `^(?:` and `)$`
     * to keep shorter matches from causing failures.
     */
    constructor(source, name = UNDEFINED, constraint = null) {
      super(
        (value) => {
          if (typeof value === "string") {
            // The provided value is a string. Reset `.lastIndex` so
            // that matching starts from the beginning, then require
            // the first match to span the whole string and to pass
            // the constraint (if present).
            regExp.lastIndex = 0;
            const match = call(reExec, regExp, [value]);
            return match?.[0] === value &&
              (constraint === null || constraint(value, match, this));
          } else {
            // The provided value is not a string.
            return false;
          }
        },
      );
      const regExp = this.#regExp = toInternalRegExp(source);
      if (constraint === null || typeof constraint === "function") {
        // The constraint is absent or callable.
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            lastIndex: setPropertyValues(objectCreate(null), {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            }),
            name: defineOwnDataProperty(
              objectCreate(null),
              "value",
              name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            ),
          },
        );
      } else {
        // The constraint was provided but cannot be called.
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      }
    }

    /** Gets whether this `Matcher` has the dot‐all flag. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this `Matcher` on the string representation of the
     * provided value, returning the match result, or null if there is
     * no match.
     *
     * A match only counts if the first attempt matches the entire
     * string and the constraint (if present) returns a truthy value.
     *
     * ☡ The returned match result is the same object that was passed
     * to the constraint function, which may have modified it.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      const accepted = result?.[0] === string &&
        (constraint === null || constraint(string, result, this));
      // Only hand back the result when the whole string matched and
      // the constraint (if any) accepted it.
      return accepted ? result : null;
    }

    /**
     * Gets the flags of this `Matcher`.
     *
     * ※ This is defined separately because the internal `RegExp`
     * object may have flags which ♓🌟 Piscēs does not yet recognize.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether this `Matcher` has the global flag. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /** Gets whether this `Matcher` has the has‐indices flag. */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /** Gets whether this `Matcher` has the ignore‐case flag. */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /** Gets whether this `Matcher` has the multiline flag. */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source of this `Matcher`. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether this `Matcher` has the sticky flag. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether this `Matcher` has the unicode flag.
     *
     * ※ The constructor only ever produces unicode regular
     * expressions, so this will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exported constructor extends `RegExp` but delegates all
  // construction to the class above.
  const matcherConstructor = Object.defineProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: defineOwnDataProperty(
        Object.create(null),
        "value",
        "Matcher",
      ),
      length: defineOwnDataProperty(Object.create(null), "value", 1),
    },
  );

  // Copy the accessors and methods defined above onto the exported
  // prototype, pointing `constructor` at the exported constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    {
      constructor: defineOwnDataProperty(
        Object.create(null),
        "value",
        matcherConstructor,
      ),
    },
  );

  return { Matcher: matcherConstructor };
})();
284
export const {
  /**
   * Returns the result of converting the string representation of the
   * provided value to A·S·C·I·I lowercase.
   *
   * Only the characters `A` through `Z` are affected.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the string representation of the
   * provided value to A·S·C·I·I uppercase.
   *
   * Only the characters `a` through `z` are affected.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = stringPrototype;

  // Build the replacer functions once, rather than on every call.
  //
  // ※ `String::replaceAll` passes the matched substring as the first
  // argument of its replacer; presumably `createCallableFunction`
  // forwards that first argument as the this value of the wrapped
  // method.
  const lowercaseMatch = createCallableFunction(stringToLowercase);
  const uppercaseMatch = createCallableFunction(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, lowercaseMatch),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, uppercaseMatch),
  };
})();
317
/**
 * Returns the number corresponding to the provided argument if it is
 * a canonical numeric index string, and undefined otherwise.
 *
 * The string "-0" yields −0. Any other string is canonical only if
 * converting it to a number and back reproduces it exactly. Values
 * which are not strings always yield undefined.
 *
 * ※ The result is not clamped; numbers above 2^53 − 1 are neither
 * safe nor valid integer indices.
 */
export const canonicalNumericIndexString = ($) => {
  if (typeof $ === "string") {
    if ($ === "-0") {
      // Negative zero does not survive a round trip through a number,
      // so it is handled explicitly.
      return -0;
    }
    const number = +$;
    return `${number}` === $ ? number : UNDEFINED;
  }
  // The provided value is not a string.
  return UNDEFINED;
};
336
export const {
  /**
   * Returns an iterator over the characters (one string per
   * codepoint) in the string representation of the provided value,
   * following the algorithm of `String::[Symbol.iterator]`.
   */
  characters,

  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Any codepoint which is not a valid Unicode scalar value is
   * replaced with U+FFFD.
   */
  scalarValues,
} = (() => {
  /** Yields the provided character unchanged. */
  const yieldCharacter = function* (character) {
    yield character;
  };

  /** Yields the first code unit of the provided character. */
  const yieldCodeUnit = function* (ucsCharacter) {
    yield getCodeUnit(ucsCharacter, 0);
  };

  /**
   * Yields the first codepoint of the provided character.
   *
   * The this value must be an object with an `allowSurrogates`
   * property; when that property is falsey, values which fall outside
   * the scalar value ranges yield U+FFFD instead.
   */
  const yieldCodepoint = function* (character) {
    const { allowSurrogates } = this;
    const codepoint = getCodepoint(character, 0);
    if (allowSurrogates || codepoint <= 0xD7FF || codepoint >= 0xE000) {
      yield codepoint;
    } else {
      yield 0xFFFD;
    }
  };

  const iterateCharacters = stringIteratorFunction(
    yieldCharacter,
    "String Character Iterator",
  );
  const iterateCodeUnits = arrayIteratorFunction(
    yieldCodeUnit,
    "String Code Unit Iterator",
  );
  const iterateCodepoints = stringIteratorFunction(
    bind(yieldCodepoint, { allowSurrogates: true }, []),
    "String Codepoint Iterator",
  );
  const iterateScalarValues = stringIteratorFunction(
    bind(yieldCodepoint, { allowSurrogates: false }, []),
    "String Scalar Value Iterator",
  );

  // Each export stringifies its argument before iterating.
  return {
    characters: ($) => iterateCharacters(`${$}`),
    codeUnits: ($) => iterateCodeUnits(`${$}`),
    codepoints: ($) => iterateCodepoints(`${$}`),
    scalarValues: ($) => iterateScalarValues(`${$}`),
  };
})();
404
/**
 * Returns the character (as a string) at the provided position in the
 * string representation of the provided value, located according to
 * the algorithm of `String::codePointAt`.
 *
 * If there is no codepoint at the provided position, the result is
 * undefined.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return UNDEFINED;
  } else {
    // Convert the codepoint back into a string.
    return stringFromCodepoints(codepoint);
  }
};
416
export const {
  /**
   * Returns the code unit at the provided position in the string
   * representation of the provided value according to the algorithm
   * of `String::charCodeAt`, except that out‐of‐bounds positions
   * return undefined in place of nan.
   */
  getCodeUnit,

  /**
   * Returns a string created from the provided code units.
   *
   * ※ This is effectively an alias for `String.fromCharCode`, but
   * with the same error behaviour as `String.fromCodePoint`.
   *
   * ※ Each argument is converted to a number exactly once; the
   * converted value is both validated and used to build the string.
   *
   * ☡ This function throws a `RangeError` if provided with an
   * argument which is not an integral number from 0 to FFFF₁₆
   * inclusive.
   */
  stringFromCodeUnits,

  /**
   * Returns the result of catenating the string representations of
   * the provided values, returning a new string according to the
   * algorithm of `String::concat`.
   *
   * ※ If no arguments are given, this function returns the empty
   * string. This is different behaviour than if an explicit undefined
   * first argument is given, in which case the resulting string will
   * begin with `"undefined"`.
   */
  stringCatenate,
} = (() => {
  const { fromCharCode } = String;
  const { charCodeAt, concat } = stringPrototype;
  const {
    isInteger: isIntegralNumber,
    isNaN: isNan,
  } = Number;

  return {
    getCodeUnit: ($, n) => {
      // `String::charCodeAt` signals an out‐of‐bounds position with
      // nan; translate that into undefined.
      const codeUnit = call(charCodeAt, $, [n]);
      return isNan(codeUnit) ? UNDEFINED : codeUnit;
    },
    stringCatenate: Object.defineProperties(
      // Catenate onto the empty string so that every argument is
      // treated alike.
      (...args) => call(concat, "", args),
      { name: { value: "stringCatenate" }, length: { value: 2 } },
    ),
    stringFromCodeUnits: Object.defineProperties(
      (...codeUnits) => {
        for (let index = 0; index < codeUnits.length; ++index) {
          // Iterate over each provided code unit and throw if it is
          // out of range.
          const nextCU = +codeUnits[index];
          if (
            !isIntegralNumber(nextCU) || nextCU < 0 || nextCU > 0xFFFF
          ) {
            // The code unit is not an integral number between 0 and
            // 0xFFFF.
            throw new RangeError(
              `Piscēs: Code unit out of range: ${nextCU}.`,
            );
          } else {
            // The code unit is acceptable. Store the converted number
            // so that `String.fromCharCode` sees exactly the value
            // which was validated, rather than converting the
            // original argument a second time.
            codeUnits[index] = nextCU;
          }
        }
        return call(fromCharCode, UNDEFINED, codeUnits);
      },
      { name: { value: "stringFromCodeUnits" }, length: { value: 1 } },
    ),
  };
})();
490
/**
 * Returns the codepoint found at the provided position in the string
 * representation of the provided value, following the algorithm of
 * `String::codePointAt`.
 */
export const getCodepoint = (() => {
  const { codePointAt } = stringPrototype;
  return createCallableFunction(codePointAt, { name: "getCodepoint" });
})();
500
/**
 * Returns the index at which the search string first occurs in the
 * string representation of the provided value, following the
 * algorithm of `String::indexOf`.
 */
export const getFirstSubstringIndex = (() => {
  const { indexOf } = stringPrototype;
  return createCallableFunction(indexOf, {
    name: "getFirstSubstringIndex",
  });
})();
510
/**
 * Returns the index at which the search string last occurs in the
 * string representation of the provided value, following the
 * algorithm of `String::lastIndexOf`.
 */
export const getLastSubstringIndex = (() => {
  const { lastIndexOf } = stringPrototype;
  return createCallableFunction(lastIndexOf, {
    name: "getLastSubstringIndex",
  });
})();
520
/**
 * Returns whether the provided value is an array index string.
 *
 * An array index string is a canonical numeric index string whose
 * value is positive zero, or a positive integral length which is
 * less than 2^32 − 1.
 */
export const isArrayIndexString = ($) => {
  const index = canonicalNumericIndexString($);
  if (index === UNDEFINED) {
    // The provided value is not a canonical numeric index string.
    return false;
  } else {
    // The provided value is a canonical numeric index string; check
    // whether it is in range for array indices. `-1 >>> 0` is the
    // largest unsigned 32‐bit integer.
    return sameValue(index, 0) ||
      index === toLength(index) && index > 0 && index < (-1 >>> 0);
  }
};
534
/**
 * Returns whether the provided value is an integer index string.
 *
 * An integer index string is a canonical numeric index string whose
 * value is positive zero or a positive integral length.
 */
export const isIntegerIndexString = ($) => {
  const index = canonicalNumericIndexString($);
  if (index === UNDEFINED) {
    // The provided value is not a canonical numeric index string.
    return false;
  } else {
    // The provided value is a canonical numeric index string; check
    // whether it is in range for integer indices.
    return sameValue(index, 0) ||
      index === toLength(index) && index > 0;
  }
};
548
/**
 * Returns the string which results from joining the items of the
 * provided iterable.
 *
 * The separator defaults to "," when it is undefined; otherwise, its
 * string representation is used.
 *
 * Nullish items are stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = arrayPrototype;
  const join = ($, separator) => {
    // Collect the iterable into an array so that the builtin array
    // join can be applied to it.
    const items = [...$];
    const glue = separator === UNDEFINED ? "," : `${separator}`;
    return call(arrayJoin, items, [glue]);
  };
  return join;
})();
566
/**
 * Returns the string formed from the raw value of a tagged template
 * literal.
 *
 * ※ This is effectively an alias for `String.raw`.
 */
export const rawString = (() => {
  const { raw } = String;
  return createArrowFunction(raw, { name: "rawString" });
})();
576
/**
 * Returns the string formed from the provided codepoints.
 *
 * ※ This is effectively an alias for `String.fromCodePoint`.
 *
 * ☡ This function throws an error if any argument is not an integral
 * number from 0 to 10FFFF₁₆ inclusive.
 */
export const stringFromCodepoints = (() => {
  const { fromCodePoint } = String;
  return createArrowFunction(fromCodePoint, {
    name: "stringFromCodepoints",
  });
})();
589
/**
 * Returns the result of splitting the provided value on Ascii
 * whitespace.
 *
 * Leading and trailing whitespace is stripped and internal runs of
 * whitespace are collapsed before splitting on single spaces.
 */
export const splitOnAsciiWhitespace = ($) => {
  const collapsed = stripAndCollapseAsciiWhitespace($);
  return stringSplit(collapsed, " ");
};
596
/**
 * Returns the result of splitting the provided value on commas, with
 * Ascii whitespace trimmed from the resulting tokens.
 */
export const splitOnCommas = ($) => {
  // Remove the Ascii whitespace on either side of each comma.
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );
  // Remove the Ascii whitespace at the very start and end, which is
  // not adjacent to any comma.
  const trimmed = stripLeadingAndTrailingAsciiWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
612
/**
 * Returns whether the search string occurs at the end of the string
 * representation of the provided value, following the algorithm of
 * `String::endsWith`.
 */
export const stringEndsWith = (() => {
  const { endsWith } = stringPrototype;
  return createCallableFunction(endsWith, { name: "stringEndsWith" });
})();
622
/**
 * Returns whether the search string occurs within the string
 * representation of the provided value, following the algorithm of
 * `String::includes`.
 */
export const stringIncludes = (() => {
  const { includes } = stringPrototype;
  return createCallableFunction(includes, { name: "stringIncludes" });
})();
632
/**
 * Returns the result of matching the provided matcher against the
 * string representation of the provided value, following the
 * algorithm of `String::match`.
 */
export const stringMatch = (() => {
  const { match } = stringPrototype;
  return createCallableFunction(match, { name: "stringMatch" });
})();
642
/**
 * Returns the result of matching the provided matcher against the
 * string representation of the provided value, following the
 * algorithm of `String::matchAll`.
 */
export const stringMatchAll = (() => {
  const { matchAll } = stringPrototype;
  return createCallableFunction(matchAll, { name: "stringMatchAll" });
})();
652
/**
 * Returns the normalized form of the string representation of the
 * provided value, following the algorithm of `String::normalize`.
 */
export const stringNormalize = (() => {
  const { normalize } = stringPrototype;
  return createCallableFunction(normalize, { name: "stringNormalize" });
})();
661
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it reaches the desired length,
 * following the algorithm of `String::padEnd`.
 */
export const stringPadEnd = (() => {
  const { padEnd } = stringPrototype;
  return createCallableFunction(padEnd, { name: "stringPadEnd" });
})();
671
/**
 * Returns the result of padding the start of the string
 * representation of the provided value until it reaches the desired
 * length, following the algorithm of `String::padStart`.
 */
export const stringPadStart = (() => {
  const { padStart } = stringPrototype;
  return createCallableFunction(padStart, { name: "stringPadStart" });
})();
681
/**
 * Returns the string representation of the provided value repeated
 * the provided number of times, following the algorithm of
 * `String::repeat`.
 */
export const stringRepeat = (() => {
  const { repeat } = stringPrototype;
  return createCallableFunction(repeat, { name: "stringRepeat" });
})();
691
/**
 * Returns the result of replacing, within the string representation
 * of the provided value, what the provided matcher matches with the
 * provided replacement, following the algorithm of `String::replace`.
 */
export const stringReplace = (() => {
  const { replace } = stringPrototype;
  return createCallableFunction(replace, { name: "stringReplace" });
})();
701
/**
 * Returns the result of replacing, within the string representation
 * of the provided value, everything the provided matcher matches with
 * the provided replacement, following the algorithm of
 * `String::replaceAll`.
 */
export const stringReplaceAll = (() => {
  const { replaceAll } = stringPrototype;
  return createCallableFunction(replaceAll, {
    name: "stringReplaceAll",
  });
})();
711
/**
 * Returns the result of searching the string representation of the
 * provided value with the provided matcher, following the algorithm
 * of `String::search`.
 */
export const stringSearch = (() => {
  const { search } = stringPrototype;
  return createCallableFunction(search, { name: "stringSearch" });
})();
721
/**
 * Returns a slice of the string representation of the provided
 * value, following the algorithm of `String::slice`.
 */
export const stringSlice = (() => {
  const { slice } = stringPrototype;
  return createCallableFunction(slice, { name: "stringSlice" });
})();
730
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator, following the algorithm
 * of `String::split`.
 */
export const stringSplit = (() => {
  const { split } = stringPrototype;
  return createCallableFunction(split, { name: "stringSplit" });
})();
740
/**
 * Returns whether the search string occurs at the start of the string
 * representation of the provided value, following the algorithm of
 * `String::startsWith`.
 */
export const stringStartsWith = (() => {
  const { startsWith } = stringPrototype;
  return createCallableFunction(startsWith, {
    name: "stringStartsWith",
  });
})();
750
/**
 * Returns the string value of the provided string.
 *
 * ※ This is effectively an alias for `String::valueOf`.
 *
 * ☡ This function throws if the provided argument is neither a
 * string nor an object with a `[[StringData]]` slot.
 */
export const stringValue = (() => {
  const { valueOf } = stringPrototype;
  return createCallableFunction(valueOf, { name: "stringValue" });
})();
763
/**
 * Returns the string representation of the provided value with
 * leading and trailing Ascii whitespace stripped and every other run
 * of Ascii whitespace collapsed into a single space.
 */
export const stripAndCollapseAsciiWhitespace = ($) => {
  // Collapse every run of Ascii whitespace into one space first, so
  // that at most one space remains at either end to be stripped.
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");
  return stripLeadingAndTrailingAsciiWhitespace(collapsed);
};
777
/**
 * Returns the string representation of the provided value with
 * leading and trailing Ascii whitespace stripped.
 */
export const stripLeadingAndTrailingAsciiWhitespace = ($) => {
  // The first capture group lazily holds everything between the
  // leading and trailing runs of Ascii whitespace. Every part of the
  // pattern may match nothing, so a match is always found.
  const match = call(
    reExec,
    /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u,
    [$],
  );
  return match[1];
};
784
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of `String::substring`.
 */
export const substring = createCallableFunction(
  stringPrototype.substring,
  // Name the function explicitly, like every other wrapper in this
  // module.
  { name: "substring" },
);
792
/**
 * Returns the string of scalar values which results from replacing
 * the (unpaired) surrogates in the string representation of the
 * provided value with U+FFFD.
 */
export const toScalarValueString = (() => {
  const { toWellFormed } = stringPrototype;
  return createCallableFunction(toWellFormed, {
    name: "toScalarValueString",
  });
})();
802
/**
 * Returns the string representation of the provided value.
 *
 * ☡ This function throws for symbols and for other values which
 * cannot be converted to a string.
 */
export const toString = ($) => {
  // Template literal conversion is used (rather than `String()`) so
  // that symbols throw instead of being described.
  return `${$}`;
};
This page took 0.116472 seconds and 5 git commands to generate.