]> Lady’s Gitweb - Pisces/blob - string.js
0c603acbfa47b70c051c1c452ab3bbce582a88c7
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import {
11 bind,
12 call,
13 createArrowFunction,
14 createCallableFunction,
15 identity,
16 } from "./function.js";
17 import {
18 arrayIteratorFunction,
19 stringIteratorFunction,
20 } from "./iterable.js";
21 import {
22 defineOwnProperties,
23 getOwnPropertyDescriptors,
24 setPrototype,
25 } from "./object.js";
26
// Module‐level references to the intrinsic `RegExp` constructor and to
// the `RegExp`, `Array`, and `String` prototype objects, captured once
// when this module is evaluated and shared by the definitions below.
const RE = RegExp;
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

// The intrinsic `RegExp::exec` method, for use with `call`.
const reExec = rePrototype.exec;
33
export const {
  /**
   * A `RegExp`like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although other methods like `::exec` coerce their arguments and
   * may still produce a match.
   */
  Matcher,
} = (() => {
  const { toString: reToString } = rePrototype;

  // The intrinsic flag and source getters of `RegExp.prototype`,
  // extracted so that they can be applied to the private regular
  // expression of a `Matcher` with `call`.
  const getDotAll =
    Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get;
  const getFlags =
    Object.getOwnPropertyDescriptor(rePrototype, "flags").get;
  const getGlobal =
    Object.getOwnPropertyDescriptor(rePrototype, "global").get;
  const getHasIndices =
    Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get;
  const getIgnoreCase =
    Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get;
  const getMultiline =
    Object.getOwnPropertyDescriptor(rePrototype, "multiline").get;
  const getSource =
    Object.getOwnPropertyDescriptor(rePrototype, "source").get;
  const getSticky =
    Object.getOwnPropertyDescriptor(rePrototype, "sticky").get;
  const getUnicode =
    Object.getOwnPropertyDescriptor(rePrototype, "unicode").get;

  // The internal class. It is never exposed directly; its members are
  // copied onto the prototype of the exported constructor below.
  //
  // ※ Presumably `identity` returns the value it is constructed with,
  // so that `this` becomes the arrow function passed to `super` —
  // confirm against `./function.js`.
  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new `Matcher` from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the
     * `Matcher` object itself. If the return value of this call is
     * falsey, then the match will be considered a failure.
     *
     * ☡ This constructor throws a `TypeError` if the provided source
     * is a regular expression without the unicode flag, or if the
     * provided constraint is neither null nor callable.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     */
    constructor(source, name = undefined, constraint = null) {
      super(
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `.lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            //
            // The result of the constraint is coerced to a boolean so
            // that calling a matcher always yields true or false, even
            // when the constraint returns some other truthy value.
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $ &&
              (constraint === null || !!constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        try {
          // ※ Per the `RegExp::exec` algorithm, this probe also
          // resets the `.lastIndex` of a global or sticky source
          // to 0.
          call(reExec, source, [""]); // throws if source not a RegExp
        } catch {
          return new RE(`${source}`, "u");
        }
        const unicode = call(getUnicode, source, []);
        if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            lastIndex: {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            },
            name: {
              value: name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            },
          },
        );
      }
    }

    /** Gets whether the dot‐all flag is present on this `Matcher`. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this `Matcher` on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /**
     * Gets the flags present on this `Matcher`.
     *
     * ※ This needs to be defined because the internal `RegExp` object
     * may have flags which are not yet recognized by ♓🌟 Piscēs.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether the global flag is present on this `Matcher`. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /**
     * Gets whether the has‐indices flag is present on this `Matcher`.
     */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /**
     * Gets whether the ignore‐case flag is present on this `Matcher`.
     */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /**
     * Gets whether the multiline flag is present on this `Matcher`.
     */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this `Matcher`. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this `Matcher`. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this `Matcher`.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exported constructor. It extends `RegExp` (so its prototype
  // inherits from `RegExp.prototype`) but simply delegates
  // construction to the internal class above.
  const matcherConstructor = defineOwnProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: { value: "Matcher" },
      length: { value: 1 },
    },
  );

  // The prototype given to every constructed matcher: the prototype of
  // the exported constructor, with the members of the internal class
  // copied onto it and `constructor` pointing at the exported
  // constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    { constructor: { value: matcherConstructor } },
  );

  return { Matcher: matcherConstructor };
})();
268
export const {
  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * lowercase.
   *
   * Only the characters `A` through `Z` in the string representation
   * of the provided value are changed.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * uppercase.
   *
   * Only the characters `a` through `z` in the string representation
   * of the provided value are changed.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = stringPrototype;

  // The replacement callbacks are built once here, rather than anew on
  // every call to the exported functions.
  const lowercaseMatch = createCallableFunction(stringToLowercase);
  const uppercaseMatch = createCallableFunction(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, lowercaseMatch),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, uppercaseMatch),
  };
})();
301
export const {
  /**
   * Returns an iterator over the characters in the string
   * representation of the provided value according to the algorithm
   * of `String::[Symbol.iterator]`.
   */
  characters,

  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,
} = (() => {
  /** Yields the provided character unchanged. */
  function* generateCharacters(character) {
    yield character;
  }

  /** Yields the first code unit of the provided value. */
  function* generateCodeUnits(ucsCharacter) {
    yield getCodeUnit(ucsCharacter, 0);
  }

  /**
   * Yields the first codepoint of the provided character, or U+FFFD
   * if that codepoint is not a scalar value and the `this` value does
   * not have a truthy `allowSurrogates` property.
   */
  function* generateCodepoints(character) {
    const { allowSurrogates } = this;
    const codepoint = getCodepoint(character, 0);
    if (
      allowSurrogates || codepoint <= 0xD7FF || codepoint >= 0xE000
    ) {
      // Either surrogates are allowed or the codepoint lies outside
      // of the surrogate range.
      yield codepoint;
    } else {
      // The codepoint is not acceptable; substitute U+FFFD.
      yield 0xFFFD;
    }
  }

  // The codepoint and scalar value iterators share one generator and
  // differ only in the `this` value bound to it.
  const surrogatesAllowed = { allowSurrogates: true };
  const surrogatesReplaced = { allowSurrogates: false };

  const charactersIterator = stringIteratorFunction(
    generateCharacters,
    "String Character Iterator",
  );
  const codeUnitsIterator = arrayIteratorFunction(
    generateCodeUnits,
    "String Code Unit Iterator",
  );
  const codepointsIterator = stringIteratorFunction(
    bind(generateCodepoints, surrogatesAllowed, []),
    "String Codepoint Iterator",
  );
  const scalarValuesIterator = stringIteratorFunction(
    bind(generateCodepoints, surrogatesReplaced, []),
    "String Scalar Value Iterator",
  );

  // Each exported function stringifies its argument before handing it
  // to the corresponding iterator function.
  return {
    characters: ($) => {
      const string = `${$}`;
      return charactersIterator(string);
    },
    codeUnits: ($) => {
      const string = `${$}`;
      return codeUnitsIterator(string);
    },
    codepoints: ($) => {
      const string = `${$}`;
      return codepointsIterator(string);
    },
    scalarValues: ($) => {
      const string = `${$}`;
      return scalarValuesIterator(string);
    },
  };
})();
369
/**
 * Returns the character at the provided position in the string
 * representation of the provided value, as found by the algorithm of
 * `String::codePointAt`.
 *
 * If no codepoint exists at the provided position, undefined is
 * returned.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return undefined;
  } else {
    // Convert the codepoint back into a string.
    return stringFromCodepoints(codepoint);
  }
};
381
export const {
  /**
   * Returns the code unit at the provided position in the string
   * representation of the provided value according to the algorithm of
   * `String::charCodeAt`, except that out‐of‐bounds values return
   * undefined in place of nan.
   */
  getCodeUnit,

  /**
   * Returns the result of catenating the string representations of the
   * provided values, returning a new string according to the algorithm
   * of `String::concat`.
   *
   * ※ If no arguments are given, this function returns the empty
   * string. This is different behaviour than if an explicit undefined
   * first argument is given, in which case the resulting string will
   * begin with `"undefined"`.
   */
  stringCatenate,
} = (() => {
  const { charCodeAt, concat } = stringPrototype;
  const { isNaN: isNan } = Number;

  return {
    getCodeUnit: ($, n) => {
      // `String::charCodeAt` signals an out‐of‐bounds position with
      // nan; report that as undefined instead.
      const codeUnit = call(charCodeAt, $, [n]);
      return isNan(codeUnit) ? undefined : codeUnit;
    },
    stringCatenate: defineOwnProperties(
      // Catenation starts from the empty string, so every argument is
      // treated alike.
      (...args) => call(concat, "", args),
      { name: { value: "stringCatenate" }, length: { value: 2 } },
    ),
  };
})();
417
/**
 * Callable form of `String::codePointAt`.
 *
 * Returns the codepoint at the provided position in the string
 * representation of the provided value.
 */
export const getCodepoint = (() => {
  const { codePointAt } = stringPrototype;
  return createCallableFunction(codePointAt, { name: "getCodepoint" });
})();
427
/**
 * Callable form of `String::indexOf`.
 *
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value.
 */
export const getFirstSubstringIndex = (() => {
  const { indexOf } = stringPrototype;
  return createCallableFunction(
    indexOf,
    { name: "getFirstSubstringIndex" },
  );
})();
437
/**
 * Callable form of `String::lastIndexOf`.
 *
 * Returns the index of the last occurrence of the search string in
 * the string representation of the provided value.
 */
export const getLastSubstringIndex = (() => {
  const { lastIndexOf } = stringPrototype;
  return createCallableFunction(
    lastIndexOf,
    { name: "getLastSubstringIndex" },
  );
})();
447
/**
 * Returns the result of joining the items of the provided iterable
 * into a single string.
 *
 * The separator defaults to "," when it is undefined; any other
 * separator is converted to a string.
 *
 * Nullish items are stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = arrayPrototype;
  const join = ($, separator) => {
    // Collect the items first, then resolve the separator, so that
    // the iterable is consumed before the separator is stringified.
    const items = [...$];
    const glue = separator === undefined ? "," : `${separator}`;
    return call(arrayJoin, items, [glue]);
  };
  return join;
})();
465
/**
 * Returns a string built from the raw value of a tagged template
 * literal.
 *
 * ※ This is effectively an alias for `String.raw`.
 */
export const rawString = (() => {
  const { raw } = String;
  return createArrowFunction(raw, { name: "rawString" });
})();
475
export const {
  /**
   * Returns a string created from the provided code units.
   *
   * ※ This is effectively an alias for `String.fromCharCode`, but
   * with the same error behaviour as `String.fromCodePoint`.
   *
   * ☡ This function throws a `RangeError` if provided with an
   * argument which is not an integral number from 0 to FFFF₁₆
   * inclusive.
   */
  stringFromCodeUnits,
} = (() => {
  const { fromCharCode } = String;
  const { isInteger: isIntegralNumber } = Number;

  return {
    stringFromCodeUnits: defineOwnProperties(
      (...codeUnits) => {
        // Validate every provided code unit, in order, before
        // building the string.
        const { length } = codeUnits;
        let index = 0;
        while (index < length) {
          const nextCU = +codeUnits[index];
          const acceptable = isIntegralNumber(nextCU) &&
            nextCU >= 0 && nextCU <= 0xFFFF;
          if (!acceptable) {
            // The code unit is not an integral number between 0 and
            // 0xFFFF.
            throw new RangeError(
              `Piscēs: Code unit out of range: ${nextCU}.`,
            );
          }
          ++index;
        }
        return call(fromCharCode, undefined, codeUnits);
      },
      { name: { value: "stringFromCodeUnits" }, length: { value: 1 } },
    ),
  };
})();
517
/**
 * Returns a string built from the provided codepoints.
 *
 * ※ This is effectively an alias for `String.fromCodePoint`.
 *
 * ☡ This function throws an error if provided with an argument which
 * is not an integral number from 0 to 10FFFF₁₆ inclusive.
 */
export const stringFromCodepoints = (() => {
  const { fromCodePoint } = String;
  return createArrowFunction(
    fromCodePoint,
    { name: "stringFromCodepoints" },
  );
})();
530
/**
 * Returns the result of splitting the provided value on A·S·C·I·I
 * whitespace.
 *
 * The value is first stripped and collapsed with
 * `stripAndCollapseASCIIWhitespace`, and the result is then split on
 * single spaces.
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
537
/**
 * Returns the result of splitting the provided value on commas,
 * trimming A·S·C·I·I whitespace from the resulting tokens.
 */
export const splitOnCommas = ($) => {
  // Remove any A·S·C·I·I whitespace which surrounds a comma.
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );

  // Remove the whitespace at the very start and end, which is not
  // adjacent to any comma.
  const stripped = stripLeadingAndTrailingASCIIWhitespace(tightened);

  return stringSplit(stripped, ",");
};
553
/**
 * Callable form of `String::endsWith`.
 *
 * Returns whether the string representation of the provided value
 * ends with the provided search string.
 */
export const stringEndsWith = (() => {
  const { endsWith } = stringPrototype;
  return createCallableFunction(endsWith, { name: "stringEndsWith" });
})();
563
/**
 * Callable form of `String::includes`.
 *
 * Returns whether the string representation of the provided value
 * contains the provided search string.
 */
export const stringIncludes = (() => {
  const { includes } = stringPrototype;
  return createCallableFunction(includes, { name: "stringIncludes" });
})();
573
/**
 * Callable form of `String::match`.
 *
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher.
 */
export const stringMatch = (() => {
  const { match } = stringPrototype;
  return createCallableFunction(match, { name: "stringMatch" });
})();
583
/**
 * Callable form of `String::matchAll`.
 *
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher.
 */
export const stringMatchAll = (() => {
  const { matchAll } = stringPrototype;
  return createCallableFunction(matchAll, { name: "stringMatchAll" });
})();
593
/**
 * Callable form of `String::normalize`.
 *
 * Returns the normalized form of the string representation of the
 * provided value.
 */
export const stringNormalize = (() => {
  const { normalize } = stringPrototype;
  return createCallableFunction(
    normalize,
    { name: "stringNormalize" },
  );
})();
602
/**
 * Callable form of `String::padEnd`.
 *
 * Returns the string representation of the provided value, padded at
 * its end until it is the desired length.
 */
export const stringPadEnd = (() => {
  const { padEnd } = stringPrototype;
  return createCallableFunction(padEnd, { name: "stringPadEnd" });
})();
612
/**
 * Callable form of `String::padStart`.
 *
 * Returns the string representation of the provided value, padded at
 * its start until it is the desired length.
 */
export const stringPadStart = (() => {
  const { padStart } = stringPrototype;
  return createCallableFunction(padStart, { name: "stringPadStart" });
})();
622
/**
 * Callable form of `String::repeat`.
 *
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times.
 */
export const stringRepeat = (() => {
  const { repeat } = stringPrototype;
  return createCallableFunction(repeat, { name: "stringRepeat" });
})();
632
/**
 * Callable form of `String::replace`.
 *
 * Returns the result of replacing, within the string representation
 * of the provided value, what the provided matcher matches with the
 * provided replacement.
 */
export const stringReplace = (() => {
  const { replace } = stringPrototype;
  return createCallableFunction(replace, { name: "stringReplace" });
})();
642
/**
 * Callable form of `String::replaceAll`.
 *
 * Returns the result of replacing, within the string representation
 * of the provided value, everything the provided matcher matches with
 * the provided replacement.
 */
export const stringReplaceAll = (() => {
  const { replaceAll } = stringPrototype;
  return createCallableFunction(
    replaceAll,
    { name: "stringReplaceAll" },
  );
})();
652
/**
 * Callable form of `String::search`.
 *
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher.
 */
export const stringSearch = (() => {
  const { search } = stringPrototype;
  return createCallableFunction(search, { name: "stringSearch" });
})();
662
/**
 * Callable form of `String::slice`.
 *
 * Returns a slice of the string representation of the provided value.
 */
export const stringSlice = (() => {
  const { slice } = stringPrototype;
  return createCallableFunction(slice, { name: "stringSlice" });
})();
671
/**
 * Callable form of `String::split`.
 *
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator.
 */
export const stringSplit = (() => {
  const { split } = stringPrototype;
  return createCallableFunction(split, { name: "stringSplit" });
})();
681
/**
 * Callable form of `String::startsWith`.
 *
 * Returns whether the string representation of the provided value
 * starts with the provided search string.
 */
export const stringStartsWith = (() => {
  const { startsWith } = stringPrototype;
  return createCallableFunction(
    startsWith,
    { name: "stringStartsWith" },
  );
})();
691
/**
 * Returns the value of the provided string.
 *
 * ※ This is effectively an alias for `String::valueOf`.
 *
 * ☡ This function throws if the provided argument is not a string and
 * does not have a `[[StringData]]` slot.
 */
export const stringValue = (() => {
  const { valueOf } = stringPrototype;
  return createCallableFunction(valueOf, { name: "stringValue" });
})();
704
/**
 * Returns the string representation of the provided value with each
 * run of A·S·C·I·I whitespace collapsed into a single space and with
 * leading and trailing A·S·C·I·I whitespace then stripped.
 */
export const stripAndCollapseASCIIWhitespace = ($) => {
  // Replace every run of A·S·C·I·I whitespace with one space.
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");

  // Drop whatever whitespace remains at either end.
  return stripLeadingAndTrailingASCIIWhitespace(collapsed);
};
718
/**
 * Returns the string representation of the provided value with any
 * leading and trailing A·S·C·I·I whitespace removed.
 */
export const stripLeadingAndTrailingASCIIWhitespace = ($) => {
  // Every part of the pattern is optional, so it matches any string;
  // the first capture group holds what lies between the leading and
  // trailing whitespace.
  const match = call(
    reExec,
    /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u,
    [$],
  );
  return match[1];
};
725
/**
 * Callable form of `String::substring`.
 *
 * Returns a substring of the string representation of the provided
 * value.
 *
 * ※ Unlike its siblings, no name override is passed here; presumably
 * the result takes its name from the wrapped method — confirm against
 * `createCallableFunction` in `./function.js`.
 */
export const substring = (() => {
  const { substring: nativeSubstring } = stringPrototype;
  return createCallableFunction(nativeSubstring);
})();
733
/**
 * Callable form of `String::toWellFormed`.
 *
 * Returns the result of converting the provided value to a string of
 * scalar values by replacing (unpaired) surrogate values with
 * U+FFFD.
 */
export const toScalarValueString = (() => {
  const { toWellFormed } = stringPrototype;
  return createCallableFunction(
    toWellFormed,
    { name: "toScalarValueString" },
  );
})();
743
/**
 * Returns the string representation of the provided value, as
 * produced by interpolating it into a template literal.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => {
  return `${$}`;
};
This page took 0.104616 seconds and 3 git commands to generate.