]> Lady’s Gitweb - Pisces/blob - string.js
Rename make⸺ to create⸺ in function.js
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import {
11 bind,
12 call,
13 createCallableFunction,
14 identity,
15 } from "./function.js";
16 import {
17 arrayIteratorFunction,
18 stringIteratorFunction,
19 } from "./iterable.js";
20 import {
21 defineOwnProperties,
22 getOwnPropertyDescriptors,
23 getPrototype,
24 objectCreate,
25 setPrototype,
26 } from "./object.js";
27 import { ITERATOR } from "./value.js";
28
// Local aliases for the intrinsics used throughout this module, captured
// once at module evaluation.
const RE = RegExp;
const rePrototype = RE.prototype;
const arrayPrototype = Array.prototype;
const stringPrototype = String.prototype;

// The original `RegExp::exec` method.
const reExec = rePrototype.exec;
35
export const {
  /**
   * A `RegExp`like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although other methods like `::exec` coerce their arguments and
   * may still return a match.
   */
  Matcher,
} = (() => {
  // Capture the original `RegExp.prototype` accessors and methods so
  // that they can be applied to the private, wrapped regular
  // expression.
  const { toString: reToString } = rePrototype;
  const getDotAll =
    Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get;
  const getFlags =
    Object.getOwnPropertyDescriptor(rePrototype, "flags").get;
  const getGlobal =
    Object.getOwnPropertyDescriptor(rePrototype, "global").get;
  const getHasIndices =
    Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get;
  const getIgnoreCase =
    Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get;
  const getMultiline =
    Object.getOwnPropertyDescriptor(rePrototype, "multiline").get;
  const getSource =
    Object.getOwnPropertyDescriptor(rePrototype, "source").get;
  const getSticky =
    Object.getOwnPropertyDescriptor(rePrototype, "sticky").get;
  const getUnicode =
    Object.getOwnPropertyDescriptor(rePrototype, "unicode").get;

  // NOTE(review): extending `identity` presumably makes `this` the
  // function passed to `super`, so that the private fields below are
  // installed on a callable object — confirm against ./function.js.
  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new `Matcher` from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the
     * `Matcher` object itself. If the return value of this call is
     * falsey, then the match will be considered a failure.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     *
     * ☡ This constructor throws a `TypeError` if the source is a
     * regular expression without the unicode flag, or if the
     * constraint is neither null nor a function.
     */
    constructor(source, name = undefined, constraint = null) {
      super(
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `.lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            //
            // The result of the constraint is coerced to a boolean so
            // that calling a matcher always yields true or false.
            //
            // ※ `regExp` is declared after this `super` call, but this
            // function is defined here and only called later.
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $ &&
              (constraint === null || !!constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        try {
          call(reExec, source, [""]); // throws if source not a RegExp
        } catch {
          // The source is not a regular expression; treat it as the
          // string source of one, with the unicode flag set.
          return new RE(`${source}`, "u");
        }
        const unicode = call(getUnicode, source, []);
        if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag. Copy
          // it so that the original is never modified.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            // `lastIndex` is fixed at 0 on the matcher itself; only
            // the private regular expression has its index reset.
            lastIndex: {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            },
            name: {
              value: name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            },
          },
        );
      }
    }

    /** Gets whether the dot‐all flag is present on this `Matcher`. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this `Matcher` on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /**
     * Gets the flags present on this `Matcher`.
     *
     * ※ This needs to be defined because the internal `RegExp` object
     * may have flags which are not yet recognized by ♓🌟 Piscēs.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether the global flag is present on this `Matcher`. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /**
     * Gets whether the has‐indices flag is present on this `Matcher`.
     */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /**
     * Gets whether the ignore‐case flag is present on this `Matcher`.
     */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /**
     * Gets whether the multiline flag is present on this `Matcher`.
     */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this `Matcher`. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this `Matcher`. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this `Matcher`.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exported constructor extends `RegExp` (so its prototype
  // inherits from `RegExp.prototype`) but delegates all construction
  // to the private class above by returning its result.
  const matcherConstructor = defineOwnProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: { value: "Matcher" },
      length: { value: 1 },
    },
  );

  // Copy the methods and accessors of the private class onto the
  // exported prototype, then point `constructor` at the exported
  // constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    { constructor: { value: matcherConstructor } },
  );

  return { Matcher: matcherConstructor };
})();
270
export const {
  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * lowercase.
   *
   * Only the letters `A`–`Z` in the string representation of the
   * provided value are changed.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * uppercase.
   *
   * Only the letters `a`–`z` in the string representation of the
   * provided value are changed.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = stringPrototype;

  // Build the replacer callbacks once, rather than creating a new
  // function on every call.
  const lowercaseMatch = createCallableFunction(stringToLowercase);
  const uppercaseMatch = createCallableFunction(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(
        `${$}`,
        /[A-Z]/gu,
        lowercaseMatch,
      ),
    asciiUppercase: ($) =>
      stringReplaceAll(
        `${$}`,
        /[a-z]/gu,
        uppercaseMatch,
      ),
  };
})();
303
export const {
  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,

  /**
   * Returns the result of converting the provided value to a string of
   * scalar values by replacing (unpaired) surrogate values with
   * U+FFFD.
   */
  scalarValueString,
} = (() => {
  // Yields the first code unit of the provided value.
  const generateCodeUnits = function* (ucsCharacter) {
    yield getCodeUnit(ucsCharacter, 0);
  };

  // Yields the first codepoint of the provided value. Surrogates are
  // replaced with U+FFFD unless `this.allowSurrogates` is truthy.
  const generateCodepoints = function* (character) {
    const keepSurrogates = this.allowSurrogates;
    const value = getCodepoint(character, 0);
    if (keepSurrogates || value < 0xD800 || value > 0xDFFF) {
      yield value;
    } else {
      yield 0xFFFD;
    }
  };

  const codeUnitsIterator = arrayIteratorFunction(
    generateCodeUnits,
    "String Code Unit Iterator",
  );
  const withSurrogates = { allowSurrogates: true };
  const codepointsIterator = stringIteratorFunction(
    bind(generateCodepoints, withSurrogates, []),
    "String Codepoint Iterator",
  );
  const withoutSurrogates = { allowSurrogates: false };
  const scalarValuesIterator = stringIteratorFunction(
    bind(generateCodepoints, withoutSurrogates, []),
    "String Scalar Value Iterator",
  );

  // Take the `next` method from the prototype of a scalar value
  // iterator, so that it can be called without a property lookup on
  // the iterator itself.
  const scalarValuesPrototype = getPrototype(scalarValuesIterator(""));
  const scalarValuesNext = scalarValuesPrototype.next;

  // Objects with this prototype are iterable over the scalar values of
  // their `source` property.
  const scalarValueIterablePrototype = {
    [ITERATOR]() {
      const iterator = scalarValuesIterator(this.source);
      return { next: bind(scalarValuesNext, iterator, []) };
    },
  };

  const codeUnits = ($) => codeUnitsIterator(`${$}`);
  const codepoints = ($) => codepointsIterator(`${$}`);
  const scalarValues = ($) => scalarValuesIterator(`${$}`);
  const scalarValueString = ($) => {
    const iterable = objectCreate(
      scalarValueIterablePrototype,
      { source: { value: `${$}` } },
    );
    return stringFromCodepoints(...iterable);
  };

  return { codeUnits, codepoints, scalarValues, scalarValueString };
})();
382
/**
 * Returns an iterator over the codepoints in the string representation
 * of the provided value, following the algorithm of
 * `String::[Symbol.iterator]`.
 */
export const characters = (() => {
  const { [ITERATOR]: stringIterator } = stringPrototype;
  return createCallableFunction(stringIterator, "characters");
})();
392
/**
 * Returns the character (as a string) at the provided position in the
 * string representation of the provided value. The position is
 * resolved according to the algorithm of `String::codePointAt`.
 *
 * Returns undefined if there is no codepoint at that position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return undefined;
  } else {
    // There is a codepoint; convert it back into a string.
    return stringFromCodepoints(codepoint);
  }
};
404
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value, following the algorithm of
 * `String::charCodeAt`.
 */
export const getCodeUnit = (() => {
  const { charCodeAt: stringCodeUnitAt } = stringPrototype;
  return createCallableFunction(stringCodeUnitAt, "getCodeUnit");
})();
414
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value, following the algorithm of
 * `String::codePointAt`.
 */
export const getCodepoint = (() => {
  const { codePointAt: stringCodepointAt } = stringPrototype;
  return createCallableFunction(stringCodepointAt, "getCodepoint");
})();
424
/**
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value, following the
 * algorithm of `String::indexOf`.
 */
export const getFirstSubstringIndex = (() => {
  const { indexOf: stringIndexOf } = stringPrototype;
  return createCallableFunction(
    stringIndexOf,
    "getFirstSubstringIndex",
  );
})();
434
/**
 * Returns the index of the last occurrence of the search string in the
 * string representation of the provided value, following the
 * algorithm of `String::lastIndexOf`.
 */
export const getLastSubstringIndex = (() => {
  const { lastIndexOf: stringLastIndexOf } = stringPrototype;
  return createCallableFunction(
    stringLastIndexOf,
    "getLastSubstringIndex",
  );
})();
444
/**
 * Returns the result of joining the values of the provided iterable
 * into a single string.
 *
 * The separator defaults to "," if not provided.
 *
 * Nullish values are stringified as the empty string, as with
 * `Array::join`.
 */
export const join = (() => {
  const { join: arrayJoin } = arrayPrototype;
  const join = ($, separator = ",") => {
    // Collect the iterable into an array first, then stringify the
    // separator, then join.
    const items = [...$];
    const glue = `${separator}`;
    return call(arrayJoin, items, [glue]);
  };
  return join;
})();
458
/**
 * Returns a string created from the raw value of the tagged template
 * literal.
 *
 * ※ This is an alias for `String.raw`.
 */
export const rawString = String.raw;

/**
 * Returns a string created from the provided code units.
 *
 * ※ This is an alias for `String.fromCharCode`.
 */
export const stringFromCodeUnits = String.fromCharCode;

/**
 * Returns a string created from the provided codepoints.
 *
 * ※ This is an alias for `String.fromCodePoint`.
 */
export const stringFromCodepoints = String.fromCodePoint;
482
/**
 * Returns the result of splitting the provided value on A·S·C·I·I
 * whitespace.
 *
 * The value is first stripped and collapsed, and then split on single
 * spaces.
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
489
/**
 * Returns the result of splitting the provided value on commas,
 * trimming A·S·C·I·I whitespace from the resulting tokens.
 */
export const splitOnCommas = ($) => {
  // Remove the A·S·C·I·I whitespace surrounding each comma.
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );

  // Remove the whitespace before the first token and after the last
  // one, and then split on the (now bare) commas.
  const trimmed = stripLeadingAndTrailingASCIIWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
505
/**
 * Returns a new string made by catenating the string representations
 * of the provided values, following the algorithm of
 * `String::concat`.
 */
export const stringCatenate = (() => {
  const { concat: stringConcat } = stringPrototype;
  return createCallableFunction(stringConcat, "stringCatenate");
})();
515
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string, following the algorithm of
 * `String::endsWith`.
 */
export const stringEndsWith = (() => {
  const { endsWith } = stringPrototype;
  return createCallableFunction(endsWith, "stringEndsWith");
})();
525
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string, following the algorithm of
 * `String::includes`.
 */
export const stringIncludes = (() => {
  const { includes } = stringPrototype;
  return createCallableFunction(includes, "stringIncludes");
})();
535
/**
 * Returns the result of matching the string representation of the
 * provided value against the provided matcher, following the
 * algorithm of `String::match`.
 */
export const stringMatch = (() => {
  const { match } = stringPrototype;
  return createCallableFunction(match, "stringMatch");
})();
545
/**
 * Returns the result of matching the string representation of the
 * provided value against the provided matcher, following the
 * algorithm of `String::matchAll`.
 */
export const stringMatchAll = (() => {
  const { matchAll } = stringPrototype;
  return createCallableFunction(matchAll, "stringMatchAll");
})();
555
/**
 * Returns the normalized form of the string representation of the
 * provided value, following the algorithm of `String::normalize`.
 */
export const stringNormalize = (() => {
  const { normalize } = stringPrototype;
  return createCallableFunction(normalize, "stringNormalize");
})();
564
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length, following the
 * algorithm of `String::padEnd`.
 */
export const stringPadEnd = (() => {
  const { padEnd } = stringPrototype;
  return createCallableFunction(padEnd, "stringPadEnd");
})();
574
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length, following the
 * algorithm of `String::padStart`.
 */
export const stringPadStart = (() => {
  const { padStart } = stringPrototype;
  return createCallableFunction(padStart, "stringPadStart");
})();
584
/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times, following the
 * algorithm of `String::repeat`.
 */
export const stringRepeat = (() => {
  const { repeat } = stringPrototype;
  return createCallableFunction(repeat, "stringRepeat");
})();
594
/**
 * Returns the result of replacing within the string representation of
 * the provided value, using the provided matcher and the provided
 * replacement, following the algorithm of `String::replace`.
 */
export const stringReplace = (() => {
  const { replace } = stringPrototype;
  return createCallableFunction(replace, "stringReplace");
})();
604
/**
 * Returns the result of replacing within the string representation of
 * the provided value, using the provided matcher and the provided
 * replacement, following the algorithm of `String::replaceAll`.
 */
export const stringReplaceAll = (() => {
  const { replaceAll } = stringPrototype;
  return createCallableFunction(replaceAll, "stringReplaceAll");
})();
614
/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher, following the algorithm
 * of `String::search`.
 */
export const stringSearch = (() => {
  const { search } = stringPrototype;
  return createCallableFunction(search, "stringSearch");
})();
624
/**
 * Returns a slice of the string representation of the provided value,
 * following the algorithm of `String::slice`.
 */
export const stringSlice = (() => {
  const { slice } = stringPrototype;
  return createCallableFunction(slice, "stringSlice");
})();
633
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator, following the algorithm
 * of `String::split`.
 */
export const stringSplit = (() => {
  const { split } = stringPrototype;
  return createCallableFunction(split, "stringSplit");
})();
643
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string, following the algorithm of
 * `String::startsWith`.
 */
export const stringStartsWith = (() => {
  const { startsWith } = stringPrototype;
  return createCallableFunction(startsWith, "stringStartsWith");
})();
653
/**
 * Returns the `[[StringData]]` of the provided value, following the
 * algorithm of `String::valueOf`.
 *
 * ☡ This function will throw if the provided object does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = (() => {
  const { valueOf: stringValueOf } = stringPrototype;
  return createCallableFunction(stringValueOf, "stringValue");
})();
664
/**
 * Returns the result of collapsing each run of A·S·C·I·I whitespace
 * in the string representation of the provided value into a single
 * space, and then stripping leading and trailing A·S·C·I·I
 * whitespace.
 */
export const stripAndCollapseASCIIWhitespace = ($) => {
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");
  return stripLeadingAndTrailingASCIIWhitespace(collapsed);
};
678
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the string representation of the provided value.
 */
export const stripLeadingAndTrailingASCIIWhitespace = ($) => {
  // The pattern can match any string: optional leading whitespace, a
  // lazily‐matched middle (the first capture group), and optional
  // trailing whitespace.
  const pattern = /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u;
  const match = call(reExec, pattern, [$]);
  return match[1];
};
685
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of `String::substring`.
 */
export const substring = createCallableFunction(
  stringPrototype.substring,
  // Name the function explicitly, as every other wrapper in this
  // module does.
  "substring",
);
693
/**
 * Returns the string representation of the provided value.
 *
 * ☡ This function throws for symbols and for other values which
 * cannot be converted to a string.
 */
export const toString = ($) => {
  return `${$}`;
};
This page took 0.102107 seconds and 5 git commands to generate.