]> Lady’s Gitweb - Pisces/blob - string.js
219fe4892273513ed996f9c0adbc65e4be7ecd54
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022–2023 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import { bind, call, identity, makeCallable } from "./function.js";
11 import {
12 defineOwnProperties,
13 getOwnPropertyDescriptors,
14 getPrototype,
15 objectCreate,
16 setPrototype,
17 } from "./object.js";
18 import { ITERATOR, TO_STRING_TAG } from "./value.js";
19
export const {
  /**
   * A RegExp·like callable object which matches only entire strings
   * and which may enforce an additional constraint.
   *
   * Calling a Matcher with a string returns a truthy result when the
   * string matches and a falsey one otherwise; calling it with a
   * nonstring always returns false. Other methods, like `exec`,
   * coerce their argument to a string first and so may still succeed
   * for nonstrings.
   */
  Matcher,
} = (() => {
  const RE = RegExp;
  const { prototype: rePrototype } = RE;
  const { exec: reExec, toString: reToString } = rePrototype;

  // Every flag accessor on `RegExp.prototype` is captured up front so
  // that later changes to the prototype do not affect Matchers.
  const reGetter = (property) =>
    Object.getOwnPropertyDescriptor(rePrototype, property).get;
  const getDotAll = reGetter("dotAll");
  const getFlags = reGetter("flags");
  const getGlobal = reGetter("global");
  const getHasIndices = reGetter("hasIndices");
  const getIgnoreCase = reGetter("ignoreCase");
  const getMultiline = reGetter("multiline");
  const getSource = reGetter("source");
  const getSticky = reGetter("sticky");
  const getUnicode = reGetter("unicode");

  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new Matcher from the provided source.
     *
     * A regular expression source must have its unicode flag set; its
     * other flags are carried over. Any other source is stringified
     * and compiled as a regular expression with (only) the unicode
     * flag.
     *
     * The optional second argument names the matcher. When it is
     * nullish, the name is derived from the regular expression.
     *
     * The optional third argument is a callable constraint. Whenever
     * a match otherwise appears successful it is called with the
     * string being matched, the match result, and this Matcher; a
     * falsey return value turns the match into a failure.
     *
     * ☡ Throws a TypeError if the source is a regular expression
     * without the unicode flag, or if the constraint is neither null
     * nor callable.
     *
     * ☡ A regular expression with nongreedy quantifiers may stop
     * short of the end of the string even though a whole‐string match
     * exists. Wrap the expression in `^(?:` and `)$` to avoid this.
     */
    constructor(source, name = undefined, constraint = null) {
      // The function passed to `super` becomes the Matcher object.
      // NOTE(review): this relies on `identity` handing back its
      // argument as the constructed value — confirm in function.js.
      super(($) => {
        if (typeof $ !== "string") {
          // Nonstrings never match when a Matcher is called directly.
          return false;
        }
        // Always match from the start; only a first‐attempt match of
        // the whole string which satisfies the constraint counts.
        regExp.lastIndex = 0;
        const result = call(reExec, regExp, [$]);
        return result?.[0] === $ &&
          (constraint === null || constraint($, result, this));
      });

      const regExp = (() => {
        try {
          call(reExec, source, [""]); // throws if source not a RegExp
        } catch {
          // Not a regular expression: compile its string form.
          return new RE(`${source}`, "u");
        }
        if (call(getUnicode, source, [])) {
          // The provided regular expression has a unicode flag; copy
          // it so that the original object is never used again.
          return new RE(source);
        }
        // The provided regular expression has no unicode flag.
        throw new TypeError(
          `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
        );
      })();
      this.#regExp = regExp;

      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      }
      this.#constraint = constraint;

      // Swap in the public prototype, then pin `lastIndex` at zero and
      // install the name.
      const matcher = setPrototype(this, matcherPrototype);
      const matcherName = name != null
        ? `${name}`
        : `Matcher(${call(reToString, regExp, [])})`;
      return defineOwnProperties(matcher, {
        lastIndex: {
          configurable: false,
          enumerable: false,
          value: 0,
          writable: false,
        },
        name: { value: matcherName },
      });
    }

    /** Gets whether the dotAll flag is set on this Matcher. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Runs this Matcher against the string form of the provided value,
     * returning the match result on success and null otherwise.
     *
     * A match only succeeds if the first attempt covers the entire
     * string and the constraint (if any) returns a truthy value.
     *
     * ☡ The returned result is the very object which was handed to
     * the constraint function, so it reflects any changes the
     * constraint made to it.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      const accepted = result?.[0] === string &&
        (constraint === null || constraint(string, result, this));
      return accepted ? result : null;
    }

    /**
     * Gets the flags string of this Matcher.
     *
     * ※ This is read from the underlying RegExp object, which may
     * carry flags that have no dedicated getter on Matchers.
     */
    get flags() {
      return call(getFlags, this.#regExp, []);
    }

    /** Gets whether the global flag is set on this Matcher. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /** Gets whether the hasIndices flag is set on this Matcher. */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /** Gets whether the ignoreCase flag is set on this Matcher. */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /** Gets whether the multiline flag is set on this Matcher. */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source of this Matcher. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is set on this Matcher. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is set on this Matcher.
     *
     * ※ The constructor only ever builds unicode regular expressions.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exported constructor extends RegExp and simply forwards to
  // the internal class above.
  const matcherConstructor = defineOwnProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: { value: "Matcher" },
      length: { value: 1 },
    },
  );

  // Copy the internal class’s members onto the public prototype and
  // point `constructor` at the exported constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    { constructor: { value: matcherConstructor } },
  );

  return { Matcher: matcherConstructor };
})();
250
export const {
  /**
   * Returns the string representation of the provided value with the
   * characters A–Z converted to lowercase. All other characters are
   * left unchanged.
   */
  asciiLowercase,

  /**
   * Returns the string representation of the provided value with the
   * characters a–z converted to uppercase. All other characters are
   * left unchanged.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = String.prototype;

  // The callable wrappers are built once here instead of on every
  // call. They are used as replacer functions: each one receives the
  // matched character first; any further arguments have no effect on
  // case conversion.
  const lowercaseMatch = makeCallable(stringToLowercase);
  const uppercaseMatch = makeCallable(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, lowercaseMatch),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, uppercaseMatch),
  };
})();
283
export const {
  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values (that is,
   * surrogates) are replaced with U+FFFD.
   */
  scalarValues,

  /**
   * Returns the result of converting the provided value to a string of
   * scalar values by replacing (unpaired) surrogate values with
   * U+FFFD.
   */
  scalarValueString,
} = (() => {
  // Capture the builtin iterator machinery up front so that later
  // changes to the prototypes do not affect these functions.
  const { [ITERATOR]: arrayIterator } = Array.prototype;
  const arrayIteratorPrototype = Object.getPrototypeOf(
    [][ITERATOR](),
  );
  const { next: arrayIteratorNext } = arrayIteratorPrototype;
  const iteratorPrototype = Object.getPrototypeOf(
    arrayIteratorPrototype,
  );
  const { [ITERATOR]: stringIterator } = String.prototype;
  const stringIteratorPrototype = Object.getPrototypeOf(
    ""[ITERATOR](),
  );
  const { next: stringIteratorNext } = stringIteratorPrototype;

  /**
   * An iterator object for iterating over code values (either code
   * units or codepoints) in a string.
   *
   * ※ This class is not exposed, although its `next` method is
   * (through the prototype of string code value iterator objects).
   */
  const StringCodeValueIterator = class extends identity {
    #allowSurrogates;
    #baseIterator;

    /**
     * Constructs a new string code value iterator from the provided
     * base iterator.
     *
     * An array iterator as the base makes this a code unit iterator.
     * A string iterator as the base makes this a codepoint iterator
     * when surrogates are allowed and a scalar value iterator when
     * they are not.
     */
    constructor(baseIterator, allowSurrogates = true) {
      // NOTE(review): this relies on `identity` handing back its
      // argument, so that the constructed object is the one created
      // here from the shared prototype — confirm in function.js.
      super(objectCreate(stringCodeValueIteratorPrototype));
      this.#allowSurrogates = !!allowSurrogates;
      this.#baseIterator = baseIterator;
    }

    /** Provides the next code value in the iterator. */
    next() {
      const baseIterator = this.#baseIterator;
      switch (getPrototype(baseIterator)) {
        case arrayIteratorPrototype: {
          // The base iterator is iterating over U·C·S characters;
          // yield the code unit of each.
          const {
            value: ucsCharacter,
            done,
          } = call(arrayIteratorNext, baseIterator, []);
          return done
            ? { value: undefined, done: true }
            : { value: getCodeUnit(ucsCharacter, 0), done: false };
        }
        case stringIteratorPrototype: {
          // The base iterator is iterating over Unicode characters.
          const {
            value: character,
            done,
          } = call(stringIteratorNext, baseIterator, []);
          if (done) {
            // The base iterator has been exhausted.
            return { value: undefined, done: true };
          } else {
            // The base iterator provided a character; yield its
            // codepoint, substituting U+FFFD for a surrogate when
            // surrogates are not allowed.
            const codepoint = getCodepoint(character, 0);
            return {
              value: this.#allowSurrogates || codepoint <= 0xD7FF ||
                  codepoint >= 0xE000
                ? codepoint
                : 0xFFFD,
              done: false,
            };
          }
        }
        default: {
          // Should not be possible!
          throw new TypeError(
            "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
          );
        }
      }
    }
  };

  const {
    next: stringCodeValueIteratorNext,
  } = StringCodeValueIterator.prototype;

  // The prototype actually given to iterator objects: it inherits
  // from the builtin iterator prototype and exposes only `next` and a
  // string tag.
  const stringCodeValueIteratorPrototype = objectCreate(
    iteratorPrototype,
    {
      next: {
        configurable: true,
        enumerable: false,
        value: stringCodeValueIteratorNext,
        writable: true,
      },
      [TO_STRING_TAG]: {
        configurable: true,
        enumerable: false,
        value: "String Code Value Iterator",
        writable: false,
      },
    },
  );

  // Prototype for one‐off iterables over the scalar values of their
  // `source` property; used to spread scalar values into a call
  // through an own iterator method and a bound `next`.
  const scalarValueIterablePrototype = {
    [ITERATOR]() {
      return {
        next: bind(
          stringCodeValueIteratorNext,
          new StringCodeValueIterator(
            call(stringIterator, this.source, []),
            false,
          ),
          [],
        ),
      };
    },
  };

  return {
    codeUnits: ($) =>
      new StringCodeValueIterator(call(arrayIterator, `${$}`, [])),
    codepoints: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        true,
      ),
    scalarValues: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        false,
      ),
    scalarValueString: ($) =>
      stringFromCodepoints(...objectCreate(
        scalarValueIterablePrototype,
        { source: { value: `${$}` } },
      )),
  };
})();
455
/**
 * Returns an iterator over the codepoints in the string representation
 * of the provided value, following the algorithm of
 * String::[Symbol.iterator].
 */
export const characters = (() => {
  const { [ITERATOR]: stringIterator } = String.prototype;
  return makeCallable(stringIterator);
})();
464
/**
 * Returns the character at the provided position in the string
 * representation of the provided value, locating it according to the
 * algorithm of String::codePointAt.
 *
 * Returns undefined when there is no codepoint at that position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return undefined;
  } else {
    // Turn the codepoint back into a (one‐character) string.
    return stringFromCodepoints(codepoint);
  }
};
476
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::charCodeAt.
 *
 * ※ The result is a number, not a string.
 */
export const getCodeUnit = makeCallable(String.prototype.charCodeAt);
483
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value, following the algorithm of
 * String::codePointAt.
 */
export const getCodepoint = (() => {
  const { codePointAt } = String.prototype;
  return makeCallable(codePointAt);
})();
490
/**
 * Returns the index at which the search string first occurs in the
 * string representation of the provided value, following the
 * algorithm of String::indexOf.
 */
export const getFirstSubstringIndex = (() => {
  const { indexOf } = String.prototype;
  return makeCallable(indexOf);
})();
499
/**
 * Returns the index at which the search string last occurs in the
 * string representation of the provided value, following the
 * algorithm of String::lastIndexOf.
 */
export const getLastSubstringIndex = (() => {
  const { lastIndexOf } = String.prototype;
  return makeCallable(lastIndexOf);
})();
508
/**
 * Returns the string produced by joining the items of the provided
 * iterable.
 *
 * The separator defaults to "," when it is not provided.
 *
 * Nullish items are stringified as the empty string.
 */
export const join = (() => {
  const { join: arrayJoin } = Array.prototype;
  const join = ($, separator = ",") => {
    // Collect the iterable into an array first, then stringify the
    // separator and defer to the captured Array::join.
    const items = [...$];
    return call(arrayJoin, items, [`${separator}`]);
  };
  return join;
})();
522
/**
 * Returns a string built from the raw value of a tagged template
 * literal.
 *
 * ※ This is an alias for String.raw.
 */
export const rawString = String.raw;

/**
 * Returns a string built from the provided code units.
 *
 * ※ This is an alias for String.fromCharCode.
 */
export const stringFromCodeUnits = String.fromCharCode;

/**
 * Returns a string built from the provided codepoints.
 *
 * ※ This is an alias for String.fromCodePoint.
 */
export const stringFromCodepoints = String.fromCodePoint;
546
/**
 * Returns the tokens obtained by splitting the provided value on
 * A·S·C·I·I whitespace.
 */
export const splitOnASCIIWhitespace = ($) => {
  // After stripping and collapsing, every remaining run of A·S·C·I·I
  // whitespace is exactly one space.
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
553
/**
 * Returns the tokens obtained by splitting the provided value on
 * commas, with A·S·C·I·I whitespace trimmed from each token.
 */
export const splitOnCommas = ($) => {
  // Remove the whitespace on either side of each comma.
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );
  // Remove the whitespace before the first and after the last token.
  const trimmed = stripLeadingAndTrailingASCIIWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
569
/**
 * Returns a new string made by catenating the string representations
 * of the provided values, following the algorithm of String::concat.
 */
export const stringCatenate = (() => {
  const { concat } = String.prototype;
  return makeCallable(concat);
})();
576
/**
 * Returns whether the string representation of the provided value
 * ends with the provided search string, following the algorithm of
 * String::endsWith.
 */
export const stringEndsWith = (() => {
  const { endsWith } = String.prototype;
  return makeCallable(endsWith);
})();
583
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string, following the algorithm of
 * String::includes.
 */
export const stringIncludes = (() => {
  const { includes } = String.prototype;
  return makeCallable(includes);
})();
590
/**
 * Returns the result of matching the string representation of the
 * provided value against the provided matcher, following the
 * algorithm of String::match.
 */
export const stringMatch = (() => {
  const { match } = String.prototype;
  return makeCallable(match);
})();
597
/**
 * Returns the result of matching the string representation of the
 * provided value against the provided matcher, following the
 * algorithm of String::matchAll.
 */
export const stringMatchAll = (() => {
  const { matchAll } = String.prototype;
  return makeCallable(matchAll);
})();
604
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of String::normalize.
 */
export const stringNormalize = makeCallable(
  String.prototype.normalize,
);
612
/**
 * Returns the string representation of the provided value, padded at
 * its end until it reaches the desired length, following the
 * algorithm of String::padEnd.
 */
export const stringPadEnd = (() => {
  const { padEnd } = String.prototype;
  return makeCallable(padEnd);
})();
619
/**
 * Returns the string representation of the provided value, padded at
 * its start until it reaches the desired length, following the
 * algorithm of String::padStart.
 */
export const stringPadStart = (() => {
  const { padStart } = String.prototype;
  return makeCallable(padStart);
})();
626
/**
 * Returns the string representation of the provided value repeated
 * the provided number of times, following the algorithm of
 * String::repeat.
 */
export const stringRepeat = (() => {
  const { repeat } = String.prototype;
  return makeCallable(repeat);
})();
633
/**
 * Returns the result of replacing, in the string representation of
 * the provided value, what the provided matcher matches with the
 * provided replacement, following the algorithm of String::replace.
 */
export const stringReplace = (() => {
  const { replace } = String.prototype;
  return makeCallable(replace);
})();
640
/**
 * Returns the result of replacing, in the string representation of
 * the provided value, everything the provided matcher matches with
 * the provided replacement, following the algorithm of
 * String::replaceAll.
 */
export const stringReplaceAll = (() => {
  const { replaceAll } = String.prototype;
  return makeCallable(replaceAll);
})();
649
/**
 * Returns the result of searching the string representation of the
 * provided value with the provided matcher, following the algorithm
 * of String::search.
 */
export const stringSearch = (() => {
  const { search } = String.prototype;
  return makeCallable(search);
})();
656
/**
 * Returns a slice of the string representation of the provided value,
 * following the algorithm of String::slice.
 */
export const stringSlice = (() => {
  const { slice } = String.prototype;
  return makeCallable(slice);
})();
662
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator, following the algorithm
 * of String::split.
 */
export const stringSplit = (() => {
  const { split } = String.prototype;
  return makeCallable(split);
})();
669
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string, following the algorithm of
 * String::startsWith.
 */
export const stringStartsWith = (() => {
  const { startsWith } = String.prototype;
  return makeCallable(startsWith);
})();
678
/**
 * Returns the `[[StringData]]` of the provided value.
 *
 * ☡ This function throws if the provided value does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = (() => {
  const { valueOf } = String.prototype;
  return makeCallable(valueOf);
})();
686
/**
 * Returns the string representation of the provided value with
 * leading and trailing A·S·C·I·I whitespace removed and every other
 * run of A·S·C·I·I whitespace collapsed to a single space.
 */
export const stripAndCollapseASCIIWhitespace = ($) => {
  // Collapse first, so that at most one space is left at either end
  // for the stripping step to remove.
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");
  return stripLeadingAndTrailingASCIIWhitespace(collapsed);
};
700
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the string representation of the provided value.
 *
 * A·S·C·I·I whitespace is U+000A, U+000D, U+0009, U+000C, or U+0020.
 * Whitespace in the interior of the string is left untouched.
 *
 * ※ This scans inward from both ends, so it runs in time linear in
 * the length of the string even when the string contains long runs
 * of interior whitespace.
 */
export const stripLeadingAndTrailingASCIIWhitespace = (() => {
  const { slice: stringPrototypeSlice } = String.prototype;
  const isASCIIWhitespace = (character) =>
    character === "\n" || character === "\r" || character === "\t" ||
    character === "\f" || character === " ";
  return ($) => {
    const string = `${$}`;
    let start = 0;
    let end = string.length;
    while (start < end && isASCIIWhitespace(string[start])) {
      // Skip over a leading whitespace character.
      ++start;
    }
    while (end > start && isASCIIWhitespace(string[end - 1])) {
      // Skip over a trailing whitespace character.
      --end;
    }
    return call(stringPrototypeSlice, string, [start, end]);
  };
})();
710
/**
 * Returns a substring of the string representation of the provided
 * value, following the algorithm of String::substring.
 */
export const substring = (() => {
  const { substring } = String.prototype;
  return makeCallable(substring);
})();
716
/**
 * Returns the provided value converted to a string.
 *
 * ☡ This function throws for symbols and for other values which have
 * no string representation.
 */
export const toString = ($) => {
  return `${$}`;
};
This page took 0.100984 seconds and 3 git commands to generate.