]> Lady’s Gitweb - Pisces/blob - string.js
3dc39acade9259c7c5120c28cde20ad398225426
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import { bind, call, identity, makeCallable } from "./function.js";
11 import {
12 defineOwnProperties,
13 getOwnPropertyDescriptors,
14 getPrototype,
15 objectCreate,
16 setPrototype,
17 } from "./object.js";
18
export const {
  /**
   * A RegExp·like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although other methods like `exec` coerce their arguments and may
   * still return a match.
   *
   * ※ The result of calling a Matcher is always a boolean, even when
   * the constraint function returns some other truthy or falsey value.
   */
  Matcher,
} = (() => {
  const RE = RegExp;
  const { prototype: rePrototype } = RE;
  const { exec: reExec, toString: reToString } = rePrototype;

  // The flag and source getters are captured once here so that later
  // changes to `RegExp.prototype` do not affect existing Matchers.
  const getDotAll =
    Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get;
  const getGlobal =
    Object.getOwnPropertyDescriptor(rePrototype, "global").get;
  const getHasIndices =
    Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get;
  const getIgnoreCase =
    Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get;
  const getMultiline =
    Object.getOwnPropertyDescriptor(rePrototype, "multiline").get;
  const getSource =
    Object.getOwnPropertyDescriptor(rePrototype, "source").get;
  const getSticky =
    Object.getOwnPropertyDescriptor(rePrototype, "sticky").get;
  const getUnicode =
    Object.getOwnPropertyDescriptor(rePrototype, "unicode").get;

  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new Matcher from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the Matcher
     * object itself. If the return value of this call is falsey, then
     * the match will be considered a failure.
     *
     * Throws a TypeError if the source is a regular expression
     * without the unicode flag, or if the constraint is neither null
     * nor callable.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     */
    constructor(source, name = undefined, constraint = null) {
      // `super` receives the function which becomes `this`; it is only
      // ever called after construction, by which point `regExp` (and
      // `this`) have been initialized.
      super(
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            //
            // The constraint result is coerced so that the documented
            // contract (true or false) holds for any constraint.
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $ &&
              (constraint === null || !!constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        try {
          // ※ For global or sticky sources, this call resets the
          // `lastIndex` of the provided regular expression.
          call(reExec, source, [""]); // throws if source not a RegExp
        } catch {
          return new RE(`${source}`, "u");
        }
        const unicode = call(getUnicode, source, []);
        if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            // `lastIndex` is fixed at 0 because matching always starts
            // from the beginning of the string.
            lastIndex: {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            },
            name: {
              value: name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            },
          },
        );
      }
    }

    /** Gets whether the dotAll flag is present on this Matcher. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this Matcher on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /** Gets whether the global flag is present on this Matcher. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /** Gets whether the hasIndices flag is present on this Matcher. */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /** Gets whether the ignoreCase flag is present on this Matcher. */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /** Gets whether the multiline flag is present on this Matcher. */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this Matcher. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this Matcher. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this Matcher.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // The exposed constructor extends RegExp but simply defers to the
  // internal class above, which returns the callable object.
  const matcherConstructor = defineOwnProperties(
    class extends RegExp {
      constructor(...args) {
        return new Matcher(...args);
      }
    },
    {
      name: { value: "Matcher" },
      length: { value: 1 },
    },
  );

  // Copy the methods and getters of the internal class onto the
  // exposed prototype, then point `constructor` back at the exposed
  // constructor.
  const matcherPrototype = defineOwnProperties(
    matcherConstructor.prototype,
    getOwnPropertyDescriptors(Matcher.prototype),
    { constructor: { value: matcherConstructor } },
  );

  return { Matcher: matcherConstructor };
})();
237
export const {
  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * lowercase.
   *
   * Only the characters `A` through `Z` in the string representation
   * of the provided value are changed.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * uppercase.
   *
   * Only the characters `a` through `z` in the string representation
   * of the provided value are changed.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = String.prototype;

  // Build the replacer callables once, instead of on every call. The
  // replacer receives the matched character as its first argument.
  const toLowercase = makeCallable(stringToLowercase);
  const toUppercase = makeCallable(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(`${$}`, /[A-Z]/gu, toLowercase),
    asciiUppercase: ($) =>
      stringReplaceAll(`${$}`, /[a-z]/gu, toUppercase),
  };
})();
270
export const {
  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,

  /**
   * Returns the result of converting the provided value to a string of
   * scalar values by replacing (unpaired) surrogate values with
   * U+FFFD.
   */
  scalarValueString,
} = (() => {
  const {
    iterator: iteratorSymbol,
    toStringTag: toStringTagSymbol,
  } = Symbol;
  const { [iteratorSymbol]: arrayIterator } = Array.prototype;
  const arrayIteratorPrototype = Object.getPrototypeOf(
    [][iteratorSymbol](),
  );
  const { next: arrayIteratorNext } = arrayIteratorPrototype;
  const iteratorPrototype = Object.getPrototypeOf(
    arrayIteratorPrototype,
  );
  const { [iteratorSymbol]: stringIterator } = String.prototype;
  const stringIteratorPrototype = Object.getPrototypeOf(
    ""[iteratorSymbol](),
  );
  const { next: stringIteratorNext } = stringIteratorPrototype;

  /**
   * An iterator object for iterating over code values (either code
   * units or codepoints) in a string.
   *
   * ※ This class is not exposed, although its methods are (through
   * the prototypes of string code value iterator objects).
   */
  const StringCodeValueIterator = class extends identity {
    #allowSurrogates;
    #baseIterator;

    /**
     * Constructs a new string code value iterator from the provided
     * base iterator.
     *
     * If the provided base iterator is an array iterator, this is a
     * code unit iterator. If the provided iterator is a string
     * iterator and surrogates are allowed, this is a codepoint
     * iterator. If the provided iterator is a string iterator and
     * surrogates are not allowed, this is a scalar value iterator.
     */
    constructor(baseIterator, allowSurrogates = true) {
      // The object passed to `super` becomes `this`, so instances
      // inherit from the string code value iterator prototype.
      super(objectCreate(stringCodeValueIteratorPrototype));
      this.#allowSurrogates = !!allowSurrogates;
      this.#baseIterator = baseIterator;
    }

    /** Provides the next code value in the iterator. */
    next() {
      const baseIterator = this.#baseIterator;
      switch (getPrototype(baseIterator)) {
        case arrayIteratorPrototype: {
          // The base iterator is iterating over U·C·S characters.
          const {
            value: ucsCharacter,
            done,
          } = call(arrayIteratorNext, baseIterator, []);
          return done
            ? { value: undefined, done: true }
            : { value: getCodeUnit(ucsCharacter, 0), done: false };
        }
        case stringIteratorPrototype: {
          // The base iterator is iterating over Unicode characters.
          const {
            value: character,
            done,
          } = call(stringIteratorNext, baseIterator, []);
          if (done) {
            // The base iterator has been exhausted.
            return { value: undefined, done: true };
          } else {
            // The base iterator provided a character; yield the
            // codepoint, or U+FFFD in place of a surrogate when
            // surrogates are not allowed.
            const codepoint = getCodepoint(character, 0);
            return {
              value: this.#allowSurrogates || codepoint <= 0xD7FF ||
                  codepoint >= 0xE000
                ? codepoint
                : 0xFFFD,
              done: false,
            };
          }
        }
        default: {
          // Should not be possible!
          throw new TypeError(
            "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
          );
        }
      }
    }
  };

  const {
    next: stringCodeValueIteratorNext,
  } = StringCodeValueIterator.prototype;
  const stringCodeValueIteratorPrototype = objectCreate(
    iteratorPrototype,
    {
      next: {
        configurable: true,
        enumerable: false,
        value: stringCodeValueIteratorNext,
        writable: true,
      },
      [toStringTagSymbol]: {
        configurable: true,
        enumerable: false,
        value: "String Code Value Iterator",
        writable: false,
      },
    },
  );

  // Prototype for the private iterables used by `scalarValueString`.
  // The iterator it returns has an own `next`, bound to a scalar value
  // iterator over the `source` string of the iterable.
  const scalarValueIterablePrototype = {
    [iteratorSymbol]() {
      return {
        next: bind(
          stringCodeValueIteratorNext,
          new StringCodeValueIterator(
            call(stringIterator, this.source, []),
            false,
          ),
          [],
        ),
      };
    },
  };

  return {
    codeUnits: ($) =>
      new StringCodeValueIterator(call(arrayIterator, `${$}`, [])),
    codepoints: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        true,
      ),
    scalarValues: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        false,
      ),
    scalarValueString: ($) => {
      const iterable = objectCreate(
        scalarValueIterablePrototype,
        { source: { value: `${$}` } },
      );
      // Append one scalar value at a time. Spreading every value into
      // a single `stringFromCodepoints` call can exceed the engine’s
      // argument limit and throw a RangeError for long strings.
      let result = "";
      for (const scalarValue of iterable) {
        result += stringFromCodepoints(scalarValue);
      }
      return result;
    },
  };
})();
446
/**
 * Returns an iterator over the characters in the string representation
 * of the provided value according to the algorithm of
 * String::[Symbol.iterator].
 *
 * ※ Each value yielded is a string containing a single codepoint, not
 * a number.
 */
export const characters = makeCallable(
  String.prototype[Symbol.iterator],
);
455
/**
 * Returns a string containing the codepoint found at the provided
 * position in the string representation of the provided value, where
 * the codepoint is located according to the algorithm of
 * String::codePointAt.
 *
 * Returns undefined if there is no codepoint at the provided position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return undefined;
  } else {
    // There is a codepoint at the provided position; stringify it.
    return stringFromCodepoints(codepoint);
  }
};
467
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::charCodeAt.
 *
 * ※ Elsewhere in this file this is called with the string first and
 * the position second.
 */
export const getCodeUnit = makeCallable(String.prototype.charCodeAt);
474
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::codePointAt.
 *
 * ※ Elsewhere in this file this is called with the string first and
 * the position second.
 */
export const getCodepoint = makeCallable(String.prototype.codePointAt);
481
/**
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value according to the
 * algorithm of String::indexOf.
 *
 * ※ Presumably the value to search is passed first, followed by the
 * arguments of String::indexOf; confirm against `makeCallable`.
 */
export const getFirstSubstringIndex = makeCallable(
  String.prototype.indexOf,
);
490
/**
 * Returns the index of the last occurrence of the search string in the
 * string representation of the provided value according to the
 * algorithm of String::lastIndexOf.
 *
 * ※ Presumably the value to search is passed first, followed by the
 * arguments of String::lastIndexOf; confirm against `makeCallable`.
 */
export const getLastSubstringIndex = makeCallable(
  String.prototype.lastIndexOf,
);
499
/**
 * Returns the string formed by joining the values of the provided
 * iterable, using the algorithm of Array::join.
 *
 * The separator is stringified before use, and is "," when none is
 * provided.
 *
 * Nullish values are stringified as the empty string.
 */
export const join = (() => {
  const arrayJoin = Array.prototype.join;
  // The function is declared under its own name so that its `name`
  // property is "join".
  const join = ($, separator = ",") => {
    // Collect the iterable into an array before joining.
    const values = [...$];
    return call(arrayJoin, values, [`${separator}`]);
  };
  return join;
})();
513
/**
 * Returns a string created from the raw value of the tagged template
 * literal.
 *
 * ※ This is an alias for String.raw.
 */
export const rawString = String.raw;

/**
 * Returns a string created from the provided code units.
 *
 * ※ This is an alias for String.fromCharCode.
 */
export const stringFromCodeUnits = String.fromCharCode;

/**
 * Returns a string created from the provided codepoints.
 *
 * ※ This is an alias for String.fromCodePoint.
 */
export const stringFromCodepoints = String.fromCodePoint;
537
/**
 * Returns the result of splitting the provided value on A·S·C·I·I
 * whitespace.
 *
 * The value is first stripped and collapsed with
 * `stripAndCollapseASCIIWhitespace`, and the result is then split on
 * single spaces.
 *
 * ☡ NOTE(review): For a value which is empty or consists only of
 * A·S·C·I·I whitespace, this appears to produce an array containing
 * one empty string rather than an empty array; confirm that this is
 * intended.
 */
export const splitOnASCIIWhitespace = ($) =>
  stringSplit(stripAndCollapseASCIIWhitespace($), " ");
544
/**
 * Returns the tokens produced by splitting the string representation
 * of the provided value on commas, with A·S·C·I·I whitespace trimmed
 * from each token.
 */
export const splitOnCommas = ($) => {
  // Drop any A·S·C·I·I whitespace which surrounds a comma.
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );
  // Drop whitespace before the first token and after the last.
  const trimmed = stripLeadingAndTrailingASCIIWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
560
/**
 * Returns the result of catenating the string representations of the
 * provided values, returning a new string according to the algorithm
 * of String::concat.
 *
 * ※ Presumably the first value is used as the base string and the
 * remaining values are appended to it; confirm against `makeCallable`.
 */
export const stringCatenate = makeCallable(String.prototype.concat);
567
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string according to the algorithm of
 * String::endsWith.
 *
 * ※ Presumably the value to test is passed first, followed by the
 * arguments of String::endsWith; confirm against `makeCallable`.
 */
export const stringEndsWith = makeCallable(String.prototype.endsWith);
574
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string according to the algorithm of
 * String::includes.
 *
 * ※ Presumably the value to test is passed first, followed by the
 * arguments of String::includes; confirm against `makeCallable`.
 */
export const stringIncludes = makeCallable(String.prototype.includes);
581
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of String::match.
 *
 * ※ Presumably the value to match is passed first, followed by the
 * matcher; confirm against `makeCallable`.
 */
export const stringMatch = makeCallable(String.prototype.match);
588
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of String::matchAll.
 *
 * ※ Presumably the value to match is passed first, followed by the
 * matcher; confirm against `makeCallable`.
 */
export const stringMatchAll = makeCallable(String.prototype.matchAll);
595
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of String::normalize.
 *
 * ※ Presumably the value to normalize is passed first, followed by
 * the optional normalization form; confirm against `makeCallable`.
 */
export const stringNormalize = makeCallable(
  String.prototype.normalize,
);
603
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of String::padEnd.
 *
 * ※ Presumably the value to pad is passed first, followed by the
 * arguments of String::padEnd; confirm against `makeCallable`.
 */
export const stringPadEnd = makeCallable(String.prototype.padEnd);
610
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of String::padStart.
 *
 * ※ Presumably the value to pad is passed first, followed by the
 * arguments of String::padStart; confirm against `makeCallable`.
 */
export const stringPadStart = makeCallable(String.prototype.padStart);
617
/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times according to the
 * algorithm of String::repeat.
 *
 * ※ Presumably the value to repeat is passed first, followed by the
 * count; confirm against `makeCallable`.
 */
export const stringRepeat = makeCallable(String.prototype.repeat);
624
/**
 * Returns the result of replacing, in the string representation of the
 * provided value, what the provided matcher matches with the provided
 * replacement, according to the algorithm of String::replace.
 *
 * ※ Presumably the value is passed first, followed by the matcher and
 * then the replacement; confirm against `makeCallable`.
 */
export const stringReplace = makeCallable(String.prototype.replace);
631
/**
 * Returns the result of replacing, in the string representation of the
 * provided value, everything the provided matcher matches with the
 * provided replacement, according to the algorithm of
 * String::replaceAll.
 *
 * ※ Elsewhere in this file this is called with the string first, the
 * matcher second, and the replacement (a string or a function) third.
 */
export const stringReplaceAll = makeCallable(
  String.prototype.replaceAll,
);
640
/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher and according to the
 * algorithm of String::search.
 *
 * ※ Presumably the value to search is passed first, followed by the
 * matcher; confirm against `makeCallable`.
 */
export const stringSearch = makeCallable(String.prototype.search);
647
/**
 * Returns a slice of the string representation of the provided value
 * according to the algorithm of String::slice.
 *
 * ※ Presumably the value to slice is passed first, followed by the
 * arguments of String::slice; confirm against `makeCallable`.
 */
export const stringSlice = makeCallable(String.prototype.slice);
653
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of String::split.
 *
 * ※ Elsewhere in this file this is called with the string first and
 * the separator second.
 */
export const stringSplit = makeCallable(String.prototype.split);
660
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string according to the algorithm of
 * String::startsWith.
 *
 * ※ Presumably the value to test is passed first, followed by the
 * arguments of String::startsWith; confirm against `makeCallable`.
 */
export const stringStartsWith = makeCallable(
  String.prototype.startsWith,
);
669
/**
 * Returns the `[[StringData]]` of the provided value, according to the
 * algorithm of String::valueOf.
 *
 * ☡ This function will throw if the provided object does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = makeCallable(String.prototype.valueOf);
677
/**
 * Returns the string representation of the provided value with every
 * run of A·S·C·I·I whitespace collapsed to a single space, and with
 * leading and trailing A·S·C·I·I whitespace then removed.
 */
export const stripAndCollapseASCIIWhitespace = ($) => {
  // Collapse each run of whitespace into one U+0020 space.
  const collapsed = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]+/gu,
    " ",
  );
  return stripLeadingAndTrailingASCIIWhitespace(collapsed);
};
691
/**
 * Returns the string representation of the provided value with any
 * leading and trailing A·S·C·I·I whitespace removed.
 */
export const stripLeadingAndTrailingASCIIWhitespace = (() => {
  const reExec = RegExp.prototype.exec;
  return ($) => {
    // The expression matches any string; its first capture group is
    // everything between the leading and trailing whitespace.
    const match = call(
      reExec,
      /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u,
      [$],
    );
    return match[1];
  };
})();
701
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of String::substring.
 *
 * ※ Presumably the value is passed first, followed by the arguments
 * of String::substring; confirm against `makeCallable`.
 */
export const substring = makeCallable(String.prototype.substring);
707
/**
 * Returns the string representation of the provided value.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => {
  // Template literals stringify their substitutions.
  return `${$}`;
};
This page took 0.102274 seconds and 3 git commands to generate.