]> Lady’s Gitweb - Pisces/blob - string.js
28412fc98b8854b9b26f248c5e8b8ae98526d033
[Pisces] / string.js
// ♓🌟 Piscēs ∷ string.js
// ====================================================================
//
// Copyright © 2022 Lady [@ Lady’s Computer].
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import { bind, call, identity, makeCallable } from "./function.js";
11 import {
12 defineOwnProperties,
13 getPrototype,
14 objectCreate,
15 setPrototype,
16 } from "./object.js";
17
export const {
  /**
   * A RegExp·like object which only matches entire strings, and may
   * have additional constraints specified.
   *
   * Matchers are callable objects and will return true if they are
   * called with a string that they match, and false otherwise.
   * Matchers will always return false if called with nonstrings,
   * although other methods like `exec` coerce their arguments and may
   * still produce a match.
   */
  Matcher,
} = (() => {
  const RE = RegExp;
  const { prototype: rePrototype } = RE;
  const { exec: reExec, toString: reToString } = rePrototype;

  // The flag getters are captured up front so that Matcher accessors
  // can read flags from the private regular expression without going
  // through property lookup on the instance.
  const getDotAll =
    Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get;
  const getGlobal =
    Object.getOwnPropertyDescriptor(rePrototype, "global").get;
  const getHasIndices =
    Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get;
  const getIgnoreCase =
    Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get;
  const getMultiline =
    Object.getOwnPropertyDescriptor(rePrototype, "multiline").get;
  const getSource =
    Object.getOwnPropertyDescriptor(rePrototype, "source").get;
  const getSticky =
    Object.getOwnPropertyDescriptor(rePrototype, "sticky").get;
  const getUnicode =
    Object.getOwnPropertyDescriptor(rePrototype, "unicode").get;

  const Matcher = class extends identity {
    #constraint;
    #regExp;

    /**
     * Constructs a new Matcher from the provided source.
     *
     * If the provided source is a regular expression, then it must
     * have the unicode flag set. Otherwise, it is interpreted as the
     * string source of a regular expression with the unicode flag set.
     *
     * Other flags are taken from the provided regular expression
     * object, if any are present.
     *
     * A name for the matcher may be provided as the second argument.
     *
     * A callable constraint on acceptable inputs may be provided as a
     * third argument. If provided, it will be called with three
     * arguments whenever a match appears successful: first, the string
     * being matched, second, the match result, and third, the Matcher
     * object itself. If the return value of this call is falsey, then
     * the match will be considered a failure.
     *
     * ☡ If the provided source regular expression uses nongreedy
     * quantifiers, it may not match the whole string even if a match
     * with the whole string is possible. Surround the regular
     * expression with `^(?:` and `)$` if you don’t want nongreedy
     * regular expressions to fail when shorter matches are possible.
     *
     * ☡ This constructor throws a TypeError if the source is a
     * regular expression without the unicode flag, or if the
     * constraint is neither null nor callable.
     */
    constructor(source, name = undefined, constraint = null) {
      super(
        // This arrow function becomes the Matcher object itself (via
        // `identity`); `regExp` and `this` are only read when it is
        // eventually called, after construction has finished.
        ($) => {
          if (typeof $ !== "string") {
            // The provided value is not a string.
            return false;
          } else {
            // The provided value is a string. Set the `lastIndex` of
            // the regular expression to 0 and see if the first attempt
            // at a match matches the whole string and passes the
            // provided constraint (if present).
            //
            // The constraint result is coerced so that calling a
            // Matcher always yields a boolean, as documented.
            regExp.lastIndex = 0;
            const result = call(reExec, regExp, [$]);
            return result?.[0] === $ &&
              (constraint === null || !!constraint($, result, this));
          }
        },
      );
      const regExp = this.#regExp = (() => {
        try {
          call(reExec, source, [""]); // throws if source not a RegExp
        } catch {
          // The source is not a regular expression; treat it as the
          // string source of one.
          return new RE(`${source}`, "u");
        }
        const unicode = call(getUnicode, source, []);
        if (!unicode) {
          // The provided regular expression does not have a unicode
          // flag.
          throw new TypeError(
            `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
          );
        } else {
          // The provided regular expression has a unicode flag.
          return new RE(source);
        }
      })();
      if (constraint !== null && typeof constraint !== "function") {
        throw new TypeError(
          "Piscēs: Cannot construct Matcher: Constraint is not callable.",
        );
      } else {
        this.#constraint = constraint;
        return defineOwnProperties(
          setPrototype(this, matcherPrototype),
          {
            lastIndex: {
              configurable: false,
              enumerable: false,
              value: 0,
              writable: false,
            },
            name: {
              value: name != null
                ? `${name}`
                : `Matcher(${call(reToString, regExp, [])})`,
            },
          },
        );
      }
    }

    /** Gets whether the dotAll flag is present on this Matcher. */
    get dotAll() {
      return call(getDotAll, this.#regExp, []);
    }

    /**
     * Executes this Matcher on the provided value and returns the
     * result if there is a match, or null otherwise.
     *
     * Matchers only match if they can match the entire value on the
     * first attempt.
     *
     * ☡ The match result returned by this method will be the same as
     * that passed to the constraint function—and may have been
     * modified by said function prior to being returned.
     */
    exec($) {
      const regExp = this.#regExp;
      const constraint = this.#constraint;
      const string = `${$}`;
      regExp.lastIndex = 0;
      const result = call(reExec, regExp, [string]);
      if (
        result?.[0] === string &&
        (constraint === null || constraint(string, result, this))
      ) {
        // The entire string was matched and the constraint, if
        // present, returned a truthy value.
        return result;
      } else {
        // The entire string was not matched or the constraint returned
        // a falsey value.
        return null;
      }
    }

    /** Gets whether the global flag is present on this Matcher. */
    get global() {
      return call(getGlobal, this.#regExp, []);
    }

    /** Gets whether the hasIndices flag is present on this Matcher. */
    get hasIndices() {
      return call(getHasIndices, this.#regExp, []);
    }

    /** Gets whether the ignoreCase flag is present on this Matcher. */
    get ignoreCase() {
      return call(getIgnoreCase, this.#regExp, []);
    }

    /** Gets whether the multiline flag is present on this Matcher. */
    get multiline() {
      return call(getMultiline, this.#regExp, []);
    }

    /** Gets the regular expression source for this Matcher. */
    get source() {
      return call(getSource, this.#regExp, []);
    }

    /** Gets whether the sticky flag is present on this Matcher. */
    get sticky() {
      return call(getSticky, this.#regExp, []);
    }

    /**
     * Gets whether the unicode flag is present on this Matcher.
     *
     * ※ This will always be true.
     */
    get unicode() {
      return call(getUnicode, this.#regExp, []);
    }
  };

  // Matcher instances are functions, so their prototype is assigned
  // explicitly in the constructor; that prototype in turn inherits
  // from `RegExp.prototype`.
  const matcherPrototype = setPrototype(
    Matcher.prototype,
    rePrototype,
  );

  return { Matcher };
})();
223
export const {
  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * lowercase.
   *
   * Only the letters `A` through `Z` in the string representation of
   * the provided value are changed.
   */
  asciiLowercase,

  /**
   * Returns the result of converting the provided value to A·S·C·I·I
   * uppercase.
   *
   * Only the letters `a` through `z` in the string representation of
   * the provided value are changed.
   */
  asciiUppercase,
} = (() => {
  const {
    toLowerCase: stringToLowercase,
    toUpperCase: stringToUppercase,
  } = String.prototype;

  // The replacer functions are built once here instead of on every
  // call. Each receives the matched substring as its first argument.
  const lowercaseReplacer = makeCallable(stringToLowercase);
  const uppercaseReplacer = makeCallable(stringToUppercase);

  return {
    asciiLowercase: ($) =>
      stringReplaceAll(
        `${$}`,
        /[A-Z]/gu,
        lowercaseReplacer,
      ),
    asciiUppercase: ($) =>
      stringReplaceAll(
        `${$}`,
        /[a-z]/gu,
        uppercaseReplacer,
      ),
  };
})();
256
export const {
  /**
   * Returns an iterator over the code units in the string
   * representation of the provided value.
   */
  codeUnits,

  /**
   * Returns an iterator over the codepoints in the string
   * representation of the provided value.
   */
  codepoints,

  /**
   * Returns an iterator over the scalar values in the string
   * representation of the provided value.
   *
   * Codepoints which are not valid Unicode scalar values are replaced
   * with U+FFFD.
   */
  scalarValues,

  /**
   * Returns the result of converting the provided value to a string of
   * scalar values by replacing (unpaired) surrogate values with
   * U+FFFD.
   */
  scalarValueString,
} = (() => {
  const {
    iterator: iteratorSymbol,
    toStringTag: toStringTagSymbol,
  } = Symbol;
  const { [iteratorSymbol]: arrayIterator } = Array.prototype;
  const arrayIteratorPrototype = Object.getPrototypeOf(
    [][iteratorSymbol](),
  );
  const { next: arrayIteratorNext } = arrayIteratorPrototype;
  const iteratorPrototype = Object.getPrototypeOf(
    arrayIteratorPrototype,
  );
  const { [iteratorSymbol]: stringIterator } = String.prototype;
  const stringIteratorPrototype = Object.getPrototypeOf(
    ""[iteratorSymbol](),
  );
  const { next: stringIteratorNext } = stringIteratorPrototype;

  /**
   * An iterator object for iterating over code values (either code
   * units or codepoints) in a string.
   *
   * ※ This class is not exposed, although its methods are (through
   * the prototypes of string code value iterator objects).
   */
  const StringCodeValueIterator = class extends identity {
    #allowSurrogates;
    #baseIterator;

    /**
     * Constructs a new string code value iterator from the provided
     * base iterator.
     *
     * If the provided base iterator is an array iterator, this is a
     * code unit iterator. If the provided iterator is a string
     * iterator and surrogates are allowed, this is a codepoint
     * iterator. If the provided iterator is a string iterator and
     * surrogates are not allowed, this is a scalar value iterator.
     */
    constructor(baseIterator, allowSurrogates = true) {
      // The instance is a fresh object inheriting from the shared
      // iterator prototype, not from this class’s own prototype.
      super(objectCreate(stringCodeValueIteratorPrototype));
      this.#allowSurrogates = !!allowSurrogates;
      this.#baseIterator = baseIterator;
    }

    /** Provides the next code value in the iterator. */
    next() {
      const baseIterator = this.#baseIterator;
      switch (getPrototype(baseIterator)) {
        case arrayIteratorPrototype: {
          // The base iterator is iterating over U·C·S characters.
          const {
            value: ucsCharacter,
            done,
          } = call(arrayIteratorNext, baseIterator, []);
          return done
            ? { value: undefined, done: true }
            : { value: getCodeUnit(ucsCharacter, 0), done: false };
        }
        case stringIteratorPrototype: {
          // The base iterator is iterating over Unicode characters.
          const {
            value: character,
            done,
          } = call(stringIteratorNext, baseIterator, []);
          if (done) {
            // The base iterator has been exhausted.
            return { value: undefined, done: true };
          } else {
            // The base iterator provided a character; yield the
            // codepoint, or U+FFFD in place of a surrogate when
            // surrogates are not allowed.
            const codepoint = getCodepoint(character, 0);
            return {
              value: this.#allowSurrogates || codepoint <= 0xD7FF ||
                  codepoint >= 0xE000
                ? codepoint
                : 0xFFFD,
              done: false,
            };
          }
        }
        default: {
          // Should not be possible!
          throw new TypeError(
            "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
          );
        }
      }
    }
  };

  const {
    next: stringCodeValueIteratorNext,
  } = StringCodeValueIterator.prototype;
  const stringCodeValueIteratorPrototype = objectCreate(
    iteratorPrototype,
    {
      next: {
        configurable: true,
        enumerable: false,
        value: stringCodeValueIteratorNext,
        writable: true,
      },
      [toStringTagSymbol]: {
        configurable: true,
        enumerable: false,
        value: "String Code Value Iterator",
        writable: false,
      },
    },
  );

  // Prototype for the throwaway iterable which `scalarValueString`
  // spreads; it reads the string to iterate from `this.source`.
  const scalarValueIterablePrototype = {
    [iteratorSymbol]() {
      return {
        next: bind(
          stringCodeValueIteratorNext,
          new StringCodeValueIterator(
            call(stringIterator, this.source, []),
            false,
          ),
          [],
        ),
      };
    },
  };

  return {
    codeUnits: ($) =>
      new StringCodeValueIterator(call(arrayIterator, `${$}`, [])),
    codepoints: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        true,
      ),
    scalarValues: ($) =>
      new StringCodeValueIterator(
        call(stringIterator, `${$}`, []),
        false,
      ),
    scalarValueString: ($) =>
      stringFromCodepoints(...objectCreate(
        scalarValueIterablePrototype,
        { source: { value: `${$}` } },
      )),
  };
})();
432
/**
 * Returns an iterator over the codepoints in the string representation
 * of the provided value according to the algorithm of
 * String::[Symbol.iterator].
 *
 * ※ Each value yielded is a string, not a number.
 */
export const characters = makeCallable(
  String.prototype[Symbol.iterator],
);
441
/**
 * Returns the character at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::codePointAt.
 *
 * Returns undefined if no codepoint is found at the provided
 * position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  return codepoint == null
    ? undefined
    : stringFromCodepoints(codepoint);
};
453
/**
 * Returns the code unit at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::charCodeAt.
 */
export const getCodeUnit = makeCallable(String.prototype.charCodeAt);
460
/**
 * Returns the codepoint at the provided position in the string
 * representation of the provided value according to the algorithm of
 * String::codePointAt.
 */
export const getCodepoint = makeCallable(String.prototype.codePointAt);
467
/**
 * Returns the index of the first occurrence of the search string in
 * the string representation of the provided value according to the
 * algorithm of String::indexOf.
 */
export const getFirstSubstringIndex = makeCallable(
  String.prototype.indexOf,
);
476
/**
 * Returns the index of the last occurrence of the search string in the
 * string representation of the provided value according to the
 * algorithm of String::lastIndexOf.
 */
export const getLastSubstringIndex = makeCallable(
  String.prototype.lastIndexOf,
);
485
/**
 * Returns the result of joining the provided iterable.
 *
 * If no separator is provided, it defaults to ",".
 *
 * If a value is nullish, it will be stringified as the empty string.
 *
 * ※ The iterable is first collected into an array, and the separator
 * is converted to a string, before joining.
 */
export const join = (() => {
  const { join: arrayJoin } = Array.prototype;
  const join = ($, separator = ",") =>
    call(arrayJoin, [...$], [`${separator}`]);
  return join;
})();
499
export const {
  /**
   * Returns a string created from the raw value of the tagged template
   * literal.
   *
   * ※ This is an alias for String.raw.
   */
  raw: rawString,

  /**
   * Returns a string created from the provided code units.
   *
   * ※ This is an alias for String.fromCharCode.
   */
  fromCharCode: stringFromCodeUnits,

  /**
   * Returns a string created from the provided codepoints.
   *
   * ※ This is an alias for String.fromCodePoint.
   */
  fromCodePoint: stringFromCodepoints,
} = String;
523
/**
 * Returns the result of splitting the provided value on A·S·C·I·I
 * whitespace.
 *
 * Leading and trailing A·S·C·I·I whitespace is ignored, and runs of
 * A·S·C·I·I whitespace are treated as a single separator. If the
 * string representation of the provided value is empty or consists
 * only of A·S·C·I·I whitespace, the result is an empty array.
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);

  // Splitting the empty string would yield `[""]`, but there are no
  // tokens to return in that case.
  return collapsed === "" ? [] : stringSplit(collapsed, " ");
};
530
/**
 * Returns the result of splitting the provided value on commas,
 * trimming A·S·C·I·I whitespace from the resulting tokens.
 *
 * ※ Whitespace around each comma is removed first, then whitespace
 * at the ends of the whole string, and only then is the string split.
 */
export const splitOnCommas = ($) =>
  stringSplit(
    stripLeadingAndTrailingASCIIWhitespace(
      stringReplaceAll(
        `${$}`,
        /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
        ",",
      ),
    ),
    ",",
  );
546
/**
 * Returns the result of catenating the string representations of the
 * provided values, returning a new string according to the algorithm
 * of String::concat.
 */
export const stringCatenate = makeCallable(String.prototype.concat);
553
/**
 * Returns whether the string representation of the provided value ends
 * with the provided search string according to the algorithm of
 * String::endsWith.
 */
export const stringEndsWith = makeCallable(String.prototype.endsWith);
560
/**
 * Returns whether the string representation of the provided value
 * contains the provided search string according to the algorithm of
 * String::includes.
 */
export const stringIncludes = makeCallable(String.prototype.includes);
567
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of String::match.
 */
export const stringMatch = makeCallable(String.prototype.match);
574
/**
 * Returns the result of matching the string representation of the
 * provided value with the provided matcher according to the algorithm
 * of String::matchAll.
 */
export const stringMatchAll = makeCallable(String.prototype.matchAll);
581
/**
 * Returns the normalized form of the string representation of the
 * provided value according to the algorithm of String::normalize.
 */
export const stringNormalize = makeCallable(
  String.prototype.normalize,
);
589
/**
 * Returns the result of padding the end of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of String::padEnd.
 */
export const stringPadEnd = makeCallable(String.prototype.padEnd);
596
/**
 * Returns the result of padding the start of the string representation
 * of the provided value until it is the desired length according to
 * the algorithm of String::padStart.
 */
export const stringPadStart = makeCallable(String.prototype.padStart);
603
/**
 * Returns the result of repeating the string representation of the
 * provided value the provided number of times according to the
 * algorithm of String::repeat.
 */
export const stringRepeat = makeCallable(String.prototype.repeat);
610
/**
 * Returns the result of replacing the first match of the provided
 * matcher in the string representation of the provided value with the
 * provided replacement according to the algorithm of String::replace.
 *
 * ※ If the matcher is a regular expression with the global flag,
 * String::replace replaces every match.
 */
export const stringReplace = makeCallable(String.prototype.replace);
617
/**
 * Returns the result of replacing every match of the provided matcher
 * in the string representation of the provided value with the
 * provided replacement according to the algorithm of
 * String::replaceAll.
 *
 * ☡ String::replaceAll throws if the matcher is a regular expression
 * without the global flag.
 */
export const stringReplaceAll = makeCallable(
  String.prototype.replaceAll,
);
626
/**
 * Returns the result of searching the string representation of the
 * provided value using the provided matcher and according to the
 * algorithm of String::search.
 */
export const stringSearch = makeCallable(String.prototype.search);
633
/**
 * Returns a slice of the string representation of the provided value
 * according to the algorithm of String::slice.
 */
export const stringSlice = makeCallable(String.prototype.slice);
639
/**
 * Returns the result of splitting the string representation of the
 * provided value on the provided separator according to the algorithm
 * of String::split.
 */
export const stringSplit = makeCallable(String.prototype.split);
646
/**
 * Returns whether the string representation of the provided value
 * starts with the provided search string according to the algorithm of
 * String::startsWith.
 */
export const stringStartsWith = makeCallable(
  String.prototype.startsWith,
);
655
/**
 * Returns the `[[StringData]]` of the provided value.
 *
 * ☡ This function will throw if the provided object does not have a
 * `[[StringData]]` internal slot.
 */
export const stringValue = makeCallable(String.prototype.valueOf);
663
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the provided value and collapsing other A·S·C·I·I
 * whitespace in the string representation of the provided value.
 *
 * ※ Each run of A·S·C·I·I whitespace is first replaced with a single
 * U+0020 space; the ends are stripped afterward.
 */
export const stripAndCollapseASCIIWhitespace = ($) =>
  stripLeadingAndTrailingASCIIWhitespace(
    stringReplaceAll(
      `${$}`,
      /[\n\r\t\f ]+/gu,
      " ",
    ),
  );
677
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the string representation of the provided value.
 */
export const stripLeadingAndTrailingASCIIWhitespace = (() => {
  const { exec: reExec } = RegExp.prototype;
  return ($) =>
    // The pattern matches any string, so the match result is never
    // null; the first capture group holds the stripped contents.
    call(reExec, /^[\n\r\t\f ]*([^]*?)[\n\r\t\f ]*$/u, [$])[1];
})();
687
/**
 * Returns a substring of the string representation of the provided
 * value according to the algorithm of String::substring.
 */
export const substring = makeCallable(String.prototype.substring);
693
/**
 * Returns the result of converting the provided value to a string.
 *
 * ☡ This method throws for symbols and other objects without a string
 * representation.
 */
export const toString = ($) => `${$}`;
This page took 0.09659 seconds and 3 git commands to generate.