]> Lady’s Gitweb - Pisces/blob - string.js
100f0bd13bfb0252d1ca846925e9c900cb03b02c
[Pisces] / string.js
1 // ♓🌟 Piscēs ∷ string.js
2 // ====================================================================
3 //
4 // Copyright © 2022 Lady [@ Lady’s Computer].
5 //
6 // This Source Code Form is subject to the terms of the Mozilla Public
7 // License, v. 2.0. If a copy of the MPL was not distributed with this
8 // file, You can obtain one at <https://mozilla.org/MPL/2.0/>.
9
10 import { bind, call, identity, makeCallable } from "./function.js";
11 import {
12 defineOwnProperties,
13 getPrototype,
14 objectCreate,
15 setPrototype,
16 } from "./object.js";
17
18 export const {
19 /**
20 * A RegExp·like object which only matches entire strings.
21 *
22 * Matchers are callable objects and will return true if they are
23 * called with a string that they match, and false otherwise.
24 * Matchers will always return false if called with nonstrings,
25 * although other methods like `exec` stringify them and may match.
26 */
27 Matcher,
28 } = (() => {
29 const RE = RegExp;
30 const { prototype: rePrototype } = RE;
31 const { exec: reExec, toString: reToString } = rePrototype; // saved once here; the code below calls these saved functions instead of looking them up again
32 const getDotAll =
33 Object.getOwnPropertyDescriptor(rePrototype, "dotAll").get;
34 const getGlobal =
35 Object.getOwnPropertyDescriptor(rePrototype, "global").get;
36 const getHasIndices =
37 Object.getOwnPropertyDescriptor(rePrototype, "hasIndices").get;
38 const getIgnoreCase =
39 Object.getOwnPropertyDescriptor(rePrototype, "ignoreCase").get;
40 const getMultiline =
41 Object.getOwnPropertyDescriptor(rePrototype, "multiline").get;
42 const getSource =
43 Object.getOwnPropertyDescriptor(rePrototype, "source").get;
44 const getSticky =
45 Object.getOwnPropertyDescriptor(rePrototype, "sticky").get;
46 const getUnicode =
47 Object.getOwnPropertyDescriptor(rePrototype, "unicode").get;
48
49 const Matcher = class extends identity { // NOTE(review): `identity` is imported from ./function.js; presumably it returns its argument, so `this` becomes the function passed to `super` — confirm
50 #regExp;
51
52 /**
53 * Constructs a new Matcher from the provided source.
54 *
55 * If the provided source is a regular expression, then it must
56 * have the unicode flag set. Otherwise, it is interpreted as the
57 * string source of a regular expression with the unicode flag set.
58 *
59 * Other flags are taken from the provided regular expression
60 * object, if any are present.
61 *
62 * A name for the matcher may be provided as the second argument.
63 *
64 * ☡ If the provided source regular expression uses nongreedy
65 * quantifiers, it may not match the whole string even if a match
66 * with the whole string is possible. Surround the regular
67 * expression with `^(?:` and `)$` if you don’t want nongreedy
68 * regular expressions to fail when shorter matches are possible.
69 */
70 constructor(source, name = undefined) {
71 super(
72 ($) => {
73 if (typeof $ !== "string") {
74 // The provided value is not a string.
75 return false;
76 } else {
77 // The provided value is a string. Set the `lastIndex` of
78 // the regular expression to 0 and see if the first attempt
79 // at a match matches the whole string.
80 regExp.lastIndex = 0;
81 return call(reExec, regExp, [$])?.[0] === $;
82 }
83 },
84 );
85 const regExp = this.#regExp = (() => { // the function passed to `super` above closes over this binding; it only reads it when the matcher is later called
86 try {
87 call(reExec, source, [""]); // throws if source not a RegExp
88 } catch {
89 return new RE(`${source}`, "u");
90 }
91 const unicode = call(getUnicode, source, []);
92 if (!unicode) {
93 // The provided regular expression does not have a unicode
94 // flag.
95 throw new TypeError(
96 `Piscēs: Cannot create Matcher from non‐Unicode RegExp: ${source}`,
97 );
98 } else {
99 // The provided regular expression has a unicode flag.
100 return new RE(source); // a fresh RegExp built from `source`, kept private to this Matcher
101 }
102 })();
103 return defineOwnProperties(
104 setPrototype(this, matcherPrototype),
105 {
106 lastIndex: { // fixed at 0 on the matcher itself; every match attempt resets the private RegExp to 0 first
107 configurable: false,
108 enumerable: false,
109 value: 0,
110 writable: false,
111 },
112 name: {
113 value: name != null
114 ? `${name}`
115 : `Matcher(${call(reToString, regExp, [])})`,
116 },
117 },
118 );
119 }
120
121 /** Gets whether the dotAll flag is present on this Matcher. */
122 get dotAll() {
123 return call(getDotAll, this.#regExp, []);
124 }
125
126 /**
127 * Executes this Matcher on the provided value and returns the
128 * result if there is a match, or null otherwise.
129 *
130 * Matchers only match if they can match the entire value on the
131 * first attempt.
132 */
133 exec($) {
134 const regExp = this.#regExp;
135 const string = `${$}`;
136 regExp.lastIndex = 0;
137 const result = call(reExec, regExp, [string]);
138 if (result?.[0] === string) {
139 // The entire string was matched.
140 return result;
141 } else {
142 // The entire string was not matched.
143 return null;
144 }
145 }
146
147 /** Gets whether the global flag is present on this Matcher. */
148 get global() {
149 return call(getGlobal, this.#regExp, []);
150 }
151
152 /** Gets whether the hasIndices flag is present on this Matcher. */
153 get hasIndices() {
154 return call(getHasIndices, this.#regExp, []);
155 }
156
157 /** Gets whether the ignoreCase flag is present on this Matcher. */
158 get ignoreCase() {
159 return call(getIgnoreCase, this.#regExp, []);
160 }
161
162 /** Gets whether the multiline flag is present on this Matcher. */
163 get multiline() {
164 return call(getMultiline, this.#regExp, []);
165 }
166
167 /** Gets the regular expression source for this Matcher. */
168 get source() {
169 return call(getSource, this.#regExp, []);
170 }
171
172 /** Gets whether the sticky flag is present on this Matcher. */
173 get sticky() {
174 return call(getSticky, this.#regExp, []);
175 }
176
177 /**
178 * Gets whether the unicode flag is present on this Matcher.
179 *
180 * ※ This will always be true.
181 */
182 get unicode() {
183 return call(getUnicode, this.#regExp, []);
184 }
185 };
186 const matcherPrototype = setPrototype( // NOTE(review): presumably makes Matcher.prototype inherit from RegExp.prototype and returns it — confirm against ./object.js
187 Matcher.prototype,
188 rePrototype,
189 );
190
191 return { Matcher };
192 })();
193
export const {
  /**
   * Returns the string representation of the provided value with each
   * A·S·C·I·I uppercase letter (A–Z) converted to lowercase.
   *
   * Characters outside of that range are left untouched.
   */
  asciiLowercase,

  /**
   * Returns the string representation of the provided value with each
   * A·S·C·I·I lowercase letter (a–z) converted to uppercase.
   *
   * Characters outside of that range are left untouched.
   */
  asciiUppercase,
} = (() => {
  const { prototype: stringPrototype } = String;
  const lowercaseMethod = stringPrototype.toLowerCase;
  const uppercaseMethod = stringPrototype.toUpperCase;

  const asciiLowercase = ($) => {
    // Only matched A·S·C·I·I letters are handed to the case
    // conversion.
    const string = `${$}`;
    return stringReplaceAll(
      string,
      /[A-Z]/gu,
      makeCallable(lowercaseMethod),
    );
  };

  const asciiUppercase = ($) => {
    // Only matched A·S·C·I·I letters are handed to the case
    // conversion.
    const string = `${$}`;
    return stringReplaceAll(
      string,
      /[a-z]/gu,
      makeCallable(uppercaseMethod),
    );
  };

  return { asciiLowercase, asciiUppercase };
})();
226
227 export const {
228 /**
229 * Returns an iterator over the code units in the string
230 * representation of the provided value.
231 */
232 codeUnits,
233
234 /**
235 * Returns an iterator over the codepoints in the string
236 * representation of the provided value.
237 */
238 codepoints,
239
240 /**
241 * Returns an iterator over the scalar values in the string
242 * representation of the provided value.
243 *
244 * Codepoints which are not valid Unicode scalar values are replaced
245 * with U+FFFD.
246 */
247 scalarValues,
248
249 /**
250 * Returns the result of converting the provided value to a string of
251 * scalar values by replacing (unpaired) surrogate values with
252 * U+FFFD.
253 */
254 scalarValueString,
255 } = (() => {
256 const {
257 iterator: iteratorSymbol,
258 toStringTag: toStringTagSymbol,
259 } = Symbol;
260 const { [iteratorSymbol]: arrayIterator } = Array.prototype;
261 const arrayIteratorPrototype = Object.getPrototypeOf(
262 [][iteratorSymbol](),
263 );
264 const { next: arrayIteratorNext } = arrayIteratorPrototype;
265 const iteratorPrototype = Object.getPrototypeOf(
266 arrayIteratorPrototype,
267 );
268 const { [iteratorSymbol]: stringIterator } = String.prototype;
269 const stringIteratorPrototype = Object.getPrototypeOf(
270 ""[iteratorSymbol](),
271 );
272 const { next: stringIteratorNext } = stringIteratorPrototype;
273
274 /**
275 * An iterator object for iterating over code values (either code
276 * units or codepoints) in a string.
277 *
278 * ※ This class is not exposed, although its methods are (through
279 * the prototypes of string code value iterator objects).
280 */
281 const StringCodeValueIterator = class extends identity { // NOTE(review): `identity` is imported from ./function.js; presumably it returns the object passed to `super`, which then receives the private fields — confirm
282 #allowSurrogates;
283 #baseIterator;
284
285 /**
286 * Constructs a new string code value iterator from the provided
287 * base iterator.
288 *
289 * If the provided base iterator is an array iterator, this is a
290 * code unit iterator. If the provided iterator is a string
291 * iterator and surrogates are allowed, this is a codepoint
292 * iterator. If the provided iterator is a string iterator and
293 * surrogates are not allowed, this is a scalar value iterator.
294 */
295 constructor(baseIterator, allowSurrogates = true) {
296 super(objectCreate(stringCodeValueIteratorPrototype));
297 this.#allowSurrogates = !!allowSurrogates;
298 this.#baseIterator = baseIterator;
299 }
300
301 /** Provides the next code value in the iterator. */
302 next() {
303 const baseIterator = this.#baseIterator;
304 switch (getPrototype(baseIterator)) {
305 case arrayIteratorPrototype: {
306 // The base iterator is iterating over U·C·S characters.
307 const {
308 value: ucsCharacter,
309 done,
310 } = call(arrayIteratorNext, baseIterator, []);
311 return done
312 ? { value: undefined, done: true }
313 : { value: getCodeUnit(ucsCharacter, 0), done: false };
314 }
315 case stringIteratorPrototype: {
316 // The base iterator is iterating over Unicode characters.
317 const {
318 value: character,
319 done,
320 } = call(stringIteratorNext, baseIterator, []);
321 if (done) {
322 // The base iterator has been exhausted.
323 return { value: undefined, done: true };
324 } else {
325 // The base iterator provided a character; yield the
326 // codepoint.
327 const codepoint = getCodepoint(character, 0);
328 return {
329 value: this.#allowSurrogates || codepoint <= 0xD7FF ||
330 codepoint >= 0xE000
331 ? codepoint
332 : 0xFFFD,
333 done: false,
334 };
335 }
336 }
337 default: {
338 // Should not be possible!
339 throw new TypeError(
340 "Piscēs: Unrecognized base iterator type in %StringCodeValueIterator%.",
341 );
342 }
343 }
344 }
345 };
346
347 const {
348 next: stringCodeValueIteratorNext,
349 } = StringCodeValueIterator.prototype;
350 const stringCodeValueIteratorPrototype = objectCreate(
351 iteratorPrototype,
352 {
353 next: {
354 configurable: true,
355 enumerable: false,
356 value: stringCodeValueIteratorNext,
357 writable: true,
358 },
359 [toStringTagSymbol]: {
360 configurable: true,
361 enumerable: false,
362 value: "String Code Value Iterator",
363 writable: false,
364 },
365 },
366 );
367 const scalarValueIterablePrototype = { // objects made from this carry a `source` string and iterate over its scalar values
368 [iteratorSymbol]() {
369 return {
370 next: bind(
371 stringCodeValueIteratorNext,
372 new StringCodeValueIterator(
373 call(stringIterator, this.source, []),
374 false,
375 ),
376 [],
377 ),
378 };
379 },
380 };
381
382 return {
383 codeUnits: ($) =>
384 new StringCodeValueIterator(call(arrayIterator, `${$}`, [])), // the array iterator is applied to the string itself; `next` reads one code unit from each value it yields
385 codepoints: ($) =>
386 new StringCodeValueIterator(
387 call(stringIterator, `${$}`, []),
388 true,
389 ),
390 scalarValues: ($) =>
391 new StringCodeValueIterator(
392 call(stringIterator, `${$}`, []),
393 false,
394 ),
395 scalarValueString: ($) =>
396 stringFromCodepoints(...objectCreate( // NOTE(review): every scalar value is spread as a separate argument; very long strings may hit engine argument limits — confirm
397 scalarValueIterablePrototype,
398 { source: { value: `${$}` } },
399 )),
400 };
401 })();
402
403 /**
404 * Returns an iterator over the characters (codepoints, as strings) in
405 * the string representation of the provided value according to the
406 * algorithm of String::[Symbol.iterator].
407 */
408 export const characters = makeCallable(
409 String.prototype[Symbol.iterator],
410 );
411
/**
 * Returns, as a string, the character whose codepoint is found at the
 * provided position in the string representation of the provided
 * value, looked up according to the algorithm of String::codePointAt.
 *
 * Returns undefined if no codepoint is found at that position.
 */
export const getCharacter = ($, pos) => {
  const codepoint = getCodepoint($, pos);
  if (codepoint == null) {
    // There is no codepoint at the provided position.
    return undefined;
  } else {
    // A codepoint was found; convert it back into a string.
    return stringFromCodepoints(codepoint);
  }
};
423
424 /**
425 * Returns the code unit at the provided position in the string
426 * representation of the provided value according to the algorithm of
427 * String::charCodeAt.
428 */
429 export const getCodeUnit = makeCallable(String.prototype.charCodeAt);
430
431 /**
432 * Returns the codepoint at the provided position in the string
433 * representation of the provided value according to the algorithm of
434 * String::codePointAt.
435 */
436 export const getCodepoint = makeCallable(String.prototype.codePointAt);
437
438 /**
439 * Returns the index of the first occurrence of the search string in
440 * the string representation of the provided value according to the
441 * algorithm of String::indexOf.
442 */
443 export const getFirstSubstringIndex = makeCallable(
444 String.prototype.indexOf,
445 );
446
447 /**
448 * Returns the index of the last occurrence of the search string in the
449 * string representation of the provided value according to the
450 * algorithm of String::lastIndexOf.
451 */
452 export const getLastSubstringIndex = makeCallable(
453 String.prototype.lastIndexOf,
454 );
455
/**
 * Returns the string which results from joining the values of the
 * provided iterable, according to the algorithm of Array::join.
 *
 * The separator is converted to a string, and defaults to "," if not
 * provided.
 *
 * Nullish values are stringified as the empty string.
 */
export const join = (() => {
  const arrayJoin = Array.prototype.join;
  const join = ($, separator = ",") => {
    // Collect the iterable into an array first, then stringify the
    // separator, then join.
    const values = [...$];
    const glue = `${separator}`;
    return call(arrayJoin, values, [glue]);
  };
  return join;
})();
469
470 export const {
471 /**
472 * Returns a string created from the raw strings (and substitutions)
473 * of the tagged template literal.
474 *
475 * ※ This is an alias for String.raw.
476 */
477 raw: rawString,
478
479 /**
480 * Returns a string created from the provided code units.
481 *
482 * ※ This is an alias for String.fromCharCode.
483 */
484 fromCharCode: stringFromCodeUnits,
485
486 /**
487 * Returns a string created from the provided codepoints.
488 *
489 * ※ This is an alias for String.fromCodePoint.
490 */
491 fromCodePoint: stringFromCodepoints,
492 } = String;
493
/**
 * Returns the array of tokens which results from splitting the
 * provided value on runs of A·S·C·I·I whitespace.
 *
 * Leading and trailing A·S·C·I·I whitespace is stripped, and interior
 * runs are collapsed to a single space, before splitting on spaces.
 */
export const splitOnASCIIWhitespace = ($) => {
  const collapsed = stripAndCollapseASCIIWhitespace($);
  return stringSplit(collapsed, " ");
};
500
/**
 * Returns the array of tokens which results from splitting the
 * string representation of the provided value on commas.
 *
 * A·S·C·I·I whitespace surrounding each comma, and at the start and
 * end of the string, is removed, so tokens are trimmed.
 */
export const splitOnCommas = ($) => {
  // Remove any A·S·C·I·I whitespace on either side of each comma.
  const tightened = stringReplaceAll(
    `${$}`,
    /[\n\r\t\f ]*,[\n\r\t\f ]*/gu,
    ",",
  );

  // Trim the ends of the whole string, then split on the commas.
  const trimmed = stripLeadingAndTrailingASCIIWhitespace(tightened);
  return stringSplit(trimmed, ",");
};
516
517 /**
518 * Returns the result of catenating the string representations of the
519 * provided values, returning a new string according to the algorithm
520 * of String::concat.
521 */
522 export const stringCatenate = makeCallable(String.prototype.concat);
523
524 /**
525 * Returns whether the string representation of the provided value ends
526 * with the provided search string according to the algorithm of
527 * String::endsWith.
528 */
529 export const stringEndsWith = makeCallable(String.prototype.endsWith);
530
531 /**
532 * Returns whether the string representation of the provided value
533 * contains the provided search string according to the algorithm of
534 * String::includes.
535 */
536 export const stringIncludes = makeCallable(String.prototype.includes);
537
538 /**
539 * Returns the result of matching the string representation of the
540 * provided value with the provided matcher according to the algorithm
541 * of String::match.
542 */
543 export const stringMatch = makeCallable(String.prototype.match);
544
545 /**
546 * Returns the result of matching the string representation of the
547 * provided value with the provided matcher according to the algorithm
548 * of String::matchAll.
549 */
550 export const stringMatchAll = makeCallable(String.prototype.matchAll);
551
552 /**
553 * Returns the normalized form of the string representation of the
554 * provided value according to the algorithm of String::normalize.
555 */
556 export const stringNormalize = makeCallable(
557 String.prototype.normalize,
558 );
559
560 /**
561 * Returns the result of padding the end of the string representation
562 * of the provided value until it is the desired length according to
563 * the algorithm of String::padEnd.
564 */
565 export const stringPadEnd = makeCallable(String.prototype.padEnd);
566
567 /**
568 * Returns the result of padding the start of the string representation
569 * of the provided value until it is the desired length according to
570 * the algorithm of String::padStart.
571 */
572 export const stringPadStart = makeCallable(String.prototype.padStart);
573
574 /**
575 * Returns the result of repeating the string representation of the
576 * provided value the provided number of times according to the
577 * algorithm of String::repeat.
578 */
579 export const stringRepeat = makeCallable(String.prototype.repeat);
580
581 /**
582 * Returns the result of replacing matches in the string representation
583 * of the provided value with the provided replacement, using the
584 * provided matcher and according to the algorithm of String::replace.
585 */
586 export const stringReplace = makeCallable(String.prototype.replace);
587
588 /**
589 * Returns the result of replacing all matches in the string
590 * representation of the provided value with the provided replacement,
591 * using the provided matcher and according to String::replaceAll.
592 */
593 export const stringReplaceAll = makeCallable(
594 String.prototype.replaceAll,
595 );
596
597 /**
598 * Returns the result of searching the string representation of the
599 * provided value using the provided matcher and according to the
600 * algorithm of String::search.
601 */
602 export const stringSearch = makeCallable(String.prototype.search);
603
604 /**
605 * Returns a slice of the string representation of the provided value
606 * according to the algorithm of String::slice.
607 */
608 export const stringSlice = makeCallable(String.prototype.slice);
609
610 /**
611 * Returns the result of splitting the string representation of the
612 * provided value on the provided separator according to the algorithm
613 * of String::split.
614 */
615 export const stringSplit = makeCallable(String.prototype.split);
616
617 /**
618 * Returns whether the string representation of the provided value
619 * starts with the provided search string according to the algorithm of
620 * String::startsWith.
621 */
622 export const stringStartsWith = makeCallable(
623 String.prototype.startsWith,
624 );
625
626 /**
627 * Returns the `[[StringData]]` of the provided value.
628 *
629 * ☡ This function will throw if the provided value is neither a string
630 * nor an object with a `[[StringData]]` internal slot.
631 */
632 export const stringValue = makeCallable(String.prototype.valueOf);
633
/**
 * Returns the string representation of the provided value with every
 * run of A·S·C·I·I whitespace collapsed to a single space, and with
 * leading and trailing A·S·C·I·I whitespace then stripped.
 */
export const stripAndCollapseASCIIWhitespace = ($) => {
  // Collapse first; any whitespace left at either end is then a
  // single space, which the strip removes.
  const collapsed = stringReplaceAll(`${$}`, /[\n\r\t\f ]+/gu, " ");
  return stripLeadingAndTrailingASCIIWhitespace(collapsed);
};
647
/**
 * Returns the result of stripping leading and trailing A·S·C·I·I
 * whitespace from the string representation of the provided value.
 *
 * A·S·C·I·I whitespace is U+0009, U+000A, U+000C, U+000D, and U+0020.
 *
 * ※ The string is scanned inward from each end, so the cost depends
 * only on how much whitespace is stripped. (A single regular
 * expression with a nongreedy middle backtracks quadratically on long
 * interior runs of whitespace.)
 *
 * ☡ This function throws for symbols and other values without a
 * string representation.
 */
export const stripLeadingAndTrailingASCIIWhitespace = (() => {
  // Save the builtins once so that the returned function does not
  // look them up again on each call.
  const { apply } = Reflect;
  const { charCodeAt, substring } = String.prototype;

  /** Returns whether the provided code unit is A·S·C·I·I whitespace. */
  const isASCIIWhitespace = (codeUnit) =>
    codeUnit === 0x09 || codeUnit === 0x0A || codeUnit === 0x0C ||
    codeUnit === 0x0D || codeUnit === 0x20;

  return ($) => {
    const string = `${$}`;
    let start = 0;
    let end = string.length;
    while (
      start < end &&
      isASCIIWhitespace(apply(charCodeAt, string, [start]))
    ) {
      // The code unit at `start` is leading whitespace; skip it.
      ++start;
    }
    while (
      end > start &&
      isASCIIWhitespace(apply(charCodeAt, string, [end - 1]))
    ) {
      // The code unit before `end` is trailing whitespace; drop it.
      --end;
    }
    return apply(substring, string, [start, end]);
  };
})();
657
658 /**
659 * Returns a substring of the string representation of the provided
660 * value according to the algorithm of String::substring.
661 */
662 export const substring = makeCallable(String.prototype.substring);
663
664 /**
665 * Returns the result of converting the provided value to a string.
666 *
667 * ☡ This function throws for symbols and other values without a string
668 * representation.
669 */
670 export const toString = ($) => `${$}`;
This page took 0.096179 seconds and 3 git commands to generate.