X-Git-Url: https://git.ladys.computer/Pisces/blobdiff_plain/b353e413a079066e14c779fc1e203e27ffcd92dd..6e6d4e3261c1c943fe44fa9e381bcf8bf1441fd6:/binary.js diff --git a/binary.js b/binary.js index 272775f..5ee2bda 100644 --- a/binary.js +++ b/binary.js @@ -1,7 +1,7 @@ // ♓🌟 Piscēs ∷ binary.js // ==================================================================== // -// Copyright © 2020–2022 Lady [@ Lady’s Computer]. +// Copyright © 2020–2023 Lady [@ Lady’s Computer]. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -68,6 +68,10 @@ const { setUint16: viewSetUint16, } = viewPrototype; +/** + * Returns an ArrayBuffer for encoding generated from the provided + * arguments. + */ const bufferFromArgs = ($, $s) => $ instanceof Buffer ? $ @@ -102,6 +106,174 @@ const bufferFromArgs = ($, $s) => : `${$}`, ); +/** + * Returns the result of decoding the provided base16 string into an + * ArrayBuffer. + * + * ※ This function is not exposed. + */ +const decodeBase16 = (source) => { + const u4s = map( + source, + (ucsCharacter) => { + const code = getCodeUnit(ucsCharacter, 0); + const result = code >= 0x30 && code <= 0x39 + ? code - 48 + : code >= 0x41 && code <= 0x46 + ? code - 55 + : code >= 0x61 && code <= 0x66 + ? code - 87 + : -1; + if (result < 0) { + throw new RangeError( + `Piscēs: Invalid character in Base64: ${ucsCharacter}.`, + ); + } else { + return result; + } + }, + ); + const { length } = u4s; + if (length % 2 == 1) { + // The length is such that an entire letter would be dropped during + // a forgiving decode. + throw new RangeError( + `Piscēs: Base16 string has invalid length: ${source}.`, + ); + } else { + // Every letter contributes at least some bits to the result. + const dataView = new View(new Buffer(floor(length / 2))); + for (let index = 0; index < length - 1;) { + call(viewSetUint8, dataView, [ + floor(index / 2), + (u4s[index] << 4) | u4s[++index, index++], + ]); + } + return call(getViewBuffer, dataView, []); + } +}; + +/** + * Returns the result of decoding the provided base32 string into an + * ArrayBuffer. + * + * If the second argument is truthy, uses Crockford’s encoding rather + * than the RFC’s (see ). This + * is more human‐friendly and tolerant. Check digits are not supported. + * + * ※ This function is not exposed. + */ +const decodeBase32 = (source, wrmg) => { + const u5s = map( + wrmg + ? stringReplace(source, /-/gu, "") + : source.length % 8 == 0 + ? stringReplace(source, /(?:=|={3,4}|={6})$/u, "") + : source, + (ucsCharacter) => { + const code = getCodeUnit(ucsCharacter, 0); + const result = wrmg + ? code >= 0x30 && code <= 0x39 + ? code - 48 + : code >= 0x41 && code <= 0x48 + ? code - 55 + : code == 0x49 + ? 1 // I + : code >= 0x4A && code <= 0x4B + ? code - 56 + : code == 0x4C + ? 1 // L + : code >= 0x4D && code <= 0x4E + ? code - 57 + : code == 0x4F + ? 0 // O + : code >= 0x50 && code <= 0x54 + ? code - 58 + // U is skipped + : code >= 0x56 && code <= 0x5A + ? code - 59 + : code >= 0x61 && code <= 0x68 + ? code - 87 + : code == 0x69 + ? 1 // i + : code >= 0x6A && code <= 0x6B + ? code - 88 + : code == 0x6C + ? 1 // l + : code >= 0x6D && code <= 0x6E + ? code - 89 + : code == 0x6F + ? 0 // o + : code >= 0x70 && code <= 0x74 + ? code - 90 + // u is skipped + : code >= 0x76 && code <= 0x7A + ? code - 91 + : -1 + : code >= 0x41 && code <= 0x5A + ? code - 65 + : code >= 0x61 && code <= 0x7A + ? code - 97 // same result as above; case insensitive + : code >= 0x32 && code <= 0x37 + ? code - 24 // digits 2–7 map to 26–31 + : -1; + if (result < 0) { + throw new RangeError( + `Piscēs: Invalid character in Base32: ${ucsCharacter}.`, + ); + } else { + return result; + } + }, + ); + const { length } = u5s; + const lengthMod8 = length % 8; + if (lengthMod8 == 1 || lengthMod8 == 3 || lengthMod8 == 6) { + // The length is such that an entire letter would be dropped during + // a forgiving decode. + throw new RangeError( + `Piscēs: Base32 string has invalid length: ${source}.`, + ); + } else { + // Every letter contributes at least some bits to the result. + const dataView = new View(new Buffer(floor(length * 5 / 8))); + for (let index = 0; index < length - 1;) { + // The final index is not handled; if the string is not divisible + // by 8, some bits might be dropped. This matches the “forgiving + // decode” behaviour specified by WhatW·G for base64. + const dataIndex = ceil(index * 5 / 8); + const remainder = index % 8; + if (remainder == 0) { + call(viewSetUint8, dataView, [ + dataIndex, + u5s[index] << 3 | u5s[++index] >> 2, + ]); + } else if (remainder == 1) { + call(viewSetUint8, dataView, [ + dataIndex, + u5s[index] << 6 | u5s[++index] << 1 | u5s[++index] >> 4, + ]); + } else if (remainder == 3) { + call(viewSetUint8, dataView, [ + dataIndex, + u5s[index] << 4 | u5s[++index] >> 1, + ]); + } else if (remainder == 4) { + call(viewSetUint8, dataView, [ + dataIndex, + u5s[index] << 7 | u5s[++index] << 2 | u5s[++index] >> 3, + ]); + } else { // remainder == 6 + call(viewSetUint8, dataView, [ + dataIndex, + u5s[index] << 5 | u5s[++index, index++], + ]); + } + } + return call(getViewBuffer, dataView, []); + } +}; + /** * Returns the result of decoding the provided base64 string into an * ArrayBuffer. @@ -128,7 +300,7 @@ const decodeBase64 = (source, safe = false) => { : -1; if (result < 0) { throw new RangeError( - `Piscēs: Invalid character in Base64: ${character}.`, + `Piscēs: Invalid character in Base64: ${ucsCharacter}.`, ); } else { return result; @@ -136,28 +308,158 @@ const decodeBase64 = (source, safe = false) => { }, ); const { length } = u6s; - const dataView = new View(new Buffer(floor(length * 3 / 4))); - for (let index = 0; index < length - 1;) { - const dataIndex = ceil(index * 3 / 4); - const remainder = index % 3; - if (remainder == 0) { - call(viewSetUint8, dataView, [ - dataIndex, - (u6s[index] << 2) + (u6s[++index] >> 4), - ]); - } else if (remainder == 1) { - call(viewSetUint8, dataView, [ - dataIndex, - ((u6s[index] & 0xF) << 4) + (u6s[++index] >> 2), - ]); + if (length % 4 == 1) { + // The length is such that an entire letter would be dropped during + // a forgiving decode. + throw new RangeError( + `Piscēs: Base64 string has invalid length: ${source}.`, + ); + } else { + // Every letter contributes at least some bits to the result. + const dataView = new View(new Buffer(floor(length * 3 / 4))); + for (let index = 0; index < length - 1;) { + // The final index is not handled; if the string is not divisible + // by 4, some bits might be dropped. This matches the “forgiving + // decode” behaviour specified by WhatW·G for base64. + const dataIndex = ceil(index * 3 / 4); + const remainder = index % 4; + if (remainder == 0) { + call(viewSetUint8, dataView, [ + dataIndex, + u6s[index] << 2 | u6s[++index] >> 4, + ]); + } else if (remainder == 1) { + call(viewSetUint8, dataView, [ + dataIndex, + u6s[index] << 4 | u6s[++index] >> 2, + ]); + } else { // remainder == 2 + call(viewSetUint8, dataView, [ + dataIndex, + u6s[index] << 6 | u6s[++index, index++], + ]); + } + } + return call(getViewBuffer, dataView, []); + } +}; + +/** + * Returns the result of encoding the provided ArrayBuffer into a + * base16 string. + * + * ※ This function is not exposed. + */ +const encodeBase16 = (buffer) => { + const dataView = new View(buffer); + const byteLength = call(getBufferByteLength, buffer, []); + const minimumLengthOfResults = byteLength * 2; + const resultingCodeUnits = fill( + objectCreate( + binaryCodeUnitIterablePrototype, + { length: { value: minimumLengthOfResults } }, + ), + 0x3D, + ); + for (let index = 0; index < byteLength;) { + const codeUnitIndex = index * 2; + const datum = call(viewGetUint8, dataView, [index++]); + const u4s = [datum >> 4, datum & 0xF]; + for (let u4i = 0; u4i < 2; ++u4i) { + const u4 = u4s[u4i]; + const result = u4 < 10 ? u4 + 48 : u4 < 16 ? u4 + 55 : -1; + if (result < 0) { + throw new RangeError( + `Piscēs: Unexpected Base16 value: ${u4}.`, + ); + } else { + resultingCodeUnits[codeUnitIndex + u4i] = result; + } + } + } + return stringFromCodeUnits(...resultingCodeUnits); +}; + +/** + * Returns the result of encoding the provided ArrayBuffer into a + * base32 string. + * + * ※ This function is not exposed. + */ +const encodeBase32 = (buffer, wrmg = false) => { + const dataView = new View(buffer); + const byteLength = call(getBufferByteLength, buffer, []); + const minimumLengthOfResults = ceil(byteLength * 8 / 5); + const fillByte = wrmg ? 0x2D : 0x3D; + const resultingCodeUnits = fill( + objectCreate( + binaryCodeUnitIterablePrototype, + { + length: { + value: minimumLengthOfResults + + (8 - (minimumLengthOfResults % 8)) % 8, + }, + }, + ), + fillByte, + ); + for (let index = 0; index < byteLength;) { + const codeUnitIndex = ceil(index * 8 / 5); + const currentIndex = codeUnitIndex + +( + 0b01011 & 1 << index % 5 && + resultingCodeUnits[codeUnitIndex] != fillByte + ); // bytes 0, 1 & 3 handle two letters; this is for the second + const remainder = currentIndex % 8; + const currentByte = call(viewGetUint8, dataView, [index]); + const nextByte = + 0b01011010 & 1 << remainder && ++index < byteLength + // digits 1, 3, 4 & 6 span multiple bytes + ? call(viewGetUint8, dataView, [index]) + : 0; + const u5 = remainder == 0 + ? currentByte >> 3 + : remainder == 1 + ? (currentByte & 0b00000111) << 2 | nextByte >> 6 + : remainder == 2 + ? (currentByte & 0b00111111) >> 1 + : remainder == 3 + ? (currentByte & 0b00000001) << 4 | nextByte >> 4 + : remainder == 4 + ? (currentByte & 0b00001111) << 1 | nextByte >> 7 + : remainder == 5 + ? (currentByte & 0b01111111) >> 2 + : remainder == 6 + ? (currentByte & 0b00000011) << 3 | nextByte >> 5 + : (++index, currentByte & 0b00011111); // remainder == 7 + const result = wrmg + ? u5 < 10 ? u5 + 48 : u5 < 18 + ? u5 + 55 + // skip I + : u5 < 20 + ? u5 + 56 + // skip L + : u5 < 22 + ? u5 + 57 + // skip O + : u5 < 27 + ? u5 + 58 + // skip U + : u5 < 32 + ? u5 + 59 + : -1 + : u5 < 26 + ? u5 + 65 + : u5 < 32 + ? u5 + 24 + : -1; + if (result < 0) { + throw new RangeError(`Piscēs: Unexpected Base32 value: ${u5}.`); } else { - call(viewSetUint8, dataView, [ - dataIndex, - ((u6s[index] & 0x3) << 6) + u6s[++index], - ]); + resultingCodeUnits[currentIndex] = result; } } - return call(getViewBuffer, dataView, []); + const answer = stringFromCodeUnits(...resultingCodeUnits); + return wrmg ? answer.replace(/-+$/u, "") : answer; }; /** @@ -186,21 +488,20 @@ const encodeBase64 = (buffer, safe = false) => { const codeUnitIndex = ceil(index * 4 / 3); const currentIndex = codeUnitIndex + +( index % 3 == 0 && resultingCodeUnits[codeUnitIndex] != 0x3D - ); + ); // every third byte handles two letters; this is for the second const remainder = currentIndex % 4; + const currentByte = call(viewGetUint8, dataView, [index]); + const nextByte = remainder % 3 && ++index < byteLength + // digits 1 & 2 span multiple bytes + ? call(viewGetUint8, dataView, [index]) + : 0; const u6 = remainder == 0 - ? call(viewGetUint8, dataView, [index]) >> 2 + ? currentByte >> 2 : remainder == 1 - ? ((call(viewGetUint8, dataView, [index++]) & 0x3) << 4) + - (index < byteLength - ? call(viewGetUint8, dataView, [index]) >> 4 - : 0) + ? (currentByte & 0b00000011) << 4 | nextByte >> 4 : remainder == 2 - ? ((call(viewGetUint8, dataView, [index++]) & 0xF) << 2) + - (index < byteLength - ? call(viewGetUint8, dataView, [index]) >> 6 - : 0) - : call(viewGetUint8, dataView, [index++]) & 0x3F; + ? (currentByte & 0b00001111) << 2 | nextByte >> 6 + : (++index, currentByte & 0b00111111); // remainder == 3 const result = u6 < 26 ? u6 + 65 : u6 < 52 @@ -241,11 +542,58 @@ const sourceFromArgs = ($, $s) => "", ); +/** + * Returns an ArrayBuffer generated from the provided base16 string. + * + * This function can also be used as a tag for a template literal. The + * literal will be interpreted akin to `String.raw`. + * + * ☡ This function throws if the provided string is not a valid base16 + * string. + */ +export const base16Binary = ($, ...$s) => + decodeBase16(sourceFromArgs($, $s)); + +/** + * Returns a (big‐endian) base16 string created from the provided typed + * array, buffer, or (16‐bit) string. + * + * This function can also be used as a tag for a template literal. The + * literal will be interpreted akin to `String.raw`. + */ +export const base16String = ($, ...$s) => + encodeBase16(bufferFromArgs($, $s)); + +/** + * Returns an ArrayBuffer generated from the provided base32 string. + * + * This function can also be used as a tag for a template literal. The + * literal will be interpreted akin to `String.raw`. + * + * ☡ This function throws if the provided string is not a valid base32 + * string. + */ +export const base32Binary = ($, ...$s) => + decodeBase32(sourceFromArgs($, $s)); + +/** + * Returns a (big‐endian) base32 string created from the provided typed + * array, buffer, or (16‐bit) string. + * + * This function can also be used as a tag for a template literal. The + * literal will be interpreted akin to `String.raw`. + */ +export const base32String = ($, ...$s) => + encodeBase32(bufferFromArgs($, $s)); + /** * Returns an ArrayBuffer generated from the provided base64 string. * * This function can also be used as a tag for a template literal. The * literal will be interpreted akin to `String.raw`. + * + * ☡ This function throws if the provided string is not a valid base64 + * string. */ export const base64Binary = ($, ...$s) => decodeBase64(sourceFromArgs($, $s)); @@ -266,6 +614,9 @@ export const base64String = ($, ...$s) => * * This function can also be used as a tag for a template literal. The * literal will be interpreted akin to `String.raw`. + * + * ☡ This function throws if the provided string is not a valid + * filename‐safe base64 string. */ export const filenameSafeBase64Binary = ($, ...$s) => decodeBase64(sourceFromArgs($, $s), true); @@ -280,6 +631,41 @@ export const filenameSafeBase64Binary = ($, ...$s) => export const filenameSafeBase64String = ($, ...$s) => encodeBase64(bufferFromArgs($, $s), true); +/** + * Returns whether the provided value is a base16 string. + * + * ※ This function returns false if the provided value is not a string + * primitive. + */ +export const isBase16 = ($) => { + if (typeof $ !== "string") { + return false; + } else { + const source = stringReplace($, /[\t\n\f\r ]+/gu, ""); + return source.length % 2 != 1 && + call(reExec, /[^0-9A-F]/iu, [source]) == null; + } +}; + +/** + * Returns whether the provided value is a base32 string. + * + * ※ This function returns false if the provided value is not a string + * primitive. + */ +export const isBase32 = ($) => { + if (typeof $ !== "string") { + return false; + } else { + const source = stringReplace($, /[\t\n\f\r ]+/gu, ""); + const trimmed = source.length % 8 == 0 + ? stringReplace(source, /(?:=|={3,4}|={6})$/u, "") + : source; + return trimmed.length % 8 != 1 && + call(reExec, /[^2-7A-Z/]/iu, [trimmed]) == null; + } +}; + /** * Returns whether the provided value is a Base64 string. * @@ -317,3 +703,44 @@ export const isFilenameSafeBase64 = ($) => { call(reExec, /[^0-9A-Za-z_-]/u, [trimmed]) == null; } }; + +/** + * Returns whether the provided value is a W·R·M·G (Crockford) base32 + * string. Check digits are not supported. + * + * ※ This function returns false if the provided value is not a string + * primitive. + */ +export const isWRMGBase32 = ($) => { + if (typeof $ !== "string") { + return false; + } else { + const source = stringReplace($, /[\t\n\f\r ]+/gu, ""); + const trimmed = stringReplace(source, /-/gu, ""); + return trimmed.length % 8 != 1 && + call(reExec, /[^0-9A-TV-Z]/iu, [trimmed]) == null; + } +}; + +/** + * Returns an ArrayBuffer generated from the provided W·R·M·G + * (Crockford) base32 string. + * + * This function can also be used as a tag for a template literal. The + * literal will be interpreted akin to `String.raw`. + * + * ☡ This function throws if the provided string is not a valid W·R·M·G + * base32 string. + */ +export const wrmgBase32Binary = ($, ...$s) => + decodeBase32(sourceFromArgs($, $s), true); + +/** + * Returns a (big‐endian) W·R·M·G (Crockford) base32 string created + * from the provided typed array, buffer, or (16‐bit) string. + * + * This function can also be used as a tag for a template literal. The + * literal will be interpreted akin to `String.raw`. + */ +export const wrmgBase32String = ($, ...$s) => + encodeBase32(bufferFromArgs($, $s), true);