t/dataoptions.js

129 lines
3.5 KiB
JavaScript
Raw Permalink Normal View History

import { PLAINTEXT, LANGUAGES } from "./languages.js";
2022-11-10 18:05:08 +00:00
// Shared text codec instances, created once when the module is loaded.
// TextEncoder always produces UTF-8; TextDecoder, constructed without a
// label, decodes UTF-8.
const UTF8_ENCODER = new TextEncoder();
const UTF8_DECODER = new TextDecoder();
2022-11-10 18:13:24 +00:00
/**
 * Base class for errors raised while handling data options.
 *
 * Sets `name` to the concrete class name so that logs, `String(err)` and
 * stack traces show e.g. "DecodeError: ..." instead of a generic "Error: ...".
 */
class DataOptionsError extends Error {
  /**
   * @param {...*} args Forwarded unchanged to the `Error` constructor
   *   (message and, optionally, an options object such as `{ cause }`).
   */
  constructor(...args) {
    super(...args);
    // new.target is the class actually being constructed, so subclasses
    // get their own name without needing a constructor of their own.
    this.name = new.target.name;
  }
}

/** Raised when a binary options header cannot be decoded. */
class DecodeError extends DataOptionsError {}
2022-11-10 18:05:08 +00:00
/**
 * Data options are stored in a binary header before the payload.
 *
 * The header consists of a fixed section, followed by a dynamic section.
 * The fixed section consists of bytes with a fixed meaning and position.
 * The dynamic section is based on the contents of the fixed section and
 * contains the variable-length data that the fixed section describes.
 *
 * Fixed section:
 *
 * Byte 1
 * |xxx----y|
 * x = number of extra bytes (other than this byte) in the header's fixed
 *     section (uint)
 * y = highest bit of the language ID (the rest is in byte 2)
 *
 * Byte 2 (+ lowest bit of byte 1) is the language ID (uint), i.e. the index
 * of the language in the LANGUAGE_NAMES list.
 *
 * Byte 3 is the number of bytes reserved for the page title.
 *
 * Dynamic section parts:
 *
 * 1. Bytes of the page title (UTF-8).
 *
 * ---
 *
 * NOTE: If options are set to their default values, the header is minimised
 * to not include those bytes if possible.
 */
export class DataOptions {
  /** Language of the payload; `PLAINTEXT` is the default. */
  language = PLAINTEXT;

  /** Page title; the empty string is the default and means "no title". */
  title = "";

  /**
   * Parse options from uncompressed bytes.
   *
   * `language` and `title` are only updated once the whole header has been
   * validated, so a failed parse leaves this instance untouched.
   *
   * @param {Uint8Array} data Header followed by the payload.
   * @returns {Uint8Array} The data without the header (a view on `data`,
   *   not a copy).
   * @throws {DecodeError} If the header claims more bytes than `data` holds.
   */
  parseFrom(data) {
    const byte1 = data[0];
    // The top three bits count the extra fixed bytes; +1 for byte 1 itself.
    const fixedHeaderBytes = ((byte1 & 0b11100000) >>> 5) + 1;

    // An empty chunk is tolerated and parsed as "all defaults"; any other
    // chunk must contain the whole fixed section it announces.
    if (data.length > 0 && fixedHeaderBytes > data.length) {
      throw new DecodeError(
        `Fixed header length ${fixedHeaderBytes} was bigger than chunk size ${data.length}.`
      );
    }

    let language = PLAINTEXT;
    if (fixedHeaderBytes >= 2) {
      // 9-bit language ID: high bit lives in byte 1, low byte in byte 2.
      const languageIDHighBit = byte1 & 0b00000001;
      const languageID = (languageIDHighBit << 8) | data[1];
      // Unknown IDs fall back to plain text.
      if (LANGUAGES.has(languageID)) {
        language = LANGUAGES.get(languageID);
      }
    }

    let titleLen = 0;
    let title = "";
    if (fixedHeaderBytes >= 3) {
      titleLen = data[2];
      // The title starts right after the fixed section, so only the bytes
      // behind it are available for the title.
      const remaining = data.length - fixedHeaderBytes;
      if (titleLen > remaining) {
        throw new DecodeError(
          `Title length ${titleLen} was bigger than the remaining chunk size ${remaining}.`
        );
      }
      title = UTF8_DECODER.decode(
        data.subarray(fixedHeaderBytes, fixedHeaderBytes + titleLen)
      );
    }

    this.language = language;
    this.title = title;
    return data.subarray(fixedHeaderBytes + titleLen);
  }

  /**
   * Serialize options to uncompressed bytes.
   *
   * Options left at their defaults are omitted, so the smallest header is a
   * single zero byte.
   *
   * @param {Uint8Array} data The payload.
   * @returns {Uint8Array} Data with the options in a header.
   * @throws {DataOptionsError} If the UTF-8 encoded title is longer than
   *   255 bytes and therefore does not fit the one-byte length field.
   */
  serializeTo(data) {
    const maxTitleBytes = 0xff;

    const hasTitleBytes = this.title !== "";
    // The title-length byte comes after the language byte, so a title forces
    // the language byte to be written even for plain text.
    const hasLanguageBytes = hasTitleBytes || this.language !== PLAINTEXT;

    const fixedBytes = [];
    let languageIDHighBit = 0;
    let titleBytes = new Uint8Array(0);

    if (hasLanguageBytes) {
      const languageID = LANGUAGES.get(this.language);
      languageIDHighBit = (languageID & 0b100000000) >>> 8;
      fixedBytes.push(languageID & 0b011111111);
    }

    if (hasTitleBytes) {
      titleBytes = UTF8_ENCODER.encode(this.title);
      // Without this check the length would silently wrap modulo 256 when
      // stored in a byte and the header could no longer be decoded.
      if (titleBytes.length > maxTitleBytes) {
        throw new DataOptionsError(
          `Title is ${titleBytes.length} bytes long, but at most ${maxTitleBytes} bytes fit in the header.`
        );
      }
      fixedBytes.push(titleBytes.length);
    }

    const byte1 = ((fixedBytes.length & 0b00000111) << 5) | languageIDHighBit;

    const fixedLength = 1 + fixedBytes.length;
    const headerLength = fixedLength + titleBytes.length;
    const combined = new Uint8Array(headerLength + data.length);
    combined.set([byte1, ...fixedBytes], 0);
    combined.set(titleBytes, fixedLength);
    combined.set(data, headerLength);
    return combined;
  }
}