You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
48 lines
1.2 KiB
TypeScript
48 lines
1.2 KiB
TypeScript
// Masks selecting the leading 1..5 bits of a byte.
const FIRST_BIT = 0x80;
const FIRST_TWO_BITS = 0xc0;
const FIRST_THREE_BITS = 0xe0;
const FIRST_FOUR_BITS = 0xf0;
const FIRST_FIVE_BITS = 0xf8;

// Leading-bit patterns (after masking) that identify each kind of byte.
const TWO_BIT_CHAR = 0xc0; // 110xxxxx: lead byte of a 2-byte sequence
const THREE_BIT_CHAR = 0xe0; // 1110xxxx: lead byte of a 3-byte sequence
const FOUR_BIT_CHAR = 0xf0; // 11110xxx: lead byte of a 4-byte sequence
const CONTINUING_CHAR = 0x80; // 10xxxxxx: continuation byte

// Inclusive range of a continuation byte when the lead byte imposes no
// tighter restriction on it.
const CONTINUATION_MIN = 0x80;
const CONTINUATION_MAX = 0xbf;

/**
 * Determines whether `bytes[start]` .. `bytes[end - 1]` form well-formed UTF-8.
 *
 * In addition to checking the lead/continuation bit patterns, this rejects
 * overlong encodings, UTF-16 surrogate code points (U+D800..U+DFFF), code
 * points above U+10FFFF, sequences truncated at `end`, and ranges that extend
 * past the available elements.
 *
 * Only the low 8 bits of each element are examined, so signed byte containers
 * (e.g. `Int8Array`) are handled the same way as unsigned ones.
 *
 * @param bytes - An indexable collection of 8-bit bytes
 * @param start - The index to start validating (inclusive)
 * @param end - The index to end validating (exclusive)
 * @returns `true` if the range is well-formed UTF-8 (an empty range is valid),
 * otherwise `false`
 */
export function validateUtf8(
  bytes: { [index: number]: number },
  start: number,
  end: number
): boolean {
  // Number of continuation bytes still expected for the current sequence.
  let continuation = 0;
  // Inclusive bounds for the NEXT continuation byte. Some lead bytes narrow
  // the range of the byte that immediately follows them; the bounds are reset
  // to the defaults once that byte has been consumed.
  let lower = CONTINUATION_MIN;
  let upper = CONTINUATION_MAX;

  for (let i = start; i < end; i += 1) {
    const raw = bytes[i];
    if (raw === undefined) {
      // The requested range runs past the available data.
      return false;
    }
    // Normalize to an unsigned byte so the range comparisons below are safe.
    const byte = raw & 0xff;

    if (continuation) {
      if ((byte & FIRST_TWO_BITS) !== CONTINUING_CHAR || byte < lower || byte > upper) {
        return false;
      }
      lower = CONTINUATION_MIN;
      upper = CONTINUATION_MAX;
      continuation -= 1;
    } else if (byte & FIRST_BIT) {
      if ((byte & FIRST_THREE_BITS) === TWO_BIT_CHAR) {
        if (byte < 0xc2) {
          // 0xC0 and 0xC1 could only encode U+0000..U+007F (overlong).
          return false;
        }
        continuation = 1;
      } else if ((byte & FIRST_FOUR_BITS) === THREE_BIT_CHAR) {
        if (byte === 0xe0) {
          // Below 0xA0 would be an overlong encoding of U+0000..U+07FF.
          lower = 0xa0;
        } else if (byte === 0xed) {
          // Above 0x9F would encode a surrogate (U+D800..U+DFFF).
          upper = 0x9f;
        }
        continuation = 2;
      } else if ((byte & FIRST_FIVE_BITS) === FOUR_BIT_CHAR) {
        if (byte > 0xf4) {
          // 0xF5..0xF7 can only encode code points above U+10FFFF.
          return false;
        }
        if (byte === 0xf0) {
          // Below 0x90 would be an overlong encoding of U+0000..U+FFFF.
          lower = 0x90;
        } else if (byte === 0xf4) {
          // Above 0x8F would exceed U+10FFFF.
          upper = 0x8f;
        }
        continuation = 3;
      } else {
        // Stray continuation byte (10xxxxxx) or invalid lead byte (11111xxx).
        return false;
      }
    }
  }

  // A sequence still awaiting continuation bytes is truncated.
  return !continuation;
}