first
This commit is contained in:
187
node_modules/@rgrove/parse-xml/dist/lib/StringScanner.js
generated
vendored
Normal file
187
node_modules/@rgrove/parse-xml/dist/lib/StringScanner.js
generated
vendored
Normal file
@ -0,0 +1,187 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.StringScanner = void 0;
|
||||
const emptyString = '';
|
||||
const surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
|
||||
/** @private */
|
||||
class StringScanner {
|
||||
constructor(string) {
|
||||
this.charCount = this.charLength(string, true);
|
||||
this.charIndex = 0;
|
||||
this.length = string.length;
|
||||
this.multiByteMode = this.charCount !== this.length;
|
||||
this.string = string;
|
||||
if (this.multiByteMode) {
|
||||
let charsToBytes = [];
|
||||
// Create a mapping of character indexes to byte indexes. Since the string
|
||||
// contains multibyte characters, a byte index may not necessarily align
|
||||
// with a character index.
|
||||
for (let byteIndex = 0, charIndex = 0; charIndex < this.charCount; ++charIndex) {
|
||||
charsToBytes[charIndex] = byteIndex;
|
||||
byteIndex += string.codePointAt(byteIndex) > 65535 ? 2 : 1;
|
||||
}
|
||||
this.charsToBytes = charsToBytes;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Whether the current character index is at the end of the input string.
|
||||
*/
|
||||
get isEnd() {
|
||||
return this.charIndex >= this.charCount;
|
||||
}
|
||||
// -- Protected Methods ------------------------------------------------------
|
||||
/**
|
||||
* Returns the number of characters in the given string, which may differ from
|
||||
* the byte length if the string contains multibyte characters.
|
||||
*/
|
||||
charLength(string, multiByteSafe = this.multiByteMode) {
|
||||
// We could get the char length with `[ ...string ].length`, but that's
|
||||
// actually slower than replacing surrogate pairs with single-byte
|
||||
// characters and then counting the result.
|
||||
return multiByteSafe
|
||||
? string.replace(surrogatePair, '_').length
|
||||
: string.length;
|
||||
}
|
||||
// -- Public Methods ---------------------------------------------------------
|
||||
/**
|
||||
* Advances the scanner by the given number of characters, stopping if the end
|
||||
* of the string is reached.
|
||||
*/
|
||||
advance(count = 1) {
|
||||
this.charIndex = Math.min(this.charCount, this.charIndex + count);
|
||||
}
|
||||
/**
|
||||
* Returns the byte index of the given character index in the string. The two
|
||||
* may differ in strings that contain multibyte characters.
|
||||
*/
|
||||
charIndexToByteIndex(charIndex = this.charIndex) {
|
||||
return this.multiByteMode
|
||||
? this.charsToBytes[charIndex] ?? Infinity
|
||||
: charIndex;
|
||||
}
|
||||
/**
|
||||
* Consumes and returns the given number of characters if possible, advancing
|
||||
* the scanner and stopping if the end of the string is reached.
|
||||
*
|
||||
* If no characters could be consumed, an empty string will be returned.
|
||||
*/
|
||||
consume(charCount = 1) {
|
||||
let chars = this.peek(charCount);
|
||||
this.advance(charCount);
|
||||
return chars;
|
||||
}
|
||||
/**
|
||||
* Consumes and returns the given number of bytes if possible, advancing the
|
||||
* scanner and stopping if the end of the string is reached.
|
||||
*
|
||||
* It's up to the caller to ensure that the given byte count doesn't split a
|
||||
* multibyte character.
|
||||
*
|
||||
* If no bytes could be consumed, an empty string will be returned.
|
||||
*/
|
||||
consumeBytes(byteCount) {
|
||||
let byteIndex = this.charIndexToByteIndex();
|
||||
let result = this.string.slice(byteIndex, byteIndex + byteCount);
|
||||
this.advance(this.charLength(result));
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* Consumes and returns all characters for which the given function returns
|
||||
* `true`, stopping when `false` is returned or the end of the input is
|
||||
* reached.
|
||||
*/
|
||||
consumeMatchFn(fn) {
|
||||
let { length, multiByteMode, string } = this;
|
||||
let startByteIndex = this.charIndexToByteIndex();
|
||||
let endByteIndex = startByteIndex;
|
||||
if (multiByteMode) {
|
||||
while (endByteIndex < length) {
|
||||
let char = string[endByteIndex];
|
||||
let isSurrogatePair = char >= '\uD800' && char <= '\uDBFF';
|
||||
if (isSurrogatePair) {
|
||||
char += string[endByteIndex + 1];
|
||||
}
|
||||
if (!fn(char)) {
|
||||
break;
|
||||
}
|
||||
endByteIndex += isSurrogatePair ? 2 : 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
while (endByteIndex < length && fn(string[endByteIndex])) {
|
||||
++endByteIndex;
|
||||
}
|
||||
}
|
||||
return this.consumeBytes(endByteIndex - startByteIndex);
|
||||
}
|
||||
/**
|
||||
* Consumes the given string if it exists at the current character index, and
|
||||
* advances the scanner.
|
||||
*
|
||||
* If the given string doesn't exist at the current character index, an empty
|
||||
* string will be returned and the scanner will not be advanced.
|
||||
*/
|
||||
consumeString(stringToConsume) {
|
||||
let { length } = stringToConsume;
|
||||
let byteIndex = this.charIndexToByteIndex();
|
||||
if (stringToConsume === this.string.slice(byteIndex, byteIndex + length)) {
|
||||
this.advance(length === 1 ? 1 : this.charLength(stringToConsume));
|
||||
return stringToConsume;
|
||||
}
|
||||
return emptyString;
|
||||
}
|
||||
/**
|
||||
* Consumes characters until the given global regex is matched, advancing the
|
||||
* scanner up to (but not beyond) the beginning of the match. If the regex
|
||||
* doesn't match, nothing will be consumed.
|
||||
*
|
||||
* Returns the consumed string, or an empty string if nothing was consumed.
|
||||
*/
|
||||
consumeUntilMatch(regex) {
|
||||
let matchByteIndex = this.string
|
||||
.slice(this.charIndexToByteIndex())
|
||||
.search(regex);
|
||||
return matchByteIndex > 0
|
||||
? this.consumeBytes(matchByteIndex)
|
||||
: emptyString;
|
||||
}
|
||||
/**
|
||||
* Consumes characters until the given string is found, advancing the scanner
|
||||
* up to (but not beyond) that point. If the string is never found, nothing
|
||||
* will be consumed.
|
||||
*
|
||||
* Returns the consumed string, or an empty string if nothing was consumed.
|
||||
*/
|
||||
consumeUntilString(searchString) {
|
||||
let byteIndex = this.charIndexToByteIndex();
|
||||
let matchByteIndex = this.string.indexOf(searchString, byteIndex);
|
||||
return matchByteIndex > 0
|
||||
? this.consumeBytes(matchByteIndex - byteIndex)
|
||||
: emptyString;
|
||||
}
|
||||
/**
|
||||
* Returns the given number of characters starting at the current character
|
||||
* index, without advancing the scanner and without exceeding the end of the
|
||||
* input string.
|
||||
*/
|
||||
peek(count = 1) {
|
||||
let { charIndex, string } = this;
|
||||
return this.multiByteMode
|
||||
? string.slice(this.charIndexToByteIndex(charIndex), this.charIndexToByteIndex(charIndex + count))
|
||||
: string.slice(charIndex, charIndex + count);
|
||||
}
|
||||
/**
|
||||
* Resets the scanner position to the given character _index_, or to the start
|
||||
* of the input string if no index is given.
|
||||
*
|
||||
* If _index_ is negative, the scanner position will be moved backward by that
|
||||
* many characters, stopping if the beginning of the string is reached.
|
||||
*/
|
||||
reset(index = 0) {
|
||||
this.charIndex = index >= 0
|
||||
? Math.min(this.charCount, index)
|
||||
: Math.max(0, this.charIndex + index);
|
||||
}
|
||||
}
|
||||
exports.StringScanner = StringScanner;
|
||||
//# sourceMappingURL=StringScanner.js.map
|
||||
Reference in New Issue
Block a user