first
This commit is contained in:
33
node_modules/@rgrove/parse-xml/src/index.ts
generated
vendored
Normal file
33
node_modules/@rgrove/parse-xml/src/index.ts
generated
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
import { Parser } from './lib/Parser.js';
|
||||
|
||||
import type { ParserOptions } from './lib/Parser.js';
|
||||
|
||||
export * from './lib/types.js';
|
||||
export { XmlCdata } from './lib/XmlCdata.js';
|
||||
export { XmlComment } from './lib/XmlComment.js';
|
||||
export { XmlDeclaration } from './lib/XmlDeclaration.js';
|
||||
export { XmlDocument } from './lib/XmlDocument.js';
|
||||
export { XmlDocumentType } from './lib/XmlDocumentType.js';
|
||||
export { XmlElement } from './lib/XmlElement.js';
|
||||
export { XmlError } from './lib/XmlError.js';
|
||||
export { XmlNode } from './lib/XmlNode.js';
|
||||
export { XmlProcessingInstruction } from './lib/XmlProcessingInstruction.js';
|
||||
export { XmlText } from './lib/XmlText.js';
|
||||
|
||||
export type { ParserOptions } from './lib/Parser.js';
|
||||
|
||||
/**
 * Parses an XML string and returns the resulting `XmlDocument`, i.e. the
 * `document` property of a `Parser` constructed from the given arguments.
 *
 * @example
 *
 *     import { parseXml } from '@rgrove/parse-xml';
 *     let doc = parseXml('<kittens fuzzy="yes">I like fuzzy kittens.</kittens>');
 *
 * @param xml XML string to parse.
 * @param options Parser options.
 * @returns The parsed document tree.
 */
export function parseXml(xml: string, options?: ParserOptions) {
  const parser = new Parser(xml, options);
  return parser.document;
}
|
||||
906
node_modules/@rgrove/parse-xml/src/lib/Parser.ts
generated
vendored
Normal file
906
node_modules/@rgrove/parse-xml/src/lib/Parser.ts
generated
vendored
Normal file
@ -0,0 +1,906 @@
|
||||
import { StringScanner } from './StringScanner.js';
|
||||
import * as syntax from './syntax.js';
|
||||
import { XmlCdata } from './XmlCdata.js';
|
||||
import { XmlComment } from './XmlComment.js';
|
||||
import { XmlDeclaration } from './XmlDeclaration.js';
|
||||
import { XmlDocument } from './XmlDocument.js';
|
||||
import { XmlDocumentType } from './XmlDocumentType.js';
|
||||
import { XmlElement } from './XmlElement.js';
|
||||
import { XmlError } from './XmlError.js';
|
||||
import { XmlNode } from './XmlNode.js';
|
||||
import { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
|
||||
import { XmlText } from './XmlText.js';
|
||||
|
||||
// Shared empty string, used in this module as the initial value of string
// accumulators and as the "nothing consumed" result of `consumeName()`.
const emptyString = '';
|
||||
|
||||
/**
|
||||
* Parses an XML string into an `XmlDocument`.
|
||||
*
|
||||
* @private
|
||||
*/
|
||||
export class Parser {
|
||||
readonly document: XmlDocument;
|
||||
|
||||
private currentNode: XmlDocument | XmlElement;
|
||||
private readonly options: ParserOptions;
|
||||
private readonly scanner: StringScanner;
|
||||
|
||||
/**
 * Creates the document node, sets up the scanner over `xml`, and immediately
 * parses the input (so construction throws whatever `parse()` throws).
 *
 * @param xml XML string to parse.
 * @param options Parser options.
 */
constructor(xml: string, options: ParserOptions = {}) {
  const root = new XmlDocument();

  this.document = root;
  this.currentNode = root;
  this.options = options;
  this.scanner = new StringScanner(xml);

  // The document node always spans the whole input string.
  if (options.includeOffsets) {
    root.start = 0;
    root.end = xml.length;
  }

  this.parse();
}
|
||||
|
||||
/**
 * Appends `node` to the children of `this.currentNode` and sets its parent.
 *
 * When `options.includeOffsets` is enabled, `node.start` is derived from
 * `charIndex` and `node.end` from the scanner's current position.
 *
 * @param node Node to add.
 * @param charIndex Character index at which the node started.
 * @returns Always `true`, so callers can return the result as "consumed".
 */
addNode(node: XmlNode, charIndex: number) {
  const { currentNode, options, scanner } = this;

  node.parent = currentNode;

  if (options.includeOffsets) {
    node.start = scanner.charIndexToByteIndex(charIndex);
    node.end = scanner.charIndexToByteIndex();
  }

  // @ts-expect-error: XmlDocument has a more limited set of possible children
  // than XmlElement so TypeScript is unhappy, but we always do the right
  // thing.
  currentNode.children.push(node);
  return true;
}
|
||||
|
||||
/**
 * Adds `text` (with line breaks normalized) to the current node. If the last
 * child of the current node is already a text node, the text is appended to
 * it; otherwise a new `XmlText` node is created.
 *
 * @param text Text to add.
 * @param charIndex Character index at which the text started.
 * @returns Always `true`.
 */
addText(text: string, charIndex: number) {
  const siblings = this.currentNode.children;
  const lastSibling = siblings[siblings.length - 1];
  const normalized = normalizeLineBreaks(text);

  if (lastSibling?.type !== XmlNode.TYPE_TEXT) {
    return this.addNode(new XmlText(normalized), charIndex);
  }

  // Merge into the preceding text node instead of creating another node.
  const textNode = lastSibling as XmlText;
  textNode.text += normalized;

  if (this.options.includeOffsets) {
    textNode.end = this.scanner.charIndexToByteIndex();
  }

  return true;
}
|
||||
|
||||
/**
 * Consumes zero or more whitespace-separated attributes of a start tag.
 *
 * @returns
 *   A prototype-less object mapping attribute names to values, with keys
 *   sorted by name when `options.sortAttributes` is set.
 *
 * @throws {XmlError}
 *   If an attribute has no value, is duplicated, or is an `xml:space`
 *   attribute with a value other than "default" or "preserve".
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-starttags
 */
consumeAttributes(): Record<string, string> {
  const attrs = Object.create(null);

  while (this.consumeWhitespace()) {
    const name = this.consumeName();

    if (!name) {
      break;
    }

    const value = this.consumeEqual() && this.consumeAttributeValue();

    if (value === false) {
      throw this.error('Attribute value expected');
    }

    if (name in attrs) {
      throw this.error(`Duplicate attribute: ${name}`);
    }

    const isValidXmlSpace = value === 'default' || value === 'preserve';

    if (name === 'xml:space' && !isValidXmlSpace) {
      throw this.error('Value of the `xml:space` attribute must be "default" or "preserve"');
    }

    attrs[name] = value;
  }

  if (!this.options.sortAttributes) {
    return attrs;
  }

  // Rebuild the object so that key insertion order follows the sorted names.
  const sorted = Object.create(null);

  for (const name of Object.keys(attrs).sort()) {
    sorted[name] = attrs[name];
  }

  return sorted;
}
|
||||
|
||||
/**
 * Consumes a quoted `AttValue` (attribute value) if the scanner is positioned
 * at a single or double quote.
 *
 * Literal runs are validated and have their whitespace normalized to spaces;
 * references are resolved via `consumeReference()`.
 *
 * @returns
 *   The value without its quotes (possibly an empty string), or `false` if
 *   the next character isn't a quote and nothing was consumed.
 *
 * @throws {XmlError}
 *   If the value contains an unescaped `<` or is never closed.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
 */
consumeAttributeValue(): string | false {
  const { scanner } = this;
  const quote = scanner.peek();

  if (quote !== '"' && quote !== "'") {
    return false;
  }

  scanner.advance();

  const charPattern = quote === '"'
    ? syntax.attValueCharDoubleQuote
    : syntax.attValueCharSingleQuote;

  let value = emptyString;

  while (!scanner.isEnd) {
    const chars = scanner.consumeUntilMatch(charPattern);

    if (chars) {
      this.validateChars(chars);
      value += chars.replace(syntax.attValueNormalizedWhitespace, ' ');
    }

    const next = scanner.peek();

    if (next === quote) {
      // Closing quote: consume it and finish.
      scanner.advance();
      return value;
    }

    if (next === '&') {
      value += this.consumeReference();
      continue;
    }

    if (next === '<') {
      throw this.error('Unescaped `<` is not allowed in an attribute value');
    }

    // Anything else means the value can't be continued.
    break;
  }

  throw this.error('Unclosed attribute');
}
|
||||
|
||||
/**
 * Consumes a `<![CDATA[ ... ]]>` section if the scanner is positioned at one.
 *
 * The content becomes an `XmlCdata` node when `options.preserveCdata` is set,
 * and ordinary text otherwise.
 *
 * @returns Whether a CDATA section was consumed.
 * @throws {XmlError} If the section is never closed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
 */
consumeCdataSection(): boolean {
  const { scanner } = this;
  const sectionStart = scanner.charIndex;

  if (!scanner.consumeString('<![CDATA[')) {
    return false;
  }

  const cdata = scanner.consumeUntilString(']]>');
  this.validateChars(cdata);

  if (!scanner.consumeString(']]>')) {
    throw this.error('Unclosed CDATA section');
  }

  if (this.options.preserveCdata) {
    return this.addNode(new XmlCdata(normalizeLineBreaks(cdata)), sectionStart);
  }

  return this.addText(cdata, sectionStart);
}
|
||||
|
||||
/**
 * Consumes character data up to the next match of `syntax.endCharData` and
 * adds it to the document as text.
 *
 * @returns Whether character data was consumed.
 * @throws {XmlError} If the character data is followed directly by `]]>`.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
 */
consumeCharData(): boolean {
  const { scanner } = this;
  const textStart = scanner.charIndex;
  const text = scanner.consumeUntilMatch(syntax.endCharData);

  if (text) {
    this.validateChars(text);

    if (scanner.peek(3) === ']]>') {
      throw this.error('Element content may not contain the CDATA section close delimiter `]]>`');
    }

    return this.addText(text, textStart);
  }

  return false;
}
|
||||
|
||||
/**
 * Consumes a `<!-- ... -->` comment if the scanner is positioned at one.
 *
 * The comment is added to the document as an `XmlComment` node only when
 * `options.preserveComments` is set; otherwise it is discarded.
 *
 * @returns Whether a comment was consumed.
 * @throws {XmlError} If the comment contains `--` or is never closed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Comment
 */
consumeComment(): boolean {
  const { scanner } = this;
  const commentStart = scanner.charIndex;

  if (!scanner.consumeString('<!--')) {
    return false;
  }

  const body = scanner.consumeUntilString('--');
  this.validateChars(body);

  if (scanner.consumeString('-->')) {
    if (this.options.preserveComments) {
      return this.addNode(new XmlComment(normalizeLineBreaks(body)), commentStart);
    }

    return true;
  }

  // We stopped at something other than the closing delimiter: either a stray
  // `--` inside the comment, or the input ended.
  const message = scanner.peek(2) === '--'
    ? "The string `--` isn't allowed inside a comment"
    : 'Unclosed comment';

  throw this.error(message);
}
|
||||
|
||||
/**
 * Consumes a reference in a content context if possible.
 *
 * This differs from `consumeReference()` in that a consumed reference will be
 * added to the document as a text node instead of returned.
 *
 * A reference that resolves to an empty string (for example via
 * `options.resolveUndefinedEntity`) still counts as consumed; it simply adds
 * no text. Only a `false` result from `consumeReference()` means that nothing
 * was consumed.
 *
 * @returns Whether a reference was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
 */
consumeContentReference(): boolean {
  let startIndex = this.scanner.charIndex;
  let ref = this.consumeReference();

  if (ref === false) {
    return false;
  }

  // Don't test `ref` for truthiness: an empty replacement was consumed too,
  // and reporting `false` here would end the element's content loop early.
  return ref === emptyString
    ? true
    : this.addText(ref, startIndex);
}
|
||||
|
||||
/**
 * Consumes a `<!DOCTYPE ...>` declaration if the scanner is positioned at one.
 *
 * Parsing is deliberately loose: the name, optional public/system identifiers
 * and optional internal subset are extracted, but the internal subset is kept
 * as raw text. The declaration is added to the document as an
 * `XmlDocumentType` node only when `options.preserveDocumentType` is set.
 *
 * @returns Whether a doctype declaration was consumed.
 * @throws {XmlError} If a required part is missing or the declaration is unclosed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
 */
consumeDoctypeDeclaration(): boolean {
  const { scanner } = this;
  const declStart = scanner.charIndex;

  if (!scanner.consumeString('<!DOCTYPE')) {
    return false;
  }

  const name = this.consumeWhitespace() && this.consumeName();

  if (!name) {
    throw this.error('Expected a name');
  }

  let publicId;
  let systemId;

  if (this.consumeWhitespace()) {
    const hasPublicId = Boolean(scanner.consumeString('PUBLIC'));

    if (hasPublicId) {
      publicId = this.consumeWhitespace() && this.consumePubidLiteral();

      if (publicId === false) {
        throw this.error('Expected a public identifier');
      }

      this.consumeWhitespace();
    }

    // A system identifier follows either a public identifier or `SYSTEM`.
    if (hasPublicId || scanner.consumeString('SYSTEM')) {
      this.consumeWhitespace();
      systemId = this.consumeSystemLiteral();

      if (systemId === false) {
        throw this.error('Expected a system identifier');
      }

      this.consumeWhitespace();
    }
  }

  let internalSubset;

  if (scanner.consumeString('[')) {
    // The internal subset may contain comments that contain `]` characters,
    // so we can't use `consumeUntilString()` here.
    internalSubset = scanner.consumeUntilMatch(/\][\x20\t\r\n]*>/);

    if (!scanner.consumeString(']')) {
      throw this.error('Unclosed internal subset');
    }

    this.consumeWhitespace();
  }

  if (!scanner.consumeString('>')) {
    throw this.error('Unclosed doctype declaration');
  }

  if (!this.options.preserveDocumentType) {
    return true;
  }

  return this.addNode(new XmlDocumentType(name, publicId, systemId, internalSubset), declStart);
}
|
||||
|
||||
/**
 * Consumes an element (start tag, content and end tag, or an empty-element
 * tag) if the scanner is positioned at one, and adds it to the current node.
 *
 * Content is consumed recursively; while it is being consumed the element is
 * `this.currentNode`, which is restored to the element's parent afterwards.
 *
 * @returns Whether an element was consumed.
 * @throws {XmlError} If the start tag or end tag is unclosed, or the end tag is missing.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
 */
consumeElement(): boolean {
  const { scanner } = this;
  const elementStart = scanner.charIndex;

  if (!scanner.consumeString('<')) {
    return false;
  }

  const name = this.consumeName();

  if (!name) {
    // Not an element after all (e.g. `</`, `<!` or `<?`); put the `<` back.
    scanner.reset(elementStart);
    return false;
  }

  const attributes = this.consumeAttributes();
  const isEmpty = Boolean(scanner.consumeString('/>'));
  const element = new XmlElement(name, attributes);
  const parent = this.currentNode;

  element.parent = parent;

  if (isEmpty) {
    return this.addNode(element, elementStart);
  }

  if (!scanner.consumeString('>')) {
    throw this.error(`Unclosed start tag for element \`${name}\``);
  }

  this.currentNode = element;

  // Content is character data interleaved with any number of child
  // constructs; keep going for as long as something is consumed.
  this.consumeCharData();

  while (
    this.consumeElement()
      || this.consumeContentReference()
      || this.consumeCdataSection()
      || this.consumeProcessingInstruction()
      || this.consumeComment()
  ) {
    this.consumeCharData();
  }

  const endTagStart = scanner.charIndex;
  const hasMatchingEndTag = scanner.consumeString('</')
    && this.consumeName() === name;

  if (!hasMatchingEndTag) {
    scanner.reset(endTagStart);
    throw this.error(`Missing end tag for element ${name}`);
  }

  this.consumeWhitespace();

  if (!scanner.consumeString('>')) {
    throw this.error(`Unclosed end tag for element ${name}`);
  }

  this.currentNode = parent;

  return this.addNode(element, elementStart);
}
|
||||
|
||||
/**
 * Consumes an `Eq` production (`=` with optional surrounding whitespace) if
 * possible. Leading whitespace is consumed even when no `=` follows.
 *
 * @returns Whether an `Eq` production was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
 */
consumeEqual(): boolean {
  this.consumeWhitespace();

  const foundEquals = Boolean(this.scanner.consumeString('='));

  if (foundEquals) {
    this.consumeWhitespace();
  }

  return foundEquals;
}
|
||||
|
||||
/**
 * Consumes one piece of `Misc` content if possible: a comment, a processing
 * instruction, or a run of whitespace (tried in that order).
 *
 * @returns Whether anything was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
 */
consumeMisc(): boolean {
  if (this.consumeComment()) {
    return true;
  }

  if (this.consumeProcessingInstruction()) {
    return true;
  }

  return this.consumeWhitespace();
}
|
||||
|
||||
/**
 * Consumes a `Name` if the next character is a valid name start character.
 *
 * @returns The consumed name, or an empty string if none was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
 */
consumeName(): string {
  const { scanner } = this;

  if (!syntax.isNameStartChar(scanner.peek())) {
    return emptyString;
  }

  return scanner.consumeMatchFn(syntax.isNameChar);
}
|
||||
|
||||
/**
 * Consumes a `<?target content?>` processing instruction if the scanner is
 * positioned at one, and adds it to the document.
 *
 * @returns Whether a processing instruction was consumed.
 *
 * @throws {XmlError}
 *   If the target is missing, is `xml` in any letter case, isn't followed by
 *   whitespace or `?>`, or if the instruction is never terminated.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
 */
consumeProcessingInstruction(): boolean {
  const { scanner } = this;
  const piStart = scanner.charIndex;

  if (!scanner.consumeString('<?')) {
    return false;
  }

  const target = this.consumeName();

  if (!target) {
    throw this.error('Invalid processing instruction');
  }

  if (target.toLowerCase() === 'xml') {
    // Report the error at the start of the misplaced declaration.
    scanner.reset(piStart);
    throw this.error("XML declaration isn't allowed here");
  }

  if (this.consumeWhitespace()) {
    const content = scanner.consumeUntilString('?>');
    this.validateChars(content);

    if (!scanner.consumeString('?>')) {
      throw this.error('Unterminated processing instruction');
    }

    return this.addNode(new XmlProcessingInstruction(target, normalizeLineBreaks(content)), piStart);
  }

  // No whitespace after the target: only an immediate `?>` is acceptable.
  if (!scanner.consumeString('?>')) {
    throw this.error('Whitespace is required after a processing instruction name');
  }

  return this.addNode(new XmlProcessingInstruction(target), piStart);
}
|
||||
|
||||
/**
 * Consumes the prolog if present: an optional XML declaration, any `Misc`
 * content, and an optional doctype declaration followed by more `Misc`
 * content.
 *
 * @returns Whether anything was consumed (the scanner position advanced).
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
 */
consumeProlog(): boolean {
  const { scanner } = this;
  const prologStart = scanner.charIndex;

  this.consumeXmlDeclaration();

  while (this.consumeMisc()) {
    // Keep consuming until no more `Misc` content is found.
  }

  if (this.consumeDoctypeDeclaration()) {
    while (this.consumeMisc()) {
      // Keep consuming until no more `Misc` content is found.
    }
  }

  return scanner.charIndex > prologStart;
}
|
||||
|
||||
/**
 * Consumes a public identifier literal if possible. The literal is read like
 * a `SystemLiteral` and then checked against the allowed public identifier
 * characters.
 *
 * @returns
 *   Value of the literal without its quotes (possibly an empty string), or
 *   `false` if nothing was consumed.
 *
 * @throws {XmlError} If the literal contains a character that isn't allowed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
 */
consumePubidLiteral(): string | false {
  const { scanner } = this;
  const literalStart = scanner.charIndex;
  const literal = this.consumeSystemLiteral();

  if (literal === false) {
    return literal;
  }

  if (/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(literal)) {
    return literal;
  }

  // Report the error at the start of the offending literal.
  scanner.reset(literalStart);
  throw this.error('Invalid character in public identifier');
}
|
||||
|
||||
/**
 * Consumes a reference if possible.
 *
 * This differs from `consumeContentReference()` in that a consumed reference
 * will be returned rather than added to the document.
 *
 * Character references must consist solely of decimal digits (`&#65;`) or,
 * after a lowercase `x`, hexadecimal digits (`&#x41;`). Named references are
 * looked up in `syntax.predefinedEntities`; unknown names are passed to
 * `options.resolveUndefinedEntity` (if given) and otherwise either returned
 * verbatim (`options.ignoreUndefinedEntities`) or rejected.
 *
 * @returns
 *   Parsed reference value, or `false` if nothing was consumed (to
 *   distinguish from a reference that resolves to an empty string).
 *
 * @throws {XmlError}
 *   If the reference is unterminated, is a malformed character reference,
 *   resolves to a character that isn't allowed in XML, or names an undefined
 *   entity that is neither resolved nor ignored.
 *
 * @throws {TypeError}
 *   If `options.resolveUndefinedEntity` returns a value that is not a
 *   string, `null`, or `undefined`.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Reference
 */
consumeReference(): string | false {
  let { scanner } = this;

  if (!scanner.consumeString('&')) {
    return false;
  }

  let ref = scanner.consumeMatchFn(syntax.isReferenceChar);

  if (scanner.consume() !== ';') {
    throw this.error('Unterminated reference (a reference must end with `;`)');
  }

  let parsedValue;

  if (ref[0] === '#') {
    // This is a character reference.
    let isHex = ref[1] === 'x';
    let digits = ref.slice(isHex ? 2 : 1);
    let digitPattern = isHex ? /^[0-9a-fA-F]+$/ : /^[0-9]+$/;

    // `parseInt()` silently stops at the first character it can't parse, so
    // `&#65abc;` would otherwise be accepted as `&#65;`. Require the whole
    // reference to be digits before converting it.
    if (!digitPattern.test(digits)) {
      throw this.error('Invalid character reference');
    }

    let codePoint = parseInt(digits, isHex ? 16 : 10);

    if (!syntax.isXmlCodePoint(codePoint)) {
      throw this.error('Character reference resolves to an invalid character');
    }

    parsedValue = String.fromCodePoint(codePoint);
  } else {
    // This is an entity reference.
    parsedValue = syntax.predefinedEntities[ref];

    if (parsedValue === undefined) {
      let {
        ignoreUndefinedEntities,
        resolveUndefinedEntity,
      } = this.options;

      let wrappedRef = `&${ref};`; // for backcompat with <= 2.x

      if (resolveUndefinedEntity) {
        let resolvedValue = resolveUndefinedEntity(wrappedRef);

        if (resolvedValue !== null && resolvedValue !== undefined) {
          let type = typeof resolvedValue;

          if (type !== 'string') {
            throw new TypeError(`\`resolveUndefinedEntity()\` must return a string, \`null\`, or \`undefined\`, but returned a value of type ${type}`);
          }

          return resolvedValue;
        }
      }

      if (ignoreUndefinedEntities) {
        return wrappedRef;
      }

      // Rewind to the `&` so the error points at the start of the reference.
      // The scanner counts code points, not UTF-16 code units, so measure the
      // reference the same way.
      scanner.reset(-[ ...wrappedRef ].length);
      throw this.error(`Named entity isn't defined: ${wrappedRef}`);
    }
  }

  return parsedValue;
}
|
||||
|
||||
/**
 * Consumes a quoted `SystemLiteral` if the scanner is positioned at a single
 * or double quote.
 *
 * Unlike an attribute value, the content is taken verbatim up to the matching
 * quote: `<` and `&` are allowed and references aren't replaced.
 *
 * @returns
 *   Value of the literal without its quotes (possibly an empty string), or
 *   `false` if nothing was consumed.
 *
 * @throws {XmlError} If the closing quote is missing.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
 */
consumeSystemLiteral(): string | false {
  const { scanner } = this;
  const delimiter = scanner.consumeString('"') || scanner.consumeString("'");

  if (!delimiter) {
    return false;
  }

  const literal = scanner.consumeUntilString(delimiter);
  this.validateChars(literal);

  if (scanner.consumeString(delimiter)) {
    return literal;
  }

  throw this.error('Missing end quote');
}
|
||||
|
||||
/**
 * Consumes a run of whitespace characters (as defined by
 * `syntax.isWhitespace`) if possible.
 *
 * @returns Whether any whitespace characters were consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
 */
consumeWhitespace(): boolean {
  const whitespace = this.scanner.consumeMatchFn(syntax.isWhitespace);
  return Boolean(whitespace);
}
|
||||
|
||||
/**
 * Consumes an XML declaration if possible.
 *
 * The declaration must contain a `version`, optionally followed by an
 * `encoding` and then an optional `standalone` pseudo-attribute. When one of
 * the optional pseudo-attributes is named, it must have a valid, non-empty
 * value.
 *
 * Input that starts with `<?xml` immediately followed by another name
 * character (such as `<?xml-stylesheet ...?>`) isn't an XML declaration; in
 * that case nothing is consumed and `false` is returned, so it can be parsed
 * as a processing instruction.
 *
 * The declaration is added to the document as an `XmlDeclaration` node only
 * when `options.preserveXmlDeclaration` is set.
 *
 * @returns Whether an XML declaration was consumed.
 * @throws {XmlError} If the declaration is malformed or unclosed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-XMLDecl
 */
consumeXmlDeclaration(): boolean {
  let { scanner } = this;
  let startIndex = scanner.charIndex;

  if (!scanner.consumeString('<?xml')) {
    return false;
  }

  if (!this.consumeWhitespace()) {
    let nextChar = scanner.peek();

    // `<?xml-stylesheet` and friends are ordinary processing instructions
    // whose target merely begins with "xml". Back out and let the caller
    // handle them.
    if (nextChar && syntax.isNameChar(nextChar)) {
      scanner.reset(startIndex);
      return false;
    }

    throw this.error('Invalid XML declaration');
  }

  let version = !!scanner.consumeString('version')
    && this.consumeEqual()
    && this.consumeSystemLiteral();

  if (version === false) {
    throw this.error('XML version is missing or invalid');
  } else if (!/^1\.[0-9]+$/.test(version)) {
    throw this.error('Invalid character in version number');
  }

  let encoding: string | undefined;
  let standalone: 'yes' | 'no' | undefined;

  if (this.consumeWhitespace()) {
    if (scanner.consumeString('encoding')) {
      let encodingValue = this.consumeEqual() && this.consumeSystemLiteral();

      // Once `encoding` has been named, a value is mandatory. Testing the
      // value for truthiness would let `encoding` without a value, or with an
      // empty value, slip through.
      if (encodingValue === false) {
        throw this.error('XML encoding is missing or invalid');
      }

      if (!/^[A-Za-z][\w.-]*$/.test(encodingValue)) {
        throw this.error('Invalid character in encoding name');
      }

      encoding = encodingValue;
      this.consumeWhitespace();
    }

    if (scanner.consumeString('standalone')) {
      let standaloneValue = this.consumeEqual() && this.consumeSystemLiteral();

      // This also rejects a missing (`false`) or empty value.
      if (standaloneValue !== 'yes' && standaloneValue !== 'no') {
        throw this.error('Only "yes" and "no" are permitted as values of `standalone`');
      }

      standalone = standaloneValue;
      this.consumeWhitespace();
    }
  }

  if (!scanner.consumeString('?>')) {
    throw this.error('Invalid or unclosed XML declaration');
  }

  return this.options.preserveXmlDeclaration
    ? this.addNode(new XmlDeclaration(version, encoding, standalone), startIndex)
    : true;
}
|
||||
|
||||
/**
 * Creates (but doesn't throw) an `XmlError` carrying the given message, the
 * scanner's current character index, and the full input string.
 *
 * @param message Error message.
 * @returns The error, for the caller to throw.
 */
error(message: string) {
  const { charIndex, string } = this.scanner;
  return new XmlError(message, charIndex, string);
}
|
||||
|
||||
/**
 * Parses the whole input: an optional byte order mark, the prolog, exactly
 * one root element, and any trailing `Misc` content.
 *
 * @throws {XmlError}
 *   If the root element is missing or invalid, or if anything other than
 *   `Misc` content follows it.
 */
parse() {
  const { scanner } = this;

  scanner.consumeString('\uFEFF'); // byte order mark
  this.consumeProlog();

  const hasRoot = this.consumeElement();

  if (!hasRoot) {
    throw this.error('Root element is missing or invalid');
  }

  while (this.consumeMisc()) {
    // Keep consuming until no more `Misc` content is found.
  }

  if (scanner.isEnd) {
    return;
  }

  throw this.error('Extra content at the end of the document');
}
|
||||
|
||||
/**
 * Throws an invalid character error if any character in the given _string_
 * isn't a valid XML character.
 *
 * Before throwing, the scanner is rewound so that the error is reported at
 * the offending character. This relies on the scanner being positioned
 * immediately after _string_, which is how the methods of this class call it
 * (right after consuming the string).
 *
 * @param string Text that was just consumed from the scanner.
 * @throws {XmlError} If the string contains an invalid XML character.
 */
validateChars(string: string) {
  let { length } = string;

  for (let i = 0; i < length; ++i) {
    let cp = string.codePointAt(i) as number;

    if (!syntax.isXmlCodePoint(cp)) {
      // `i` is a UTF-16 code unit index, but the scanner position counts code
      // points. Rewind by the number of code points from the invalid
      // character to the end of the string; subtracting `i` from the total
      // code point count would be off by one for every surrogate pair that
      // precedes the invalid character.
      this.scanner.reset(-[ ...string.slice(i) ].length);
      throw this.error('Invalid character');
    }

    // Skip the low surrogate of a character outside the BMP.
    if (cp > 65535) {
      i += 1;
    }
  }
}
|
||||
}
|
||||
|
||||
// -- Private Functions --------------------------------------------------------
|
||||
|
||||
/**
 * Returns `text` with every CRLF sequence and every lone CR replaced by a
 * single LF.
 */
function normalizeLineBreaks(text: string): string {
  // Most text has no carriage returns at all, so skip the replacement then.
  if (!text.includes('\r')) {
    return text;
  }

  return text.replace(/\r\n?/g, '\n');
}
|
||||
|
||||
// -- Types --------------------------------------------------------------------
|
||||
/** Options accepted by `Parser` (and therefore by `parseXml()`). */
export type ParserOptions = {
  /**
   * Leave undefined named entities (such as "&bogus;") in the output verbatim
   * rather than treating them as a parse error.
   *
   * @default false
   */
  ignoreUndefinedEntities?: boolean;

  /**
   * Record where each node begins and ends in the input string, as byte
   * offsets exposed through the node's `start` and `end` properties.
   *
   * @default false
   */
  includeOffsets?: boolean;

  /**
   * Keep CDATA sections in the document as `XmlCdata` nodes. When this is not
   * set they are represented as `XmlText` nodes instead, which makes for a
   * simpler node tree.
   *
   * @default false
   */
  preserveCdata?: boolean;

  /**
   * Keep comments in the document as `XmlComment` nodes. When this is not set
   * comments are left out of the node tree.
   *
   * @default false
   */
  preserveComments?: boolean;

  /**
   * Keep a document type declaration (if there is one) in the document as an
   * `XmlDocumentType` node. When this is not set the declaration is left out
   * of the node tree.
   *
   * With this enabled, the DTD node comes before the root node in the node
   * tree, where the root node would normally be first.
   *
   * @default false
   */
  preserveDocumentType?: boolean;

  /**
   * Keep an XML declaration (if there is one) in the document as an
   * `XmlDeclaration` node. When this is not set the declaration is left out
   * of the node tree.
   *
   * With this enabled, the XML declaration is the first child of the
   * document, where the root node would normally be first.
   *
   * @default false
   */
  preserveXmlDeclaration?: boolean;

  /**
   * Callback invoked with an undefined named entity (in its wrapped form,
   * e.g. "&bogus;") as its only argument. Return a string to use as the
   * replacement, or `null` / `undefined` to leave the entity undefined, in
   * which case `ignoreUndefinedEntities` decides whether that is an error.
   */
  resolveUndefinedEntity?: (entity: string) => string | null | undefined;

  /**
   * Sort the keys of each element's `attributes` object by attribute name
   * instead of keeping the order in which they appear in the XML.
   *
   * @default false
   */
  sortAttributes?: boolean;
};
|
||||
217
node_modules/@rgrove/parse-xml/src/lib/StringScanner.ts
generated
vendored
Normal file
217
node_modules/@rgrove/parse-xml/src/lib/StringScanner.ts
generated
vendored
Normal file
@ -0,0 +1,217 @@
|
||||
// Shared empty-string constant for this module.
const emptyString = '';
|
||||
// Matches every UTF-16 surrogate pair (a high surrogate followed by a low
// surrogate). `charLength()` uses it to count each pair as one character.
const surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
|
||||
|
||||
/** @private */
|
||||
export class StringScanner {
|
||||
charIndex: number;
|
||||
readonly string: string;
|
||||
|
||||
private readonly charCount: number;
|
||||
private readonly charsToBytes: number[] | undefined;
|
||||
private readonly length: number;
|
||||
private readonly multiByteMode: boolean;
|
||||
|
||||
/**
 * Creates a scanner positioned at the start of `string`.
 *
 * If the string contains surrogate pairs (its character count differs from
 * its length), a lookup table from character index to byte index is built.
 *
 * @param string String to scan.
 */
constructor(string: string) {
  const charCount = this.charLength(string, true);
  const multiByteMode = charCount !== string.length;

  this.charCount = charCount;
  this.charIndex = 0;
  this.length = string.length;
  this.multiByteMode = multiByteMode;
  this.string = string;

  if (multiByteMode) {
    // Create a mapping of character indexes to byte indexes. Since the string
    // contains multibyte characters, a byte index may not necessarily align
    // with a character index.
    const offsets: number[] = [];
    let byteIndex = 0;

    while (offsets.length < charCount) {
      offsets.push(byteIndex);
      byteIndex += (string.codePointAt(byteIndex) as number) > 0xFFFF ? 2 : 1;
    }

    this.charsToBytes = offsets;
  }
}
|
||||
|
||||
/**
 * `true` once the current character index has reached (or passed) the number
 * of characters in the input string.
 */
get isEnd() {
  const { charCount, charIndex } = this;
  return charIndex >= charCount;
}
|
||||
|
||||
// -- Protected Methods ------------------------------------------------------
|
||||
|
||||
/**
 * Returns the number of characters in the given string. With `multiByteSafe`
 * enabled each surrogate pair counts as a single character, so the result may
 * be smaller than the string's length.
 *
 * @param string String to measure.
 * @param multiByteSafe
 *   Whether to account for surrogate pairs. Defaults to the scanner's
 *   `multiByteMode`.
 */
protected charLength(string: string, multiByteSafe = this.multiByteMode): number {
  if (!multiByteSafe) {
    return string.length;
  }

  // We could get the char length with `[ ...string ].length`, but that's
  // actually slower than replacing surrogate pairs with single-byte
  // characters and then counting the result.
  const collapsed = string.replace(surrogatePair, '_');
  return collapsed.length;
}
|
||||
|
||||
// -- Public Methods ---------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Advances the scanner by the given number of characters, stopping if the end
|
||||
* of the string is reached.
|
||||
*/
|
||||
advance(count = 1) {
|
||||
this.charIndex = Math.min(this.charCount, this.charIndex + count);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the byte index of the given character index in the string. The two
|
||||
* may differ in strings that contain multibyte characters.
|
||||
*/
|
||||
charIndexToByteIndex(charIndex: number = this.charIndex): number {
|
||||
return this.multiByteMode
|
||||
? (this.charsToBytes as number[])[charIndex] ?? Infinity
|
||||
: charIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consumes and returns the given number of characters if possible, advancing
|
||||
* the scanner and stopping if the end of the string is reached.
|
||||
*
|
||||
* If no characters could be consumed, an empty string will be returned.
|
||||
*/
|
||||
consume(charCount = 1): string {
|
||||
let chars = this.peek(charCount);
|
||||
this.advance(charCount);
|
||||
return chars;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consumes and returns the given number of bytes if possible, advancing the
|
||||
* scanner and stopping if the end of the string is reached.
|
||||
*
|
||||
* It's up to the caller to ensure that the given byte count doesn't split a
|
||||
* multibyte character.
|
||||
*
|
||||
* If no bytes could be consumed, an empty string will be returned.
|
||||
*/
|
||||
consumeBytes(byteCount: number): string {
|
||||
let byteIndex = this.charIndexToByteIndex();
|
||||
let result = this.string.slice(byteIndex, byteIndex + byteCount);
|
||||
this.advance(this.charLength(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consumes and returns all characters for which the given function returns
|
||||
* `true`, stopping when `false` is returned or the end of the input is
|
||||
* reached.
|
||||
*/
|
||||
consumeMatchFn(fn: (char: string) => boolean): string {
|
||||
let { length, multiByteMode, string } = this;
|
||||
let startByteIndex = this.charIndexToByteIndex();
|
||||
let endByteIndex = startByteIndex;
|
||||
|
||||
if (multiByteMode) {
|
||||
while (endByteIndex < length) {
|
||||
let char = string[endByteIndex] as string;
|
||||
let isSurrogatePair = char >= '\uD800' && char <= '\uDBFF';
|
||||
|
||||
if (isSurrogatePair) {
|
||||
char += string[endByteIndex + 1];
|
||||
}
|
||||
|
||||
if (!fn(char)) {
|
||||
break;
|
||||
}
|
||||
|
||||
endByteIndex += isSurrogatePair ? 2 : 1;
|
||||
}
|
||||
} else {
|
||||
while (endByteIndex < length && fn(string[endByteIndex] as string)) {
|
||||
++endByteIndex;
|
||||
}
|
||||
}
|
||||
|
||||
return this.consumeBytes(endByteIndex - startByteIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Consumes the given string if it exists at the current character index, and
|
||||
* advances the scanner.
|
||||
*
|
||||
* If the given string doesn't exist at the current character index, an empty
|
||||
* string will be returned and the scanner will not be advanced.
|
||||
*/
|
||||
consumeString(stringToConsume: string): string {
|
||||
let { length } = stringToConsume;
|
||||
let byteIndex = this.charIndexToByteIndex();
|
||||
|
||||
if (stringToConsume === this.string.slice(byteIndex, byteIndex + length)) {
|
||||
this.advance(length === 1 ? 1 : this.charLength(stringToConsume));
|
||||
return stringToConsume;
|
||||
}
|
||||
|
||||
return emptyString;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consumes characters until the given global regex is matched, advancing the
|
||||
* scanner up to (but not beyond) the beginning of the match. If the regex
|
||||
* doesn't match, nothing will be consumed.
|
||||
*
|
||||
* Returns the consumed string, or an empty string if nothing was consumed.
|
||||
*/
|
||||
consumeUntilMatch(regex: RegExp): string {
|
||||
let matchByteIndex = this.string
|
||||
.slice(this.charIndexToByteIndex())
|
||||
.search(regex);
|
||||
|
||||
return matchByteIndex > 0
|
||||
? this.consumeBytes(matchByteIndex)
|
||||
: emptyString;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consumes characters until the given string is found, advancing the scanner
|
||||
* up to (but not beyond) that point. If the string is never found, nothing
|
||||
* will be consumed.
|
||||
*
|
||||
* Returns the consumed string, or an empty string if nothing was consumed.
|
||||
*/
|
||||
consumeUntilString(searchString: string): string {
|
||||
let byteIndex = this.charIndexToByteIndex();
|
||||
let matchByteIndex = this.string.indexOf(searchString, byteIndex);
|
||||
|
||||
return matchByteIndex > 0
|
||||
? this.consumeBytes(matchByteIndex - byteIndex)
|
||||
: emptyString;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the given number of characters starting at the current character
|
||||
* index, without advancing the scanner and without exceeding the end of the
|
||||
* input string.
|
||||
*/
|
||||
peek(count = 1): string {
|
||||
let { charIndex, string } = this;
|
||||
|
||||
return this.multiByteMode
|
||||
? string.slice(this.charIndexToByteIndex(charIndex), this.charIndexToByteIndex(charIndex + count))
|
||||
: string.slice(charIndex, charIndex + count);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the scanner position to the given character _index_, or to the start
|
||||
* of the input string if no index is given.
|
||||
*
|
||||
* If _index_ is negative, the scanner position will be moved backward by that
|
||||
* many characters, stopping if the beginning of the string is reached.
|
||||
*/
|
||||
reset(index = 0) {
|
||||
this.charIndex = index >= 0
|
||||
? Math.min(this.charCount, index)
|
||||
: Math.max(0, this.charIndex + index);
|
||||
}
|
||||
}
|
||||
11
node_modules/@rgrove/parse-xml/src/lib/XmlCdata.ts
generated
vendored
Normal file
11
node_modules/@rgrove/parse-xml/src/lib/XmlCdata.ts
generated
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
import { XmlNode } from './XmlNode.js';
|
||||
import { XmlText } from './XmlText.js';
|
||||
|
||||
/**
 * A CDATA section within an XML document.
 *
 * Behaves like an `XmlText` node but reports the CDATA node type.
 */
export class XmlCdata extends XmlText {
  /** Always `XmlNode.TYPE_CDATA`. */
  override get type() {
    return XmlNode.TYPE_CDATA;
  }
}
|
||||
26
node_modules/@rgrove/parse-xml/src/lib/XmlComment.ts
generated
vendored
Normal file
26
node_modules/@rgrove/parse-xml/src/lib/XmlComment.ts
generated
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
import { XmlNode } from './XmlNode.js';
|
||||
|
||||
/**
 * A comment within an XML document.
 */
export class XmlComment extends XmlNode {
  /**
   * Content of this comment.
   */
  content: string;

  /**
   * @param content Content of the comment. Defaults to an empty string.
   */
  constructor(content = '') {
    super();
    this.content = content;
  }

  /** Always `XmlNode.TYPE_COMMENT`. */
  override get type() {
    return XmlNode.TYPE_COMMENT;
  }

  /**
   * Returns the base node's JSON representation extended with `content`.
   */
  override toJSON() {
    return Object.assign(XmlNode.prototype.toJSON.call(this), {
      content: this.content,
    });
  }
}
|
||||
58
node_modules/@rgrove/parse-xml/src/lib/XmlDeclaration.ts
generated
vendored
Normal file
58
node_modules/@rgrove/parse-xml/src/lib/XmlDeclaration.ts
generated
vendored
Normal file
@ -0,0 +1,58 @@
|
||||
import { XmlNode } from './XmlNode.js';
|
||||
|
||||
/**
 * An XML declaration within an XML document.
 *
 * @example
 *
 * ```xml
 * <?xml version="1.0" encoding="UTF-8"?>
 * ```
 */
export class XmlDeclaration extends XmlNode {
  /**
   * Value of the encoding declaration in this XML declaration, or `null` if no
   * encoding declaration was present.
   */
  encoding: string | null;

  /**
   * Value of the standalone declaration in this XML declaration, or `null` if
   * no standalone declaration was present.
   */
  standalone: 'yes' | 'no' | null;

  /**
   * Value of the version declaration in this XML declaration.
   */
  version: string;

  /**
   * @param version Value of the version declaration.
   * @param encoding Value of the encoding declaration, if any.
   * @param standalone Value of the standalone declaration, if any.
   */
  constructor(
    version: string,
    encoding?: string,
    standalone?: typeof XmlDeclaration.prototype.standalone,
  ) {
    super();

    this.version = version;
    this.encoding = encoding ?? null;
    this.standalone = standalone ?? null;
  }

  /** Always `XmlNode.TYPE_XML_DECLARATION`. */
  override get type() {
    return XmlNode.TYPE_XML_DECLARATION;
  }

  /**
   * Returns the base node's JSON representation extended with `version`, plus
   * `encoding` and `standalone` when they are not `null`.
   */
  override toJSON() {
    let json = XmlNode.prototype.toJSON.call(this);
    json.version = this.version;

    // Optional values are omitted from the output rather than emitted as null.
    for (let key of ['encoding', 'standalone'] as const) {
      if (this[key] !== null) {
        json[key] = this[key];
      }
    }

    return json;
  }
}
|
||||
59
node_modules/@rgrove/parse-xml/src/lib/XmlDocument.ts
generated
vendored
Normal file
59
node_modules/@rgrove/parse-xml/src/lib/XmlDocument.ts
generated
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
import { XmlElement } from './XmlElement.js';
|
||||
import { XmlNode } from './XmlNode.js';
|
||||
|
||||
import type { XmlComment } from './XmlComment.js';
|
||||
import type { XmlDeclaration } from './XmlDeclaration.js';
|
||||
import type { XmlDocumentType } from './XmlDocumentType.js';
|
||||
import type { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
|
||||
|
||||
/**
 * Represents an XML document. All elements within the document are descendants
 * of this node.
 */
export class XmlDocument extends XmlNode {
  /**
   * Child nodes of this document.
   */
  readonly children: Array<XmlComment | XmlDeclaration | XmlDocumentType | XmlProcessingInstruction | XmlElement>;

  /**
   * @param children Child nodes of the document. Defaults to an empty array.
   */
  constructor(children: Array<XmlComment | XmlDeclaration | XmlDocumentType | XmlElement | XmlProcessingInstruction> = []) {
    super();
    this.children = children;
  }

  /** A document is its own containing document. */
  override get document() {
    return this;
  }

  /**
   * Root element of this document, or `null` if this document is empty.
   */
  get root(): XmlElement | null {
    // The root is the first child that is an element.
    for (let child of this.children) {
      if (child instanceof XmlElement) {
        return child;
      }
    }

    return null;
  }

  /**
   * Text content of this document and all its descendants.
   */
  get text(): string {
    // Children without a `text` property contribute nothing.
    return this.children
      .map(child => 'text' in child ? child.text : '')
      .join('');
  }

  /** Always `XmlNode.TYPE_DOCUMENT`. */
  override get type() {
    return XmlNode.TYPE_DOCUMENT;
  }

  /**
   * Returns the base node's JSON representation extended with the JSON
   * representations of all children.
   */
  override toJSON() {
    return Object.assign(XmlNode.prototype.toJSON.call(this), {
      children: this.children.map(child => child.toJSON()),
    });
  }
}
|
||||
67
node_modules/@rgrove/parse-xml/src/lib/XmlDocumentType.ts
generated
vendored
Normal file
67
node_modules/@rgrove/parse-xml/src/lib/XmlDocumentType.ts
generated
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
import { XmlNode } from './XmlNode.js';
|
||||
|
||||
/**
 * A document type declaration within an XML document.
 *
 * @example
 *
 * ```xml
 * <!DOCTYPE kittens [
 *   <!ELEMENT kittens (#PCDATA)>
 * ]>
 * ```
 */
export class XmlDocumentType extends XmlNode {
  /**
   * Name of the root element described by this document type declaration.
   */
  name: string;

  /**
   * Public identifier of the external subset of this document type declaration,
   * or `null` if no public identifier was present.
   */
  publicId: string | null;

  /**
   * System identifier of the external subset of this document type declaration,
   * or `null` if no system identifier was present.
   */
  systemId: string | null;

  /**
   * Internal subset of this document type declaration, or `null` if no internal
   * subset was present.
   */
  internalSubset: string | null;

  /**
   * @param name Name of the root element.
   * @param publicId Public identifier of the external subset, if any.
   * @param systemId System identifier of the external subset, if any.
   * @param internalSubset Internal subset, if any.
   */
  constructor(
    name: string,
    publicId?: string,
    systemId?: string,
    internalSubset?: string,
  ) {
    super();
    this.name = name;
    this.publicId = publicId ?? null;
    this.systemId = systemId ?? null;
    this.internalSubset = internalSubset ?? null;
  }

  /** Always `XmlNode.TYPE_DOCUMENT_TYPE`. */
  override get type() {
    return XmlNode.TYPE_DOCUMENT_TYPE;
  }

  /**
   * Returns the base node's JSON representation extended with `name`, plus
   * `publicId`, `systemId`, and `internalSubset` when they are not `null`.
   */
  override toJSON() {
    let json = XmlNode.prototype.toJSON.call(this);
    json.name = this.name;

    // Optional values are omitted from the output rather than emitted as null.
    for (let key of ['publicId', 'systemId', 'internalSubset'] as const) {
      if (this[key] !== null) {
        json[key] = this[key];
      }
    }

    return json;
  }
}
|
||||
81
node_modules/@rgrove/parse-xml/src/lib/XmlElement.ts
generated
vendored
Normal file
81
node_modules/@rgrove/parse-xml/src/lib/XmlElement.ts
generated
vendored
Normal file
@ -0,0 +1,81 @@
|
||||
import { XmlNode } from './XmlNode.js';
|
||||
|
||||
import type { JsonObject } from './types.js';
|
||||
import type { XmlCdata } from './XmlCdata.js';
|
||||
import type { XmlComment } from './XmlComment.js';
|
||||
import type { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
|
||||
import type { XmlText } from './XmlText.js';
|
||||
|
||||
/**
 * Element in an XML document.
 */
export class XmlElement extends XmlNode {
  /**
   * Attributes on this element.
   */
  attributes: {[attrName: string]: string};

  /**
   * Child nodes of this element.
   */
  children: Array<XmlCdata | XmlComment | XmlElement | XmlProcessingInstruction | XmlText>;

  /**
   * Name of this element.
   */
  name: string;

  /**
   * @param name Name of the element.
   * @param attributes Attributes on the element. Defaults to an empty object
   *   with no prototype.
   * @param children Child nodes of the element. Defaults to an empty array.
   */
  constructor(
    name: string,
    attributes: {[attrName: string]: string} = Object.create(null),
    children: Array<XmlCdata | XmlComment | XmlElement | XmlProcessingInstruction | XmlText> = [],
  ) {
    super();

    this.name = name;
    this.attributes = attributes;
    this.children = children;
  }

  /**
   * Whether this element is empty (meaning it has no children).
   */
  get isEmpty(): boolean {
    return this.children.length === 0;
  }

  /**
   * Whether whitespace should be preserved in this element's content, decided
   * by the nearest `xml:space` attribute on this element or an ancestor
   * element. `false` if no such attribute exists.
   */
  override get preserveWhitespace(): boolean {
    let node: XmlNode | null = this; // eslint-disable-line @typescript-eslint/no-this-alias

    // Walk up through ancestor elements; the first `xml:space` found wins.
    while (node instanceof XmlElement) {
      if ('xml:space' in node.attributes) {
        return node.attributes['xml:space'] === 'preserve';
      }

      node = node.parent;
    }

    return false;
  }

  /**
   * Text content of this element and all its descendants.
   */
  get text(): string {
    // Children without a `text` property contribute nothing.
    return this.children
      .map(child => 'text' in child ? child.text : '')
      .join('');
  }

  /** Always `XmlNode.TYPE_ELEMENT`. */
  override get type() {
    return XmlNode.TYPE_ELEMENT;
  }

  /**
   * Returns the base node's JSON representation extended with `name`,
   * `attributes`, and the JSON representations of all children.
   */
  override toJSON(): JsonObject {
    return Object.assign(XmlNode.prototype.toJSON.call(this), {
      name: this.name,
      attributes: this.attributes,
      children: this.children.map(child => child.toJSON()),
    });
  }
}
|
||||
80
node_modules/@rgrove/parse-xml/src/lib/XmlError.ts
generated
vendored
Normal file
80
node_modules/@rgrove/parse-xml/src/lib/XmlError.ts
generated
vendored
Normal file
@ -0,0 +1,80 @@
|
||||
/**
 * An error that occurred while parsing XML.
 *
 * The error message includes the line and column of the problem, an excerpt of
 * the offending line, and a caret pointing at the error position.
 */
export class XmlError extends Error {
  /**
   * Character column at which this error occurred (1-based).
   */
  readonly column: number;

  /**
   * Short excerpt from the input string that contains the problem.
   */
  readonly excerpt: string;

  /**
   * Line number at which this error occurred (1-based).
   */
  readonly line: number;

  /**
   * Character position at which this error occurred relative to the beginning
   * of the input (0-based).
   */
  readonly pos: number;

  /**
   * @param message Description of the problem.
   * @param charIndex Position in `xml` at which the problem occurred (0-based).
   * @param xml The input being parsed.
   */
  constructor(
    message: string,
    charIndex: number,
    xml: string,
  ) {
    let column = 1;
    let excerpt = '';
    let line = 1;

    // Never scan outside the input: reading past the end would append the text
    // "undefined" to the excerpt and inflate the column.
    let scanEnd = Math.max(0, Math.min(charIndex, xml.length));

    // Find the line and column where the error occurred.
    for (let i = 0; i < scanEnd; ++i) {
      let char = xml[i];

      if (char === '\n') {
        column = 1;
        excerpt = '';
        line += 1;
      } else {
        column += 1;
        excerpt += char;
      }
    }

    // Extend the excerpt to the end of the line containing the error.
    let eol = xml.indexOf('\n', scanEnd);

    excerpt += eol === -1
      ? xml.slice(scanEnd)
      : xml.slice(scanEnd, eol);

    let excerptStart = 0;

    // Keep the excerpt below 50 chars, but always keep the error position in
    // view.
    if (excerpt.length > 50) {
      if (column < 40) {
        excerpt = excerpt.slice(0, 50);
      } else {
        excerptStart = column - 20;
        excerpt = excerpt.slice(excerptStart, column + 30);
      }
    }

    // The excerpt is indented by two spaces. The caret line is padded with
    // `column - excerptStart + 1` spaces, which places the caret directly
    // under the offending character only with that two-space indent.
    super(
      `${message} (line ${line}, column ${column})\n`
        + `  ${excerpt}\n`
        + ' '.repeat(column - excerptStart + 1) + '^\n',
    );

    this.column = column;
    this.excerpt = excerpt;
    this.line = line;
    this.name = 'XmlError';
    this.pos = charIndex;
  }
}
|
||||
137
node_modules/@rgrove/parse-xml/src/lib/XmlNode.ts
generated
vendored
Normal file
137
node_modules/@rgrove/parse-xml/src/lib/XmlNode.ts
generated
vendored
Normal file
@ -0,0 +1,137 @@
|
||||
import type { JsonObject } from './types.js';
|
||||
import type { XmlDocument } from './XmlDocument.js';
|
||||
import type { XmlElement } from './XmlElement.js';
|
||||
|
||||
/**
 * Base class for a node in an XML document.
 */
export class XmlNode {
  /**
   * Type value for an `XmlCdata` node.
   */
  static readonly TYPE_CDATA = 'cdata';

  /**
   * Type value for an `XmlComment` node.
   */
  static readonly TYPE_COMMENT = 'comment';

  /**
   * Type value for an `XmlDocument` node.
   */
  static readonly TYPE_DOCUMENT = 'document';

  /**
   * Type value for an `XmlDocumentType` node.
   */
  static readonly TYPE_DOCUMENT_TYPE = 'doctype';

  /**
   * Type value for an `XmlElement` node.
   */
  static readonly TYPE_ELEMENT = 'element';

  /**
   * Type value for an `XmlProcessingInstruction` node.
   */
  static readonly TYPE_PROCESSING_INSTRUCTION = 'pi';

  /**
   * Type value for an `XmlText` node.
   */
  static readonly TYPE_TEXT = 'text';

  /**
   * Type value for an `XmlDeclaration` node.
   */
  static readonly TYPE_XML_DECLARATION = 'xmldecl';

  /**
   * Parent node of this node, or `null` if this node has no parent.
   */
  parent: XmlDocument | XmlElement | null = null;

  /**
   * Starting byte offset of this node in the original XML string, or `-1` if
   * the offset is unknown.
   */
  start = -1;

  /**
   * Ending byte offset of this node in the original XML string, or `-1` if the
   * offset is unknown.
   */
  end = -1;

  /**
   * Document that contains this node, or `null` if this node is not associated
   * with a document.
   */
  get document(): XmlDocument | null {
    return this.parent?.document ?? null;
  }

  /**
   * Whether this node is the root node of the document (also known as the
   * document element).
   */
  get isRootNode(): boolean {
    // The root node is an element whose parent is the document itself.
    return this.parent !== null
      && this.parent === this.document
      && this.type === XmlNode.TYPE_ELEMENT;
  }

  /**
   * Whether whitespace should be preserved in the content of this element and
   * its children.
   *
   * This is influenced by the value of the special `xml:space` attribute, and
   * will be `true` for any node whose `xml:space` attribute is set to
   * "preserve". If a node has no such attribute, it will inherit the value of
   * the nearest ancestor that does (if any).
   *
   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-white-space
   */
  get preserveWhitespace(): boolean {
    return !!this.parent?.preserveWhitespace;
  }

  /**
   * Type of this node.
   *
   * The value of this property is a string that matches one of the static
   * `TYPE_*` properties on the `XmlNode` class (e.g. `TYPE_ELEMENT`,
   * `TYPE_TEXT`, etc.).
   *
   * The `XmlNode` class itself is a base class and doesn't have its own type
   * name.
   */
  get type() {
    return '';
  }

  /**
   * Returns a JSON-serializable object representing this node, minus properties
   * that could result in circular references.
   *
   * `isRootNode` and `preserveWhitespace` are only included when `true`, and
   * `start`/`end` are only included when `start` is known.
   */
  toJSON(): JsonObject {
    let json: JsonObject = {
      type: this.type,
    };

    if (this.isRootNode) {
      json.isRootNode = true;
    }

    if (this.preserveWhitespace) {
      json.preserveWhitespace = true;
    }

    if (this.start !== -1) {
      json.start = this.start;
      json.end = this.end;
    }

    return json;
  }
}
|
||||
35
node_modules/@rgrove/parse-xml/src/lib/XmlProcessingInstruction.ts
generated
vendored
Normal file
35
node_modules/@rgrove/parse-xml/src/lib/XmlProcessingInstruction.ts
generated
vendored
Normal file
@ -0,0 +1,35 @@
|
||||
import { XmlNode } from './XmlNode.js';
|
||||
|
||||
/**
 * A processing instruction within an XML document.
 */
export class XmlProcessingInstruction extends XmlNode {
  /**
   * Content of this processing instruction.
   */
  content: string;

  /**
   * Name of this processing instruction. Also sometimes referred to as the
   * processing instruction "target".
   */
  name: string;

  /**
   * @param name Name (target) of the processing instruction.
   * @param content Content of the processing instruction. Defaults to an empty
   *   string.
   */
  constructor(name: string, content = '') {
    super();

    this.name = name;
    this.content = content;
  }

  /** Always `XmlNode.TYPE_PROCESSING_INSTRUCTION`. */
  override get type() {
    return XmlNode.TYPE_PROCESSING_INSTRUCTION;
  }

  /**
   * Returns the base node's JSON representation extended with `name` and
   * `content`.
   */
  override toJSON() {
    return Object.assign(XmlNode.prototype.toJSON.call(this), {
      name: this.name,
      content: this.content,
    });
  }
}
|
||||
26
node_modules/@rgrove/parse-xml/src/lib/XmlText.ts
generated
vendored
Normal file
26
node_modules/@rgrove/parse-xml/src/lib/XmlText.ts
generated
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
import { XmlNode } from './XmlNode.js';
|
||||
|
||||
/**
 * Text content within an XML document.
 */
export class XmlText extends XmlNode {
  /**
   * Text content of this node.
   */
  text: string;

  /**
   * @param text Text content of the node. Defaults to an empty string.
   */
  constructor(text = '') {
    super();
    this.text = text;
  }

  /** Always `XmlNode.TYPE_TEXT`. */
  override get type() {
    return XmlNode.TYPE_TEXT;
  }

  /**
   * Returns the base node's JSON representation extended with `text`.
   */
  override toJSON() {
    return Object.assign(XmlNode.prototype.toJSON.call(this), {
      text: this.text,
    });
  }
}
|
||||
130
node_modules/@rgrove/parse-xml/src/lib/syntax.ts
generated
vendored
Normal file
130
node_modules/@rgrove/parse-xml/src/lib/syntax.ts
generated
vendored
Normal file
@ -0,0 +1,130 @@
|
||||
/**
 * Regular expression that matches a single character that cannot appear
 * literally in a double-quoted attribute value: the closing `"`, or a `&` or
 * `<`. Everything before the first match is literal `AttValue` content.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
 */
export const attValueCharDoubleQuote = /["&<]/;

/**
 * Regular expression that matches a single character that cannot appear
 * literally in a single-quoted attribute value: the closing `'`, or a `&` or
 * `<`. Everything before the first match is literal `AttValue` content.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
 */
export const attValueCharSingleQuote = /['&<]/;

/**
 * Regular expression that matches a whitespace character that should be
 * normalized to a space character in an attribute value. A `\r\n` sequence is
 * matched as a single unit.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#AVNormalize
 */
export const attValueNormalizedWhitespace = /\r\n|[\n\r\t]/g;

/**
 * Regular expression that matches a sequence that signals the end of XML
 * `CharData` content: `<`, `&`, or `]]>`.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
 */
export const endCharData = /<|&|]]>/;
|
||||
|
||||
/**
 * Mapping of predefined entity names to their replacement values.
 *
 * The object is frozen and has no prototype, so looking up an arbitrary name
 * never resolves to an inherited `Object.prototype` member.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-predefined-ent
 */
export const predefinedEntities: Readonly<{[name: string]: string;}> = Object.freeze(Object.assign(Object.create(null), {
  amp: '&',
  apos: "'",
  gt: '>',
  lt: '<',
  quot: '"',
}));
|
||||
|
||||
/**
 * Returns `true` if _char_ is an XML `NameChar`, `false` if it isn't.
 *
 * Only the first code point of _char_ is examined.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameChar
 */
export function isNameChar(char: string): boolean {
  let cp = char.codePointAt(0) as number;

  // Including the most common NameStartChars here improves performance
  // slightly.
  return (cp >= 0x61 && cp <= 0x7A) // a-z
    || (cp >= 0x41 && cp <= 0x5A) // A-Z
    || (cp >= 0x30 && cp <= 0x39) // 0-9
    || cp === 0x2D // -
    || cp === 0x2E // .
    || cp === 0xB7
    || (cp >= 0x300 && cp <= 0x36F)
    || cp === 0x203F
    || cp === 0x2040
    || isNameStartChar(char, cp);
}
|
||||
|
||||
/**
 * Returns `true` if _char_ is an XML `NameStartChar`, `false` if it isn't.
 *
 * @param char Character to test.
 * @param cp Code point of _char_. Defaults to the first code point of _char_;
 *   callers that have already computed it can pass it to avoid recomputing.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameStartChar
 */
export function isNameStartChar(char: string, cp = char.codePointAt(0) as number): boolean {
  return (cp >= 0x61 && cp <= 0x7A) // a-z
    || (cp >= 0x41 && cp <= 0x5A) // A-Z
    || cp === 0x3A // :
    || cp === 0x5F // _
    || (cp >= 0xC0 && cp <= 0xD6)
    || (cp >= 0xD8 && cp <= 0xF6)
    || (cp >= 0xF8 && cp <= 0x2FF)
    || (cp >= 0x370 && cp <= 0x37D)
    || (cp >= 0x37F && cp <= 0x1FFF)
    || cp === 0x200C
    || cp === 0x200D
    || (cp >= 0x2070 && cp <= 0x218F)
    || (cp >= 0x2C00 && cp <= 0x2FEF)
    || (cp >= 0x3001 && cp <= 0xD7FF)
    || (cp >= 0xF900 && cp <= 0xFDCF)
    || (cp >= 0xFDF0 && cp <= 0xFFFD)
    || (cp >= 0x10000 && cp <= 0xEFFFF);
}
|
||||
|
||||
/**
 * Returns `true` if _char_ is a valid reference character (which may appear
 * between `&` and `;` in a reference), `false` otherwise.
 *
 * A reference character is either `#` or any XML `NameChar`.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-references
 */
export function isReferenceChar(char: string): boolean {
  return char === '#' || isNameChar(char);
}
|
||||
|
||||
/**
 * Returns `true` if _char_ is an XML whitespace character (space, tab, line
 * feed, or carriage return), `false` otherwise.
 *
 * Only the first code point of _char_ is examined.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
 */
export function isWhitespace(char: string): boolean {
  let cp = char.codePointAt(0);

  return cp === 0x20
    || cp === 0x9
    || cp === 0xA
    || cp === 0xD;
}
|
||||
|
||||
/**
 * Returns `true` if _cp_ is a valid XML `Char` code point, `false` otherwise.
 *
 * @param cp Code point to test.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Char
 */
export function isXmlCodePoint(cp: number): boolean {
  return (cp >= 0x20 && cp <= 0xD7FF)
    || cp === 0xA
    || cp === 0x9
    || cp === 0xD
    || (cp >= 0xE000 && cp <= 0xFFFD)
    || (cp >= 0x10000 && cp <= 0x10FFFF);
}
|
||||
2
node_modules/@rgrove/parse-xml/src/lib/types.ts
generated
vendored
Normal file
2
node_modules/@rgrove/parse-xml/src/lib/types.ts
generated
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
/** An object whose property values, when present, are all JSON-serializable. */
export type JsonObject = {[key in string]?: JsonValue};

/** Any value that can be represented in JSON. */
export type JsonValue = string | number | boolean | JsonObject | JsonValue[] | null;
|
||||
Reference in New Issue
Block a user