Files
beall-11ty/node_modules/@rgrove/parse-xml/dist/lib/Parser.d.ts
2026-03-31 16:38:22 -07:00

265 lines
9.0 KiB
TypeScript

import { XmlDocument } from './XmlDocument.js';
import { XmlError } from './XmlError.js';
import { XmlNode } from './XmlNode.js';
/**
* Parses an XML string into an `XmlDocument`.
*
* This is a type declaration only: it lists the parser's members and their
* types, not their implementation. Most `consume*` methods attempt to consume
* one XML construct and report through their return value whether (or what)
* they consumed; see each method's `@returns` for the exact contract.
*
* @private
*/
export declare class Parser {
/**
* Document exposed by this parser.
*
* NOTE(review): presumably the `XmlDocument` that parsing populates; confirm
* against the implementation.
*/
readonly document: XmlDocument;
/**
* Node that `addNode()` adds new children to. Its type is not exposed in this
* declaration.
*/
private currentNode;
/**
* Parser options. Type not exposed in this declaration.
*
* NOTE(review): presumably derived from the constructor's `options` argument;
* confirm against the implementation.
*/
private readonly options;
/**
* Scanner whose current position `error()` reports. Its type is not exposed in
* this declaration.
*/
private readonly scanner;
/**
* @param xml XML string to parse.
* @param options Parser options. Optional; see `ParserOptions` for the
* documented per-option defaults.
*/
constructor(xml: string, options?: ParserOptions);
/**
* Adds the given `XmlNode` as a child of `this.currentNode`.
*
* @param node Node to add.
* @param charIndex
* Character index associated with `node`. NOTE(review): presumably the
* position in the input at which the node starts; confirm against the
* implementation.
* @returns
* A boolean whose meaning is not documented in this declaration. TODO confirm
* against the implementation.
*/
addNode(node: XmlNode, charIndex: number): boolean;
/**
* Adds the given _text_ to the document, either by appending it to a
* preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
*
* @param text Text to add.
* @param charIndex
* Character index associated with `text`. NOTE(review): presumably the
* position in the input at which the text starts; confirm against the
* implementation.
* @returns
* A boolean whose meaning is not documented in this declaration. TODO confirm
* against the implementation.
*/
addText(text: string, charIndex: number): boolean;
/**
* Consumes element attributes.
*
* @returns
* Object with string keys and string values describing the consumed
* attributes. NOTE(review): presumably keyed by attribute name, with the
* attribute value as the property value; confirm against the implementation.
*
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-starttags
*/
consumeAttributes(): Record<string, string>;
/**
* Consumes an `AttValue` (attribute value) if possible.
*
* @returns
* Contents of the `AttValue` minus quotes, or `false` if nothing was
* consumed. An empty string indicates that an `AttValue` was consumed but
* was empty, so callers must compare against `false` rather than rely on
* truthiness.
*
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
*/
consumeAttributeValue(): string | false;
/**
* Consumes a CDATA section if possible.
*
* @returns Whether a CDATA section was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
*/
consumeCdataSection(): boolean;
/**
* Consumes character data if possible.
*
* @returns Whether character data was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
*/
consumeCharData(): boolean;
/**
* Consumes a comment if possible.
*
* @returns Whether a comment was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Comment
*/
consumeComment(): boolean;
/**
* Consumes a reference in a content context if possible.
*
* This differs from `consumeReference()` in that a consumed reference will be
* added to the document as a text node instead of returned.
*
* @returns Whether a reference was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
*/
consumeContentReference(): boolean;
/**
* Consumes a doctype declaration if possible.
*
* This is a loose implementation since doctype declarations are currently
* discarded without further parsing.
*
* @returns Whether a doctype declaration was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
*/
consumeDoctypeDeclaration(): boolean;
/**
* Consumes an element if possible.
*
* @returns Whether an element was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
*/
consumeElement(): boolean;
/**
* Consumes an `Eq` production if possible.
*
* @returns Whether an `Eq` production was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
*/
consumeEqual(): boolean;
/**
* Consumes `Misc` content if possible.
*
* @returns Whether anything was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
*/
consumeMisc(): boolean;
/**
* Consumes one or more `Name` characters if possible.
*
* Unlike the `string | false` consumers on this class, "nothing consumed" is
* signalled here by an empty string rather than `false`.
*
* @returns `Name` characters, or an empty string if none were consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
*/
consumeName(): string;
/**
* Consumes a processing instruction if possible.
*
* @returns Whether a processing instruction was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
*/
consumeProcessingInstruction(): boolean;
/**
* Consumes a prolog if possible.
*
* @returns Whether a prolog was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
*/
consumeProlog(): boolean;
/**
* Consumes a public identifier literal if possible.
*
* @returns
* Value of the public identifier literal minus quotes, or `false` if
* nothing was consumed. An empty string indicates that a public id literal
* was consumed but was empty.
*
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
*/
consumePubidLiteral(): string | false;
/**
* Consumes a reference if possible.
*
* This differs from `consumeContentReference()` in that a consumed reference
* will be returned rather than added to the document.
*
* @returns
* Parsed reference value, or `false` if nothing was consumed (to
* distinguish from a reference that resolves to an empty string).
*
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Reference
*/
consumeReference(): string | false;
/**
* Consumes a `SystemLiteral` if possible.
*
* A `SystemLiteral` is similar to an attribute value, but allows the
* characters `<` and `&` and doesn't replace references.
*
* @returns
* Value of the `SystemLiteral` minus quotes, or `false` if nothing was
* consumed. An empty string indicates that a `SystemLiteral` was consumed
* but was empty.
*
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
*/
consumeSystemLiteral(): string | false;
/**
* Consumes one or more whitespace characters if possible.
*
* @returns Whether any whitespace characters were consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
*/
consumeWhitespace(): boolean;
/**
* Consumes an XML declaration if possible.
*
* @returns Whether an XML declaration was consumed.
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-XMLDecl
*/
consumeXmlDeclaration(): boolean;
/**
* Returns an `XmlError` for the current scanner position.
*
* The error is handed back as the return value; per this signature the method
* itself does not throw it, so throwing is left to the caller.
*
* @param message Error message.
* @returns An `XmlError` for the current scanner position.
*/
error(message: string): XmlError;
/**
* Parses the XML input.
*
* Returns nothing. NOTE(review): the result is presumably made available
* through `document`, and this declaration does not show whether the
* constructor already invokes this method; confirm before calling it
* directly.
*/
parse(): void;
/**
* Throws an invalid character error if any character in the given _string_
* isn't a valid XML character.
*
* @param string String whose characters should be validated.
* @throws
* An invalid character error when `string` contains a character that isn't a
* valid XML character. NOTE(review): the error type is presumably `XmlError`;
* confirm against the implementation.
*/
validateChars(string: string): void;
}
/**
* Options accepted by the `Parser` constructor.
*
* Every property is optional. The boolean options are documented with a
* default of `false`; no default is documented for `resolveUndefinedEntity`.
*/
export type ParserOptions = {
/**
* When `true`, an undefined named entity (like "&bogus;") will be left in the
* output as is instead of causing a parse error.
*
* @default false
*/
ignoreUndefinedEntities?: boolean;
/**
* When `true`, the starting and ending byte offsets of each node in the input
* string will be made available via `start` and `end` properties on the node.
*
* NOTE(review): the input is a JavaScript string, so "byte offsets" presumably
* means indexes into that string rather than offsets into an encoded byte
* sequence; confirm against the implementation before relying on either
* reading for non-ASCII input.
*
* @default false
*/
includeOffsets?: boolean;
/**
* When `true`, CDATA sections will be preserved in the document as `XmlCdata`
* nodes. Otherwise CDATA sections will be represented as `XmlText` nodes,
* which keeps the node tree simpler and easier to work with.
*
* @default false
*/
preserveCdata?: boolean;
/**
* When `true`, comments will be preserved in the document as `XmlComment`
* nodes. Otherwise comments will not be included in the node tree.
*
* @default false
*/
preserveComments?: boolean;
/**
* When `true`, a document type declaration (if present) will be preserved in
* the document as an `XmlDocumentType` node. Otherwise the declaration will
* not be included in the node tree.
*
* Note that when this is `true` and a document type declaration is present,
* the DTD will precede the root node in the node tree (normally the root
* node would be first).
*
* @default false
*/
preserveDocumentType?: boolean;
/**
* When `true`, an XML declaration (if present) will be preserved in the
* document as an `XmlDeclaration` node. Otherwise the declaration will not be
* included in the node tree.
*
* Note that when this is `true` and an XML declaration is present, the
* XML declaration will be the first child of the document (normally the root
* node would be first).
*
* @default false
*/
preserveXmlDeclaration?: boolean;
/**
* When an undefined named entity is encountered, this function will be called
* with the entity as its only argument. It should return a string value with
* which to replace the entity, or `null` or `undefined` to treat the entity
* as undefined (which may result in a parse error depending on the value of
* `ignoreUndefinedEntities`).
*
* @param entity
* The undefined named entity. NOTE(review): this declaration does not state
* whether the value includes the leading `&` and trailing `;`; confirm
* against the implementation.
* @returns
* Replacement string for the entity, or `null` or `undefined` to leave the
* entity treated as undefined.
*/
resolveUndefinedEntity?: (entity: string) => string | null | undefined;
/**
* When `true`, attributes in an element's `attributes` object will be sorted
* in alphanumeric order by name. Otherwise they'll retain their original
* order as found in the XML.
*
* @default false
*/
sortAttributes?: boolean;
};
//# sourceMappingURL=Parser.d.ts.map