first

2026-03-31 16:38:22 -07:00
commit 38940436a7
2112 changed files with 376929 additions and 0 deletions
--- a/node_modules/@rgrove/parse-xml/src/index.ts
+++ b/node_modules/@rgrove/parse-xml/src/index.ts
@ -0,0 +1,33 @@
+import { Parser } from './lib/Parser.js';
+
+import type { ParserOptions } from './lib/Parser.js';
+
+export * from './lib/types.js';
+export { XmlCdata } from './lib/XmlCdata.js';
+export { XmlComment } from './lib/XmlComment.js';
+export { XmlDeclaration } from './lib/XmlDeclaration.js';
+export { XmlDocument } from './lib/XmlDocument.js';
+export { XmlDocumentType } from './lib/XmlDocumentType.js';
+export { XmlElement } from './lib/XmlElement.js';
+export { XmlError } from './lib/XmlError.js';
+export { XmlNode } from './lib/XmlNode.js';
+export { XmlProcessingInstruction } from './lib/XmlProcessingInstruction.js';
+export { XmlText } from './lib/XmlText.js';
+
+export type { ParserOptions } from './lib/Parser.js';
+
+/**
+ * Builds an `XmlDocument` tree from a string of XML.
+ *
+ * @example
+ * import { parseXml } from '@rgrove/parse-xml';
+ * let doc = parseXml('<kittens fuzzy="yes">I like fuzzy kittens.</kittens>');
+ *
+ * @param xml XML string to parse.
+ * @param options Parser options.
+ * @returns The parsed document.
+ */
+export function parseXml(xml: string, options?: ParserOptions) {
+  let parser = new Parser(xml, options);
+  return parser.document;
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/Parser.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/Parser.ts
@ -0,0 +1,906 @@
+import { StringScanner } from './StringScanner.js';
+import * as syntax from './syntax.js';
+import { XmlCdata } from './XmlCdata.js';
+import { XmlComment } from './XmlComment.js';
+import { XmlDeclaration } from './XmlDeclaration.js';
+import { XmlDocument } from './XmlDocument.js';
+import { XmlDocumentType } from './XmlDocumentType.js';
+import { XmlElement } from './XmlElement.js';
+import { XmlError } from './XmlError.js';
+import { XmlNode } from './XmlNode.js';
+import { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
+import { XmlText } from './XmlText.js';
+
+const emptyString = '';
+
+/**
+ * Parses an XML string into an `XmlDocument`.
+ *
+ * @private
+ */
+export class Parser {
+  readonly document: XmlDocument;
+
+  private currentNode: XmlDocument | XmlElement;
+  private readonly options: ParserOptions;
+  private readonly scanner: StringScanner;
+
+  /**
+   * @param xml XML string to parse.
+   * @param options Parser options.
+   */
+  constructor(xml: string, options: ParserOptions = {}) {
+    let doc = this.document = new XmlDocument();
+
+    this.currentNode = doc;
+    this.options = options;
+    this.scanner = new StringScanner(xml);
+
+    if (this.options.includeOffsets) {
+      doc.start = 0;
+      doc.end = xml.length;
+    }
+
+    this.parse();
+  }
+
+  /**
+   * Adds the given `XmlNode` as a child of `this.currentNode`.
+   */
+  addNode(node: XmlNode, charIndex: number) {
+    node.parent = this.currentNode;
+
+    if (this.options.includeOffsets) {
+      node.start = this.scanner.charIndexToByteIndex(charIndex);
+      node.end = this.scanner.charIndexToByteIndex();
+    }
+
+    // @ts-expect-error: XmlDocument has a more limited set of possible children
+    // than XmlElement so TypeScript is unhappy, but we always do the right
+    // thing.
+    this.currentNode.children.push(node);
+    return true;
+  }
+
+  /**
+   * Adds the given _text_ to the document, either by appending it to a
+   * preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
+   */
+  addText(text: string, charIndex: number) {
+    let { children } = this.currentNode;
+    let { length } = children;
+
+    text = normalizeLineBreaks(text);
+
+    if (length > 0) {
+      let prevNode = children[length - 1];
+
+      if (prevNode?.type === XmlNode.TYPE_TEXT) {
+        let textNode = prevNode as XmlText;
+
+        // The previous node is a text node, so we can append to it and avoid
+        // creating another node.
+        textNode.text += text;
+
+        if (this.options.includeOffsets) {
+          textNode.end = this.scanner.charIndexToByteIndex();
+        }
+
+        return true;
+      }
+    }
+
+    return this.addNode(new XmlText(text), charIndex);
+  }
+
+  /**
+   * Consumes element attributes.
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-starttags
+   */
+  consumeAttributes(): Record<string, string> {
+    let attributes = Object.create(null);
+
+    while (this.consumeWhitespace()) {
+      let attrName = this.consumeName();
+
+      if (!attrName) {
+        break;
+      }
+
+      let attrValue = this.consumeEqual() && this.consumeAttributeValue();
+
+      if (attrValue === false) {
+        throw this.error('Attribute value expected');
+      }
+
+      if (attrName in attributes) {
+        throw this.error(`Duplicate attribute: ${attrName}`);
+      }
+
+      if (attrName === 'xml:space'
+          && attrValue !== 'default'
+          && attrValue !== 'preserve') {
+
+        throw this.error('Value of the `xml:space` attribute must be "default" or "preserve"');
+      }
+
+      attributes[attrName] = attrValue;
+    }
+
+    if (this.options.sortAttributes) {
+      let attrNames = Object.keys(attributes).sort();
+      let sortedAttributes = Object.create(null);
+
+      for (let i = 0; i < attrNames.length; ++i) {
+        let attrName = attrNames[i] as string;
+        sortedAttributes[attrName] = attributes[attrName];
+      }
+
+      attributes = sortedAttributes;
+    }
+
+    return attributes;
+  }
+
+  /**
+   * Consumes an `AttValue` (attribute value) if possible.
+   *
+   * @returns
+   *   Contents of the `AttValue` minus quotes, or `false` if nothing was
+   *   consumed. An empty string indicates that an `AttValue` was consumed but
+   *   was empty.
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
+   */
+  consumeAttributeValue(): string | false {
+    let { scanner } = this;
+    let quote = scanner.peek();
+
+    if (quote !== '"' && quote !== "'") {
+      return false;
+    }
+
+    scanner.advance();
+
+    let chars;
+    let isClosed = false;
+    let value = emptyString;
+    let regex = quote === '"'
+      ? syntax.attValueCharDoubleQuote
+      : syntax.attValueCharSingleQuote;
+
+    matchLoop: while (!scanner.isEnd) {
+      chars = scanner.consumeUntilMatch(regex);
+
+      if (chars) {
+        this.validateChars(chars);
+        value += chars.replace(syntax.attValueNormalizedWhitespace, ' ');
+      }
+
+      switch (scanner.peek()) {
+        case quote:
+          isClosed = true;
+          break matchLoop;
+
+        case '&':
+          value += this.consumeReference();
+          continue;
+
+        case '<':
+          throw this.error('Unescaped `<` is not allowed in an attribute value');
+
+        default:
+          break matchLoop;
+      }
+    }
+
+    if (!isClosed) {
+      throw this.error('Unclosed attribute');
+    }
+
+    scanner.advance();
+    return value;
+  }
+
+  /**
+   * Consumes a CDATA section if possible.
+   *
+   * @returns Whether a CDATA section was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
+   */
+  consumeCdataSection(): boolean {
+    let { scanner } = this;
+    let startIndex = scanner.charIndex;
+
+    if (!scanner.consumeString('<![CDATA[')) {
+      return false;
+    }
+
+    let text = scanner.consumeUntilString(']]>');
+    this.validateChars(text);
+
+    if (!scanner.consumeString(']]>')) {
+      throw this.error('Unclosed CDATA section');
+    }
+
+    return this.options.preserveCdata
+      ? this.addNode(new XmlCdata(normalizeLineBreaks(text)), startIndex)
+      : this.addText(text, startIndex);
+  }
+
+  /**
+   * Consumes character data if possible.
+   *
+   * @returns Whether character data was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
+   */
+  consumeCharData(): boolean {
+    let { scanner } = this;
+    let startIndex = scanner.charIndex;
+    let charData = scanner.consumeUntilMatch(syntax.endCharData);
+
+    if (!charData) {
+      return false;
+    }
+
+    this.validateChars(charData);
+
+    if (scanner.peek(3) === ']]>') {
+      throw this.error('Element content may not contain the CDATA section close delimiter `]]>`');
+    }
+
+    return this.addText(charData, startIndex);
+  }
+
+  /**
+   * Consumes a comment if possible.
+   *
+   * @returns Whether a comment was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Comment
+   */
+  consumeComment(): boolean {
+    let { scanner } = this;
+    let startIndex = scanner.charIndex;
+
+    if (!scanner.consumeString('<!--')) {
+      return false;
+    }
+
+    let content = scanner.consumeUntilString('--');
+    this.validateChars(content);
+
+    if (!scanner.consumeString('-->')) {
+      if (scanner.peek(2) === '--') {
+        throw this.error("The string `--` isn't allowed inside a comment");
+      }
+
+      throw this.error('Unclosed comment');
+    }
+
+    return this.options.preserveComments
+      ? this.addNode(new XmlComment(normalizeLineBreaks(content)), startIndex)
+      : true;
+  }
+
+  /**
+   * Consumes a reference in a content context if possible.
+   *
+   * This differs from `consumeReference()` in that a consumed reference will be
+   * added to the document as a text node instead of returned.
+   *
+   * @returns Whether a reference was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
+   */
+  consumeContentReference(): boolean {
+    let startIndex = this.scanner.charIndex;
+    let ref = this.consumeReference();
+
+    // Only `false` means "nothing consumed"; an empty replacement still counts.
+    return ref !== false
+      && (ref === emptyString || this.addText(ref, startIndex));
+  }
+
+  /**
+   * Consumes a doctype declaration if possible.
+   *
+   * This is a loose implementation since doctype declarations are currently
+   * discarded without further parsing.
+   *
+   * @returns Whether a doctype declaration was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
+   */
+  consumeDoctypeDeclaration(): boolean {
+    let { scanner } = this;
+    let startIndex = scanner.charIndex;
+
+    if (!scanner.consumeString('<!DOCTYPE')) {
+      return false;
+    }
+
+    let name = this.consumeWhitespace()
+      && this.consumeName();
+
+    if (!name) {
+      throw this.error('Expected a name');
+    }
+
+    let publicId;
+    let systemId;
+
+    if (this.consumeWhitespace()) {
+      if (scanner.consumeString('PUBLIC')) {
+        publicId = this.consumeWhitespace()
+          && this.consumePubidLiteral();
+
+        if (publicId === false) {
+          throw this.error('Expected a public identifier');
+        }
+
+        this.consumeWhitespace();
+      }
+
+      if (publicId !== undefined || scanner.consumeString('SYSTEM')) {
+        this.consumeWhitespace();
+        systemId = this.consumeSystemLiteral();
+
+        if (systemId === false) {
+          throw this.error('Expected a system identifier');
+        }
+
+        this.consumeWhitespace();
+      }
+    }
+
+    let internalSubset;
+
+    if (scanner.consumeString('[')) {
+      // The internal subset may contain comments that contain `]` characters,
+      // so we can't use `consumeUntilString()` here.
+      internalSubset = scanner.consumeUntilMatch(/\][\x20\t\r\n]*>/);
+
+      if (!scanner.consumeString(']')) {
+        throw this.error('Unclosed internal subset');
+      }
+
+      this.consumeWhitespace();
+    }
+
+    if (!scanner.consumeString('>')) {
+      throw this.error('Unclosed doctype declaration');
+    }
+
+    return this.options.preserveDocumentType
+      ? this.addNode(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex)
+      : true;
+  }
+
+  /**
+   * Consumes an element if possible.
+   *
+   * @returns Whether an element was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
+   */
+  consumeElement(): boolean {
+    let { scanner } = this;
+    let startIndex = scanner.charIndex;
+
+    if (!scanner.consumeString('<')) {
+      return false;
+    }
+
+    let name = this.consumeName();
+
+    if (!name) {
+      scanner.reset(startIndex);
+      return false;
+    }
+
+    let attributes = this.consumeAttributes();
+    let isEmpty = !!scanner.consumeString('/>');
+    let element = new XmlElement(name, attributes);
+
+    element.parent = this.currentNode;
+
+    if (!isEmpty) {
+      if (!scanner.consumeString('>')) {
+        throw this.error(`Unclosed start tag for element \`${name}\``);
+      }
+
+      this.currentNode = element;
+
+      do {
+        this.consumeCharData();
+      } while (
+        this.consumeElement()
+          || this.consumeContentReference()
+          || this.consumeCdataSection()
+          || this.consumeProcessingInstruction()
+          || this.consumeComment()
+      );
+
+      let endTagMark = scanner.charIndex;
+      let endTagName;
+
+      if (!scanner.consumeString('</')
+          || !(endTagName = this.consumeName())
+          || endTagName !== name) {
+
+        scanner.reset(endTagMark);
+        throw this.error(`Missing end tag for element ${name}`);
+      }
+
+      this.consumeWhitespace();
+
+      if (!scanner.consumeString('>')) {
+        throw this.error(`Unclosed end tag for element ${name}`);
+      }
+
+      this.currentNode = element.parent;
+    }
+
+    return this.addNode(element, startIndex);
+  }
+
+  /**
+   * Consumes an `Eq` production if possible.
+   *
+   * @returns Whether an `Eq` production was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
+   */
+  consumeEqual(): boolean {
+    this.consumeWhitespace();
+
+    if (this.scanner.consumeString('=')) {
+      this.consumeWhitespace();
+      return true;
+    }
+
+    return false;
+  }
+
+  /**
+   * Consumes `Misc` content if possible.
+   *
+   * @returns Whether anything was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
+   */
+  consumeMisc(): boolean {
+    return this.consumeComment()
+      || this.consumeProcessingInstruction()
+      || this.consumeWhitespace();
+  }
+
+  /**
+   * Consumes one or more `Name` characters if possible.
+   *
+   * @returns `Name` characters, or an empty string if none were consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
+   */
+  consumeName(): string {
+    return syntax.isNameStartChar(this.scanner.peek())
+      ? this.scanner.consumeMatchFn(syntax.isNameChar)
+      : emptyString;
+  }
+
+  /**
+   * Consumes a processing instruction if possible.
+   *
+   * @returns Whether a processing instruction was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
+   */
+  consumeProcessingInstruction(): boolean {
+    let { scanner } = this;
+    let startIndex = scanner.charIndex;
+
+    if (!scanner.consumeString('<?')) {
+      return false;
+    }
+
+    let name = this.consumeName();
+
+    if (name) {
+      if (name.toLowerCase() === 'xml') {
+        scanner.reset(startIndex);
+        throw this.error("XML declaration isn't allowed here");
+      }
+    } else {
+      throw this.error('Invalid processing instruction');
+    }
+
+    if (!this.consumeWhitespace()) {
+      if (scanner.consumeString('?>')) {
+        return this.addNode(new XmlProcessingInstruction(name), startIndex);
+      }
+
+      throw this.error('Whitespace is required after a processing instruction name');
+    }
+
+    let content = scanner.consumeUntilString('?>');
+    this.validateChars(content);
+
+    if (!scanner.consumeString('?>')) {
+      throw this.error('Unterminated processing instruction');
+    }
+
+    return this.addNode(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
+  }
+
+  /**
+   * Consumes a prolog if possible.
+   *
+   * @returns Whether a prolog was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
+   */
+  consumeProlog(): boolean {
+    let { scanner } = this;
+    let startIndex = scanner.charIndex;
+
+    this.consumeXmlDeclaration();
+
+    while (this.consumeMisc()) {} // eslint-disable-line no-empty
+
+    if (this.consumeDoctypeDeclaration()) {
+      while (this.consumeMisc()) {} // eslint-disable-line no-empty
+    }
+
+    return startIndex < scanner.charIndex;
+  }
+
+  /**
+   * Consumes a public identifier literal if possible.
+   *
+   * @returns
+   *   Value of the public identifier literal minus quotes, or `false` if
+   *   nothing was consumed. An empty string indicates that a public id literal
+   *   was consumed but was empty.
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
+   */
+  consumePubidLiteral(): string | false {
+    let startIndex = this.scanner.charIndex;
+    let value = this.consumeSystemLiteral();
+
+    if (value !== false && !/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(value)) {
+      this.scanner.reset(startIndex);
+      throw this.error('Invalid character in public identifier');
+    }
+
+    return value;
+  }
+
+  /**
+   * Consumes a reference if possible.
+   *
+   * This differs from `consumeContentReference()` in that a consumed reference
+   * will be returned rather than added to the document.
+   *
+   * @returns
+   *   Parsed reference value, or `false` if nothing was consumed (to
+   *   distinguish from a reference that resolves to an empty string).
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Reference
+   */
+  consumeReference(): string | false {
+    let { scanner } = this;
+
+    if (!scanner.consumeString('&')) {
+      return false;
+    }
+
+    let ref = scanner.consumeMatchFn(syntax.isReferenceChar);
+
+    if (scanner.consume() !== ';') {
+      throw this.error('Unterminated reference (a reference must end with `;`)');
+    }
+
+    let parsedValue;
+
+    if (ref[0] === '#') {
+      // Character reference; `parseInt()` alone would accept e.g. `&#65abc;`.
+      let codePoint = /^#(?:x[0-9a-fA-F]+|[0-9]+)$/.test(ref)
+        ? parseInt(ref.slice(ref[1] === 'x' ? 2 : 1), ref[1] === 'x' ? 16 : 10)
+        : NaN;
+
+      if (isNaN(codePoint)) {
+        throw this.error('Invalid character reference');
+      }
+
+      if (!syntax.isXmlCodePoint(codePoint)) {
+        throw this.error('Character reference resolves to an invalid character');
+      }
+
+      parsedValue = String.fromCodePoint(codePoint);
+    } else {
+      // This is an entity reference.
+      parsedValue = syntax.predefinedEntities[ref];
+
+      if (parsedValue === undefined) {
+        let {
+          ignoreUndefinedEntities,
+          resolveUndefinedEntity,
+        } = this.options;
+
+        let wrappedRef = `&${ref};`; // for backcompat with <= 2.x
+
+        if (resolveUndefinedEntity) {
+          let resolvedValue = resolveUndefinedEntity(wrappedRef);
+
+          if (resolvedValue !== null && resolvedValue !== undefined) {
+            let type = typeof resolvedValue;
+
+            if (type !== 'string') {
+              throw new TypeError(`\`resolveUndefinedEntity()\` must return a string, \`null\`, or \`undefined\`, but returned a value of type ${type}`);
+            }
+
+            return resolvedValue;
+          }
+        }
+
+        if (ignoreUndefinedEntities) {
+          return wrappedRef;
+        }
+
+        scanner.reset(-[ ...wrappedRef ].length); // characters, not code units
+        throw this.error(`Named entity isn't defined: ${wrappedRef}`);
+      }
+    }
+
+    return parsedValue;
+  }
+
+  /**
+   * Consumes a `SystemLiteral` if possible.
+   *
+   * A `SystemLiteral` is similar to an attribute value, but allows the
+   * characters `<` and `&` and doesn't replace references.
+   *
+   * @returns
+   *   Value of the `SystemLiteral` minus quotes, or `false` if nothing was
+   *   consumed. An empty string indicates that a `SystemLiteral` was consumed
+   *   but was empty.
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
+   */
+  consumeSystemLiteral(): string | false {
+    let { scanner } = this;
+    let quote = scanner.consumeString('"') || scanner.consumeString("'");
+
+    if (!quote) {
+      return false;
+    }
+
+    let value = scanner.consumeUntilString(quote);
+    this.validateChars(value);
+
+    if (!scanner.consumeString(quote)) {
+      throw this.error('Missing end quote');
+    }
+
+    return value;
+  }
+
+  /**
+   * Consumes one or more whitespace characters if possible.
+   *
+   * @returns Whether any whitespace characters were consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
+   */
+  consumeWhitespace(): boolean {
+    return !!this.scanner.consumeMatchFn(syntax.isWhitespace);
+  }
+
+  /**
+   * Consumes an XML declaration if possible.
+   *
+   * @returns Whether an XML declaration was consumed.
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-XMLDecl
+   */
+  consumeXmlDeclaration(): boolean {
+    let { scanner } = this;
+    let startIndex = scanner.charIndex;
+
+    if (!scanner.consumeString('<?xml')) {
+      return false;
+    }
+
+    if (!this.consumeWhitespace()) {
+      throw this.error('Invalid XML declaration');
+    }
+
+    let version = !!scanner.consumeString('version')
+      && this.consumeEqual()
+      && this.consumeSystemLiteral();
+
+    if (version === false) {
+      throw this.error('XML version is missing or invalid');
+    } else if (!/^1\.[0-9]+$/.test(version)) {
+      throw this.error('Invalid character in version number');
+    }
+
+    let encoding;
+    let standalone;
+
+    if (this.consumeWhitespace()) {
+      encoding = !!scanner.consumeString('encoding')
+        && this.consumeEqual()
+        && this.consumeSystemLiteral();
+
+      if (encoding !== false) { // an empty value must be validated (and fail)
+        if (!/^[A-Za-z][\w.-]*$/.test(encoding)) {
+          throw this.error('Invalid character in encoding name');
+        }
+        this.consumeWhitespace();
+      }
+
+      standalone = !!scanner.consumeString('standalone')
+        && this.consumeEqual()
+        && this.consumeSystemLiteral();
+
+      if (standalone !== false) { // an empty value must be rejected as well
+        if (standalone !== 'yes' && standalone !== 'no') {
+          throw this.error('Only "yes" and "no" are permitted as values of `standalone`');
+        }
+
+        this.consumeWhitespace();
+      }
+    }
+
+    if (!scanner.consumeString('?>')) {
+      throw this.error('Invalid or unclosed XML declaration');
+    }
+
+    return this.options.preserveXmlDeclaration
+      ? this.addNode(new XmlDeclaration(
+          version,
+          encoding || undefined,
+          (standalone as 'yes' | 'no' | false) || undefined,
+        ), startIndex)
+      : true;
+  }
+
+  /**
+   * Returns an `XmlError` for the current scanner position.
+   */
+  error(message: string) {
+    let { scanner } = this;
+    return new XmlError(message, scanner.charIndex, scanner.string);
+  }
+
+  /**
+   * Parses the XML input.
+   */
+  parse() {
+    this.scanner.consumeString('\uFEFF'); // byte order mark
+    this.consumeProlog();
+
+    if (!this.consumeElement()) {
+      throw this.error('Root element is missing or invalid');
+    }
+
+    while (this.consumeMisc()) {} // eslint-disable-line no-empty
+
+    if (!this.scanner.isEnd) {
+      throw this.error('Extra content at the end of the document');
+    }
+  }
+
+  /**
+   * Throws an invalid character error if any character in the given _string_
+   * isn't a valid XML character.
+   */
+  validateChars(string: string) {
+    let { length } = string;
+
+    for (let i = 0; i < length; ++i) {
+      let cp = string.codePointAt(i) as number;
+
+      if (!syntax.isXmlCodePoint(cp)) {
+        this.scanner.reset(-[ ...string.slice(i) ].length); // chars, not code units
+        throw this.error('Invalid character');
+      }
+
+      if (cp > 65535) {
+        i += 1;
+      }
+    }
+  }
+}
+
+// -- Private Functions --------------------------------------------------------
+
+/**
+ * Normalizes line breaks in the given text by replacing CRLF sequences and lone
+ * CR characters with LF characters.
+ *
+ * A single regex pass is used so the cost stays linear in the length of the
+ * text, rather than rebuilding the whole string once for every CR found (which
+ * gets expensive on CRLF-heavy input).
+ *
+ * @param text Text whose line breaks should be normalized.
+ * @returns The text with every CRLF and lone CR replaced by a single LF.
+ */
+function normalizeLineBreaks(text: string): string {
+  // Most input contains no CR at all, so skip the regex in that case.
+  return text.includes('\r') ? text.replace(/\r\n?/g, '\n') : text;
+}
+
+// -- Types --------------------------------------------------------------------
+export type ParserOptions = {
+  /**
+   * When `true`, an undefined named entity (like "&bogus;") will be left in the
+   * output as is instead of causing a parse error.
+   *
+   * @default false
+   */
+  ignoreUndefinedEntities?: boolean;
+
+  /**
+   * When `true`, the start and end offsets of each node in the input string are
+   * exposed as `start` and `end` on the node, as JavaScript string indexes.
+   *
+   * @default false
+   */
+  includeOffsets?: boolean;
+
+  /**
+   * When `true`, CDATA sections will be preserved in the document as `XmlCdata`
+   * nodes. Otherwise CDATA sections will be represented as `XmlText` nodes,
+   * which keeps the node tree simpler and easier to work with.
+   *
+   * @default false
+   */
+  preserveCdata?: boolean;
+
+  /**
+   * When `true`, comments will be preserved in the document as `XmlComment`
+   * nodes. Otherwise comments will not be included in the node tree.
+   *
+   * @default false
+   */
+  preserveComments?: boolean;
+
+  /**
+   * When `true`, a document type declaration (if present) will be preserved in
+   * the document as an `XmlDocumentType` node. Otherwise the declaration will
+   * not be included in the node tree.
+   *
+   * Note that when this is `true` and a document type declaration is present,
+   * the DTD will precede the root node in the node tree (normally the root
+   * node would be first).
+   *
+   * @default false
+   */
+  preserveDocumentType?: boolean;
+
+  /**
+   * When `true`, an XML declaration (if present) will be preserved in the
+   * document as an `XmlDeclaration` node. Otherwise the declaration will not be
+   * included in the node tree.
+   *
+   * Note that when this is `true` and an XML declaration is present, the
+   * XML declaration will be the first child of the document (normally the root
+   * node would be first).
+   *
+   * @default false
+   */
+  preserveXmlDeclaration?: boolean;
+
+  /**
+   * When an undefined named entity is encountered, this function is called with
+   * the full reference (like "&bogus;", including the `&` and `;`) as its only
+   * argument. It should return a string with which to replace the entity, or
+   * `null` or `undefined` to treat it as undefined (which may result in a parse
+   * error depending on the value of `ignoreUndefinedEntities`).
+   */
+  resolveUndefinedEntity?: (entity: string) => string | null | undefined;
+
+  /**
+   * When `true`, attributes in an element's `attributes` object will be sorted
+   * in alphanumeric order by name. Otherwise they'll retain their original
+   * order as found in the XML.
+   *
+   * @default false
+   */
+  sortAttributes?: boolean;
+};
--- a/node_modules/@rgrove/parse-xml/src/lib/StringScanner.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/StringScanner.ts
@ -0,0 +1,217 @@
+const emptyString = '';
+const surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
+
+/** @private */
+export class StringScanner {
+  charIndex: number;
+  readonly string: string;
+
+  private readonly charCount: number;
+  private readonly charsToBytes: number[] | undefined;
+  private readonly length: number;
+  private readonly multiByteMode: boolean;
+
+  constructor(string: string) {
+    this.charCount = this.charLength(string, true);
+    this.charIndex = 0;
+    this.length = string.length;
+    this.multiByteMode = this.charCount !== this.length;
+    this.string = string;
+
+    if (this.multiByteMode) {
+      let charsToBytes = [];
+
+      // Create a mapping of character indexes to byte indexes ("bytes" here
+      // are UTF-16 code units, i.e. JS string indexes). A surrogate pair takes
+      // up two of them, so the two kinds of index can diverge.
+      for (let byteIndex = 0, charIndex = 0; charIndex < this.charCount; ++charIndex) {
+        charsToBytes[charIndex] = byteIndex;
+        byteIndex += (string.codePointAt(byteIndex) as number) > 65535 ? 2 : 1;
+      }
+
+      this.charsToBytes = charsToBytes;
+    }
+  }
+
+  /**
+   * Whether the current character index is at the end of the input string.
+   */
+  get isEnd() {
+    return this.charIndex >= this.charCount;
+  }
+
+  // -- Protected Methods ------------------------------------------------------
+
+  /**
+   * Returns the number of characters in the given string, which may differ from
+   * the byte length if the string contains multibyte characters.
+   */
+  protected charLength(string: string, multiByteSafe = this.multiByteMode): number {
+    // We could get the char length with `[ ...string ].length`, but that's
+    // actually slower than replacing surrogate pairs with single-byte
+    // characters and then counting the result.
+    return multiByteSafe
+      ? string.replace(surrogatePair, '_').length
+      : string.length;
+  }
+
+  // -- Public Methods ---------------------------------------------------------
+
+  /**
+   * Advances the scanner by the given number of characters, stopping if the end
+   * of the string is reached.
+   */
+  advance(count = 1) {
+    this.charIndex = Math.min(this.charCount, this.charIndex + count);
+  }
+
+  /**
+   * Returns the byte index of the given character index (the two differ when
+   * the string has multibyte characters), or the byte length if out of range.
+   */
+  charIndexToByteIndex(charIndex: number = this.charIndex): number {
+    return this.multiByteMode
+      ? (this.charsToBytes as number[])[charIndex] ?? this.length
+      : charIndex;
+  }
+
+  /**
+   * Consumes and returns the given number of characters if possible, advancing
+   * the scanner and stopping if the end of the string is reached.
+   *
+   * If no characters could be consumed, an empty string will be returned.
+   */
+  consume(charCount = 1): string {
+    let chars = this.peek(charCount);
+    this.advance(charCount);
+    return chars;
+  }
+
+  /**
+   * Consumes and returns the given number of bytes if possible, advancing the
+   * scanner and stopping if the end of the string is reached.
+   *
+   * It's up to the caller to ensure that the given byte count doesn't split a
+   * multibyte character.
+   *
+   * If no bytes could be consumed, an empty string will be returned.
+   */
+  consumeBytes(byteCount: number): string {
+    let byteIndex = this.charIndexToByteIndex();
+    let result = this.string.slice(byteIndex, byteIndex + byteCount);
+    this.advance(this.charLength(result));
+    return result;
+  }
+
+  /**
+   * Consumes and returns all characters for which the given function returns
+   * `true`, stopping at the first `false` or at the end of the input. Only a
+   * well-formed surrogate pair is passed to the function as a single character.
+   */
+  consumeMatchFn(fn: (char: string) => boolean): string {
+    let { length, multiByteMode, string } = this;
+    let startByteIndex = this.charIndexToByteIndex();
+    let endByteIndex = startByteIndex;
+
+    if (multiByteMode) {
+      while (endByteIndex < length) {
+        let char = string[endByteIndex] as string;
+        let isSurrogatePair = (string.codePointAt(endByteIndex) as number) > 65535;
+
+        if (isSurrogatePair) {
+          char += string[endByteIndex + 1];
+        }
+
+        if (!fn(char)) {
+          break;
+        }
+
+        endByteIndex += isSurrogatePair ? 2 : 1;
+      }
+    } else {
+      while (endByteIndex < length && fn(string[endByteIndex] as string)) {
+        ++endByteIndex;
+      }
+    }
+
+    return this.consumeBytes(endByteIndex - startByteIndex);
+  }
+
+  /**
+   * Consumes the given string if it exists at the current character index, and
+   * advances the scanner.
+   *
+   * If the given string doesn't exist at the current character index, an empty
+   * string will be returned and the scanner will not be advanced.
+   */
+  consumeString(stringToConsume: string): string {
+    let { length } = stringToConsume;
+    let byteIndex = this.charIndexToByteIndex();
+
+    if (stringToConsume === this.string.slice(byteIndex, byteIndex + length)) {
+      this.advance(length === 1 ? 1 : this.charLength(stringToConsume));
+      return stringToConsume;
+    }
+
+    return emptyString;
+  }
+
+  /**
+   * Consumes characters until the given global regex is matched, advancing the
+   * scanner up to (but not beyond) the beginning of the match. If the regex
+   * doesn't match, nothing will be consumed.
+   *
+   * Returns the consumed string, or an empty string if nothing was consumed.
+   */
+  consumeUntilMatch(regex: RegExp): string {
+    let matchByteIndex = this.string
+      .slice(this.charIndexToByteIndex())
+      .search(regex);
+
+    return matchByteIndex > 0
+      ? this.consumeBytes(matchByteIndex)
+      : emptyString;
+  }
+
+  /**
+   * Consumes characters until the given string is found, advancing the scanner
+   * up to (but not beyond) that point. If the string is never found, nothing
+   * will be consumed.
+   *
+   * Returns the consumed string, or an empty string if nothing was consumed.
+   */
+  consumeUntilString(searchString: string): string {
+    let byteIndex = this.charIndexToByteIndex();
+    let matchByteIndex = this.string.indexOf(searchString, byteIndex);
+
+    return matchByteIndex > 0
+      ? this.consumeBytes(matchByteIndex - byteIndex)
+      : emptyString;
+  }
+
+  /**
+   * Returns the given number of characters starting at the current character
+   * index, without advancing the scanner and without exceeding the end of the
+   * input string.
+   */
+  peek(count = 1): string {
+    let { charIndex, string } = this;
+
+    return this.multiByteMode
+      ? string.slice(this.charIndexToByteIndex(charIndex), this.charIndexToByteIndex(charIndex + count))
+      : string.slice(charIndex, charIndex + count);
+  }
+
+  /**
+   * Resets the scanner position to the given character _index_, or to the start
+   * of the input string if no index is given.
+   *
+   * If _index_ is negative, the scanner position will be moved backward by that
+   * many characters, stopping if the beginning of the string is reached.
+   */
+  reset(index = 0) {
+    this.charIndex = index >= 0
+      ? Math.min(this.charCount, index)
+      : Math.max(0, this.charIndex + index);
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/XmlCdata.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/XmlCdata.ts
@ -0,0 +1,11 @@
+import { XmlNode } from './XmlNode.js';
+import { XmlText } from './XmlText.js';
+
+/**
+ * A CDATA section within an XML document.
+ *
+ * This class adds nothing to `XmlText` except its own node type; everything
+ * else is inherited.
+ */
+export class XmlCdata extends XmlText {
+  override get type() {
+    return XmlNode.TYPE_CDATA;
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/XmlComment.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/XmlComment.ts
@ -0,0 +1,26 @@
+import { XmlNode } from './XmlNode.js';
+
+/**
+ * Represents a comment in an XML document.
+ */
+export class XmlComment extends XmlNode {
+  /**
+   * Text of this comment.
+   */
+  content: string;
+
+  /**
+   * @param content Text of the comment. Defaults to an empty string.
+   */
+  constructor(content = '') {
+    super();
+    this.content = content;
+  }
+
+  override get type() {
+    return XmlNode.TYPE_COMMENT;
+  }
+
+  /**
+   * Returns the base node's JSON representation with this comment's `content`
+   * added to it.
+   */
+  override toJSON() {
+    let json = XmlNode.prototype.toJSON.call(this);
+    let extra = { content: this.content };
+
+    return Object.assign(json, extra);
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/XmlDeclaration.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/XmlDeclaration.ts
@ -0,0 +1,58 @@
+import { XmlNode } from './XmlNode.js';
+
+/**
+ * Represents an XML declaration in an XML document.
+ *
+ * @example
+ *
+ * ```xml
+ * <?xml version="1.0" encoding="UTF-8"?>
+ * ```
+ */
+export class XmlDeclaration extends XmlNode {
+  /**
+   * Encoding given by this XML declaration, or `null` when the declaration
+   * has no encoding.
+   */
+  encoding: string | null;
+
+  /**
+   * Standalone value given by this XML declaration, or `null` when the
+   * declaration has no standalone value.
+   */
+  standalone: 'yes' | 'no' | null;
+
+  /**
+   * Version given by this XML declaration.
+   */
+  version: string;
+
+  /**
+   * @param version Value of the version declaration.
+   * @param encoding Value of the encoding declaration, if any.
+   * @param standalone Value of the standalone declaration, if any.
+   */
+  constructor(
+    version: string,
+    encoding?: string,
+    standalone?: typeof XmlDeclaration.prototype.standalone,
+  ) {
+    super();
+
+    this.version = version;
+    this.encoding = encoding ?? null;
+    this.standalone = standalone ?? null;
+  }
+
+  override get type() {
+    return XmlNode.TYPE_XML_DECLARATION;
+  }
+
+  /**
+   * Returns the base node's JSON representation plus `version`, and plus
+   * `encoding` and `standalone` when they are not `null`.
+   */
+  override toJSON() {
+    let json = XmlNode.prototype.toJSON.call(this);
+    let { encoding, standalone } = this;
+
+    json.version = this.version;
+
+    if (encoding !== null) {
+      json.encoding = encoding;
+    }
+
+    if (standalone !== null) {
+      json.standalone = standalone;
+    }
+
+    return json;
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/XmlDocument.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/XmlDocument.ts
@ -0,0 +1,59 @@
+import { XmlElement } from './XmlElement.js';
+import { XmlNode } from './XmlNode.js';
+
+import type { XmlComment } from './XmlComment.js';
+import type { XmlDeclaration } from './XmlDeclaration.js';
+import type { XmlDocumentType } from './XmlDocumentType.js';
+import type { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
+
+/**
+ * An XML document. Every element in the document is a descendant of this
+ * node.
+ */
+export class XmlDocument extends XmlNode {
+  /**
+   * Nodes that are direct children of this document.
+   */
+  readonly children: Array<XmlComment | XmlDeclaration | XmlDocumentType | XmlProcessingInstruction | XmlElement>;
+
+  /**
+   * @param children Child nodes of the document. Defaults to an empty array.
+   */
+  constructor(children: Array<XmlComment | XmlDeclaration | XmlDocumentType | XmlElement | XmlProcessingInstruction> = []) {
+    super();
+    this.children = children;
+  }
+
+  /**
+   * A document is its own containing document.
+   */
+  override get document() {
+    return this;
+  }
+
+  /**
+   * First child of this document that is an `XmlElement`, or `null` if there
+   * is none.
+   */
+  get root(): XmlElement | null {
+    let rootElement = this.children.find(
+      (child): child is XmlElement => child instanceof XmlElement,
+    );
+
+    return rootElement ?? null;
+  }
+
+  /**
+   * Concatenated text of every child that has a `text` property; other
+   * children contribute an empty string.
+   */
+  get text(): string {
+    let pieces: string[] = [];
+
+    this.children.forEach((child) => {
+      pieces.push('text' in child ? child.text : '');
+    });
+
+    return pieces.join('');
+  }
+
+  override get type() {
+    return XmlNode.TYPE_DOCUMENT;
+  }
+
+  /**
+   * Returns the base node's JSON representation with the JSON representation
+   * of each child added under `children`.
+   */
+  override toJSON() {
+    let json = XmlNode.prototype.toJSON.call(this);
+    let children = this.children.map(child => child.toJSON());
+
+    return Object.assign(json, { children });
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/XmlDocumentType.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/XmlDocumentType.ts
@ -0,0 +1,67 @@
+import { XmlNode } from './XmlNode.js';
+
+/**
+ * Represents a document type declaration in an XML document.
+ *
+ * @example
+ *
+ * ```xml
+ * <!DOCTYPE kittens [
+ *   <!ELEMENT kittens (#PCDATA)>
+ * ]>
+ * ```
+ */
+export class XmlDocumentType extends XmlNode {
+  /**
+   * Root element name that this document type declaration describes.
+   */
+  name: string;
+
+  /**
+   * Public identifier of the external subset, or `null` when the declaration
+   * has no public identifier.
+   */
+  publicId: string | null;
+
+  /**
+   * System identifier of the external subset, or `null` when the declaration
+   * has no system identifier.
+   */
+  systemId: string | null;
+
+  /**
+   * Internal subset of the declaration, or `null` when the declaration has no
+   * internal subset.
+   */
+  internalSubset: string | null;
+
+  /**
+   * @param name Name of the root element described by the declaration.
+   * @param publicId Public identifier of the external subset, if any.
+   * @param systemId System identifier of the external subset, if any.
+   * @param internalSubset Internal subset, if any.
+   */
+  constructor(
+    name: string,
+    publicId?: string,
+    systemId?: string,
+    internalSubset?: string,
+  ) {
+    super();
+    this.name = name;
+    this.publicId = publicId ?? null;
+    this.systemId = systemId ?? null;
+    this.internalSubset = internalSubset ?? null;
+  }
+
+  override get type() {
+    return XmlNode.TYPE_DOCUMENT_TYPE;
+  }
+
+  /**
+   * Returns the base node's JSON representation plus `name`, and plus each of
+   * `publicId`, `systemId` and `internalSubset` that is not `null`.
+   */
+  override toJSON() {
+    let json = XmlNode.prototype.toJSON.call(this);
+    let { publicId, systemId, internalSubset } = this;
+
+    json.name = this.name;
+
+    if (publicId !== null) {
+      json.publicId = publicId;
+    }
+
+    if (systemId !== null) {
+      json.systemId = systemId;
+    }
+
+    if (internalSubset !== null) {
+      json.internalSubset = internalSubset;
+    }
+
+    return json;
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/XmlElement.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/XmlElement.ts
@ -0,0 +1,81 @@
+import { XmlNode } from './XmlNode.js';
+
+import type { JsonObject } from './types.js';
+import type { XmlCdata } from './XmlCdata.js';
+import type { XmlComment } from './XmlComment.js';
+import type { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
+import type { XmlText } from './XmlText.js';
+
+/**
+ * An element within an XML document.
+ */
+export class XmlElement extends XmlNode {
+  /**
+   * Attributes of this element, keyed by attribute name.
+   */
+  attributes: {[attrName: string]: string};
+
+  /**
+   * Nodes that are direct children of this element.
+   */
+  children: Array<XmlCdata | XmlComment | XmlElement | XmlProcessingInstruction | XmlText>;
+
+  /**
+   * Name of this element.
+   */
+  name: string;
+
+  /**
+   * @param name Name of the element.
+   * @param attributes Attributes of the element. Defaults to a new object
+   *   with no prototype.
+   * @param children Child nodes of the element. Defaults to an empty array.
+   */
+  constructor(
+    name: string,
+    attributes: {[attrName: string]: string} = Object.create(null),
+    children: Array<XmlCdata | XmlComment | XmlElement | XmlProcessingInstruction | XmlText> = [],
+  ) {
+    super();
+
+    this.name = name;
+    this.attributes = attributes;
+    this.children = children;
+  }
+
+  /**
+   * `true` when this element has no children.
+   */
+  get isEmpty(): boolean {
+    return this.children.length === 0;
+  }
+
+  /**
+   * Walks from this element up through its element ancestors and returns
+   * whether the first `xml:space` attribute found is set to "preserve".
+   * Returns `false` if no such attribute is found.
+   */
+  override get preserveWhitespace(): boolean {
+    let current: XmlNode | null = this; // eslint-disable-line @typescript-eslint/no-this-alias
+
+    while (current instanceof XmlElement) {
+      let { attributes } = current;
+
+      if ('xml:space' in attributes) {
+        return attributes['xml:space'] === 'preserve';
+      }
+
+      current = current.parent;
+    }
+
+    return false;
+  }
+
+  /**
+   * Concatenated text of every child that has a `text` property; other
+   * children contribute an empty string.
+   */
+  get text(): string {
+    let pieces: string[] = [];
+
+    this.children.forEach((child) => {
+      pieces.push('text' in child ? child.text : '');
+    });
+
+    return pieces.join('');
+  }
+
+  override get type() {
+    return XmlNode.TYPE_ELEMENT;
+  }
+
+  /**
+   * Returns the base node's JSON representation plus this element's `name`,
+   * its `attributes`, and the JSON representation of each child.
+   */
+  override toJSON(): JsonObject {
+    let json = XmlNode.prototype.toJSON.call(this);
+    let children = this.children.map(child => child.toJSON());
+
+    return Object.assign(json, {
+      name: this.name,
+      attributes: this.attributes,
+      children,
+    });
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/XmlError.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/XmlError.ts
@ -0,0 +1,80 @@
+/**
+ * An error that occurred while parsing XML.
+ *
+ * The error message contains the line and column of the problem, an excerpt of
+ * the offending line, and a caret marking the error position within that
+ * excerpt.
+ */
+export class XmlError extends Error {
+  /**
+   * Character column at which this error occurred (1-based).
+   */
+  readonly column: number;
+
+  /**
+   * Short excerpt from the input string that contains the problem.
+   */
+  readonly excerpt: string;
+
+  /**
+   * Line number at which this error occurred (1-based).
+   */
+  readonly line: number;
+
+  /**
+   * Character position at which this error occurred relative to the beginning
+   * of the input (0-based). This is the `charIndex` value exactly as it was
+   * passed to the constructor.
+   */
+  readonly pos: number;
+
+  /**
+   * @param message Description of the problem.
+   * @param charIndex Position of the error within _xml_ (0-based). Positions
+   *   outside the input are treated as the nearest end of the input when the
+   *   line, column and excerpt are computed.
+   * @param xml Input string in which the error occurred.
+   */
+  constructor(
+    message: string,
+    charIndex: number,
+    xml: string,
+  ) {
+    // Keep the scan inside the input. Without this, a position past the end
+    // would read `xml[i]` as `undefined` and put the text "undefined" into the
+    // excerpt, and a negative position would make `slice()` count from the end.
+    let errorIndex = Math.max(0, Math.min(charIndex, xml.length));
+    let line = 1;
+    let lineStart = 0;
+
+    // Find the line on which the error occurred and where that line starts.
+    for (let i = 0; i < errorIndex; ++i) {
+      if (xml[i] === '\n') {
+        line += 1;
+        lineStart = i + 1;
+      }
+    }
+
+    let column = errorIndex - lineStart + 1;
+    let eol = xml.indexOf('\n', errorIndex);
+
+    // The excerpt starts out as the whole line that contains the error.
+    let excerpt = xml.slice(lineStart, eol === -1 ? xml.length : eol);
+    let excerptStart = 0;
+
+    // Keep the excerpt below 50 chars, but always keep the error position in
+    // view.
+    if (excerpt.length > 50) {
+      if (column < 40) {
+        excerpt = excerpt.slice(0, 50);
+      } else {
+        excerptStart = column - 20;
+        excerpt = excerpt.slice(excerptStart, column + 30);
+      }
+    }
+
+    // The excerpt line is indented by two spaces, so the caret needs one more
+    // space than the error's 1-based offset within the excerpt.
+    super(
+      `${message} (line ${line}, column ${column})\n`
+        + `  ${excerpt}\n`
+        + ' '.repeat(column - excerptStart + 1) + '^\n',
+    );
+
+    this.column = column;
+    this.excerpt = excerpt;
+    this.line = line;
+    this.name = 'XmlError';
+    this.pos = charIndex;
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/XmlNode.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/XmlNode.ts
@ -0,0 +1,137 @@
+import type { JsonObject } from './types.js';
+import type { XmlDocument } from './XmlDocument.js';
+import type { XmlElement } from './XmlElement.js';
+
+/**
+ * Base class for a node in an XML document.
+ */
+export class XmlNode {
+  /** Type value for an `XmlCdata` node. */
+  static readonly TYPE_CDATA = 'cdata';
+
+  /** Type value for an `XmlComment` node. */
+  static readonly TYPE_COMMENT = 'comment';
+
+  /** Type value for an `XmlDocument` node. */
+  static readonly TYPE_DOCUMENT = 'document';
+
+  /** Type value for an `XmlDocumentType` node. */
+  static readonly TYPE_DOCUMENT_TYPE = 'doctype';
+
+  /** Type value for an `XmlElement` node. */
+  static readonly TYPE_ELEMENT = 'element';
+
+  /** Type value for an `XmlProcessingInstruction` node. */
+  static readonly TYPE_PROCESSING_INSTRUCTION = 'pi';
+
+  /** Type value for an `XmlText` node. */
+  static readonly TYPE_TEXT = 'text';
+
+  /** Type value for an `XmlDeclaration` node. */
+  static readonly TYPE_XML_DECLARATION = 'xmldecl';
+
+  /**
+   * Node that contains this node, or `null` when this node has no parent.
+   */
+  parent: XmlDocument | XmlElement | null = null;
+
+  /**
+   * Byte offset in the original XML string at which this node starts, or `-1`
+   * when the offset is unknown.
+   */
+  start = -1;
+
+  /**
+   * Byte offset in the original XML string at which this node ends, or `-1`
+   * when the offset is unknown.
+   */
+  end = -1;
+
+  /**
+   * Document that contains this node, as reported by this node's parent, or
+   * `null` when the node has no parent or the parent has no document.
+   */
+  get document(): XmlDocument | null {
+    let { parent } = this;
+
+    if (parent == null) {
+      return null;
+    }
+
+    return parent.document ?? null;
+  }
+
+  /**
+   * Whether this node is the root node of the document (also known as the
+   * document element): an element whose parent is the document itself.
+   */
+  get isRootNode(): boolean {
+    let { parent } = this;
+
+    if (parent === null || parent !== this.document) {
+      return false;
+    }
+
+    return this.type === XmlNode.TYPE_ELEMENT;
+  }
+
+  /**
+   * Whether whitespace should be preserved in the content of this node and
+   * its children.
+   *
+   * The base implementation simply reports the parent's value, or `false`
+   * when there is no parent. `XmlElement` overrides this to honor the special
+   * `xml:space` attribute.
+   *
+   * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-white-space
+   */
+  get preserveWhitespace(): boolean {
+    return Boolean(this.parent?.preserveWhitespace);
+  }
+
+  /**
+   * Type of this node.
+   *
+   * Subclasses return one of the static `TYPE_*` values of `XmlNode` (e.g.
+   * `TYPE_ELEMENT`, `TYPE_TEXT`, etc.). `XmlNode` itself is only a base class
+   * and has no type name of its own, so it returns an empty string.
+   */
+  get type() {
+    return '';
+  }
+
+  /**
+   * Returns a JSON-serializable object representing this node, minus
+   * properties that could result in circular references.
+   *
+   * `isRootNode` and `preserveWhitespace` are only included when `true`, and
+   * `start` and `end` are only included when `start` is known.
+   */
+  toJSON(): JsonObject {
+    let json: JsonObject = { type: this.type };
+
+    if (this.isRootNode) {
+      json.isRootNode = true;
+    }
+
+    if (this.preserveWhitespace) {
+      json.preserveWhitespace = true;
+    }
+
+    // A start of `-1` means the offsets are unknown, so leave them out.
+    if (this.start === -1) {
+      return json;
+    }
+
+    json.start = this.start;
+    json.end = this.end;
+
+    return json;
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/XmlProcessingInstruction.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/XmlProcessingInstruction.ts
@ -0,0 +1,35 @@
+import { XmlNode } from './XmlNode.js';
+
+/**
+ * Represents a processing instruction in an XML document.
+ */
+export class XmlProcessingInstruction extends XmlNode {
+  /**
+   * Content of this processing instruction.
+   */
+  content: string;
+
+  /**
+   * Name of this processing instruction, which is sometimes also called the
+   * processing instruction "target".
+   */
+  name: string;
+
+  /**
+   * @param name Name (target) of the processing instruction.
+   * @param content Content of the processing instruction. Defaults to an
+   *   empty string.
+   */
+  constructor(name: string, content = '') {
+    super();
+
+    this.name = name;
+    this.content = content;
+  }
+
+  override get type() {
+    return XmlNode.TYPE_PROCESSING_INSTRUCTION;
+  }
+
+  /**
+   * Returns the base node's JSON representation with this processing
+   * instruction's `name` and `content` added to it.
+   */
+  override toJSON() {
+    let json = XmlNode.prototype.toJSON.call(this);
+    let { name, content } = this;
+
+    return Object.assign(json, { name, content });
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/XmlText.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/XmlText.ts
@ -0,0 +1,26 @@
+import { XmlNode } from './XmlNode.js';
+
+/**
+ * Represents text content in an XML document.
+ */
+export class XmlText extends XmlNode {
+  /**
+   * Text held by this node.
+   */
+  text: string;
+
+  /**
+   * @param text Text content of the node. Defaults to an empty string.
+   */
+  constructor(text = '') {
+    super();
+    this.text = text;
+  }
+
+  override get type() {
+    return XmlNode.TYPE_TEXT;
+  }
+
+  /**
+   * Returns the base node's JSON representation with this node's `text` added
+   * to it.
+   */
+  override toJSON() {
+    let json = XmlNode.prototype.toJSON.call(this);
+    let extra = { text: this.text };
+
+    return Object.assign(json, extra);
+  }
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/syntax.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/syntax.ts
@ -0,0 +1,130 @@
+/**
+ * Regular expression that matches a single character which cannot appear
+ * literally inside a double-quoted attribute value: `"`, `&`, or `<`. In other
+ * words, it finds the point at which a run of plain `AttValue` characters
+ * ends.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
+ */
+export const attValueCharDoubleQuote = /["&<]/;
+
+/**
+ * Regular expression that matches a single character which cannot appear
+ * literally inside a single-quoted attribute value: `'`, `&`, or `<`. In other
+ * words, it finds the point at which a run of plain `AttValue` characters
+ * ends.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
+ */
+export const attValueCharSingleQuote = /['&<]/;
+
+/**
+ * Regular expression that matches whitespace that should be normalized to a
+ * space character in an attribute value: a carriage return followed by a line
+ * feed (matched together as one unit), or a single line feed, carriage return,
+ * or tab. The expression is global, so it matches every occurrence.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#AVNormalize
+ */
+export const attValueNormalizedWhitespace = /\r\n|[\n\r\t]/g;
+
+/**
+ * Regular expression that matches the characters that signal the end of XML
+ * `CharData` content: `<`, `&`, or the sequence `]]>`.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
+ */
+export const endCharData = /<|&|]]>/;
+
+/**
+ * Frozen lookup table from each predefined entity name to the character it
+ * stands for. The table has no prototype, so only the five entity names are
+ * present as keys.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-predefined-ent
+ */
+export const predefinedEntities: Readonly<{[name: string]: string;}> = (() => {
+  let entities: {[name: string]: string} = Object.create(null);
+
+  entities.amp = '&';
+  entities.apos = "'";
+  entities.gt = '>';
+  entities.lt = '<';
+  entities.quot = '"';
+
+  return Object.freeze(entities);
+})();
+
+/**
+ * Tells whether _char_ is an XML `NameChar`. Only the first code point of
+ * _char_ is examined.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameChar
+ */
+export function isNameChar(char: string): boolean {
+  let cp = char.codePointAt(0) as number;
+
+  // Letters are also NameStartChars, but checking the most common ones here
+  // first improves performance slightly.
+  if (
+    (cp >= 0x61 && cp <= 0x7A) // a-z
+    || (cp >= 0x41 && cp <= 0x5A) // A-Z
+    || (cp >= 0x30 && cp <= 0x39) // 0-9
+  ) {
+    return true;
+  }
+
+  // Remaining characters that are checked here before falling back to the
+  // NameStartChar test.
+  if (
+    cp === 0x2D // -
+    || cp === 0x2E // .
+    || cp === 0xB7
+    || (cp >= 0x300 && cp <= 0x36F)
+    || cp === 0x203F
+    || cp === 0x2040
+  ) {
+    return true;
+  }
+
+  return isNameStartChar(char, cp);
+}
+
+/**
+ * Tells whether _char_ is an XML `NameStartChar`.
+ *
+ * The decision is made on the code point _cp_, which defaults to the first
+ * code point of _char_.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameStartChar
+ */
+export function isNameStartChar(char: string, cp = char.codePointAt(0) as number): boolean {
+  // ASCII letters, colon and underscore.
+  if (
+    (cp >= 0x61 && cp <= 0x7A) // a-z
+    || (cp >= 0x41 && cp <= 0x5A) // A-Z
+    || cp === 0x3A // :
+    || cp === 0x5F // _
+  ) {
+    return true;
+  }
+
+  // Ranges from U+00C0 through U+1FFF.
+  if (
+    (cp >= 0xC0 && cp <= 0xD6)
+    || (cp >= 0xD8 && cp <= 0xF6)
+    || (cp >= 0xF8 && cp <= 0x2FF)
+    || (cp >= 0x370 && cp <= 0x37D)
+    || (cp >= 0x37F && cp <= 0x1FFF)
+  ) {
+    return true;
+  }
+
+  // Ranges from U+200C through U+FFFD.
+  if (
+    cp === 0x200C
+    || cp === 0x200D
+    || (cp >= 0x2070 && cp <= 0x218F)
+    || (cp >= 0x2C00 && cp <= 0x2FEF)
+    || (cp >= 0x3001 && cp <= 0xD7FF)
+    || (cp >= 0xF900 && cp <= 0xFDCF)
+    || (cp >= 0xFDF0 && cp <= 0xFFFD)
+  ) {
+    return true;
+  }
+
+  // Code points above U+FFFF.
+  return cp >= 0x10000 && cp <= 0xEFFFF;
+}
+
+/**
+ * Tells whether _char_ is a valid reference character, meaning one that may
+ * appear between `&` and `;` in a reference: either `#` or any `NameChar`.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-references
+ */
+export function isReferenceChar(char: string): boolean {
+  if (char === '#') {
+    return true;
+  }
+
+  return isNameChar(char);
+}
+
+/**
+ * Tells whether _char_ is an XML whitespace character: a space, tab, line
+ * feed, or carriage return. Only the first code point of _char_ is examined.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
+ */
+export function isWhitespace(char: string): boolean {
+  switch (char.codePointAt(0)) {
+    case 0x20: // space
+    case 0x9: // tab
+    case 0xA: // line feed
+    case 0xD: // carriage return
+      return true;
+
+    default:
+      return false;
+  }
+}
+
+/**
+ * Tells whether _cp_ is a valid XML `Char` code point.
+ *
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Char
+ */
+export function isXmlCodePoint(cp: number): boolean {
+  // Below U+0020 only tab, line feed and carriage return are allowed.
+  if (cp < 0x20) {
+    return cp === 0x9 || cp === 0xA || cp === 0xD;
+  }
+
+  if (cp <= 0xD7FF) {
+    return true;
+  }
+
+  // Nothing between U+D7FF and U+E000 is allowed.
+  if (cp < 0xE000) {
+    return false;
+  }
+
+  if (cp <= 0xFFFD) {
+    return true;
+  }
+
+  return cp >= 0x10000 && cp <= 0x10FFFF;
+}
--- a/node_modules/@rgrove/parse-xml/src/lib/types.ts
+++ b/node_modules/@rgrove/parse-xml/src/lib/types.ts
@ -0,0 +1,2 @@
+/**
+ * An object whose property values, where present, are all `JsonValue`s.
+ */
+export type JsonObject = {[key in string]?: JsonValue};
+/**
+ * A string, number, boolean, `null`, `JsonObject`, or array of `JsonValue`s.
+ */
+export type JsonValue = string | number | boolean | JsonObject | JsonValue[] | null;