All files / xml / parse_stream.ts

100.00% Branches 8/8
100.00% Functions 3/3
100.00% Lines 32/32
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
 
 
 
 
 
 
 
 
 
 
x4
x4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
x4
x4
x4
x4
 
x41
x41
x41
x41
 
x41
x94
x94
x27
x27
x41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
x4
x4
x4
x4
 
x7
x7
x7
 
 
x4
x4
 
x7
x7
x17
x17
 
x7
x7
x7




























































































































// Copyright 2018-2026 the Deno authors. MIT license.
// This module is browser compatible.

/**
 * Streaming XML parser with callback-based API for maximum throughput.
 *
 * @module
 */

import type { ParseStreamOptions, XmlEventCallbacks } from "./types.ts";
import { XmlTokenizer } from "./_tokenizer.ts";
import { XmlEventParser } from "./_parser.ts";

export type { ParseStreamOptions, XmlEventCallbacks } from "./types.ts";

/**
 * Parses a stream of XML text, reporting what it finds through callbacks.
 *
 * Each chunk pulled from `source` is handed straight to the tokenizer, which
 * drives the callback-based parser; no intermediate event objects are created
 * along the way. Pick this entry point when throughput matters most and a
 * callback-style API is acceptable.
 *
 * @example Collecting data from elements
 * ```ts
 * import { parseXmlStream } from "@std/xml/parse-stream";
 * import { assertEquals } from "@std/assert";
 *
 * const xml = `<root><item id="1">First</item><item id="2">Second</item></root>`;
 * const stream = ReadableStream.from([xml]);
 *
 * const items: string[] = [];
 * let currentText = "";
 * let inItem = false;
 *
 * await parseXmlStream(stream, {
 *   onStartElement(name) {
 *     if (name === "item") {
 *       inItem = true;
 *       currentText = "";
 *     }
 *   },
 *   onText(text) {
 *     if (inItem) currentText += text;
 *   },
 *   onEndElement(name) {
 *     if (name === "item") {
 *       items.push(currentText);
 *       inItem = false;
 *     }
 *   },
 * });
 *
 * assertEquals(items, ["First", "Second"]);
 * ```
 *
 * @param source The async iterable of XML string chunks to parse.
 * @param callbacks The event callbacks invoked during parsing.
 * @param options Options for configuring the parser. For the tokenizer,
 * `trackPosition` falls back to `false` and `disallowDoctype` to `true` when
 * they are not provided.
 * @returns A promise that resolves when parsing is complete.
 */
export async function parseXmlStream(
  source: AsyncIterable<string>,
  callbacks: XmlEventCallbacks,
  options: ParseStreamOptions = {},
): Promise<void> {
  // The tokenizer only receives the two settings below; the event parser is
  // given the complete options object.
  const lexer = new XmlTokenizer({
    trackPosition: options.trackPosition ?? false,
    disallowDoctype: options.disallowDoctype ?? true,
  });
  const eventParser = new XmlEventParser(callbacks, options);

  for await (const piece of source) {
    lexer.process(piece, eventParser);
  }

  // End of input: finalize the tokenizer first, then the parser.
  lexer.finalize(eventParser);
  eventParser.finalize();
}

/**
 * Parses XML delivered as a stream of bytes, reporting results through
 * callbacks.
 *
 * The byte chunks are decoded to text and then forwarded to
 * {@linkcode parseXmlStream}; this function adds nothing beyond the decoding
 * step. If the input is already text, call `parseXmlStream` directly.
 *
 * @example Basic usage
 * ```ts
 * import { parseXmlStreamFromBytes } from "@std/xml/parse-stream";
 * import { assertEquals } from "@std/assert";
 *
 * const xml = new TextEncoder().encode("<root>Hello</root>");
 * const stream = ReadableStream.from([xml]);
 *
 * let text = "";
 * await parseXmlStreamFromBytes(stream, {
 *   onText(t) { text += t; },
 * });
 *
 * assertEquals(text, "Hello");
 * ```
 *
 * @param source The async iterable of XML byte chunks to parse.
 * @param callbacks The event callbacks invoked during parsing.
 * @param options Options for configuring the parser.
 * @returns A promise that resolves when parsing is complete.
 */
export function parseXmlStreamFromBytes(
  source: AsyncIterable<Uint8Array>,
  callbacks: XmlEventCallbacks,
  options: ParseStreamOptions = {},
): Promise<void> {
  // Decode lazily and let the text-based parser consume the result.
  return parseXmlStream(decodeAsyncIterable(source), callbacks, options);
}

/**
 * Decodes an async iterable of byte chunks into string chunks.
 *
 * One `TextDecoder` (constructed with its defaults) is shared across all
 * chunks and used in streaming mode, so a multi-byte sequence that is split
 * across chunk boundaries is held back until its remaining bytes arrive.
 *
 * Chunks that decode to an empty string (an empty input chunk, or a chunk
 * that only contains the start of a multi-byte sequence) are not yielded,
 * matching how the final flush is handled.
 *
 * @param source The async iterable of byte chunks to decode.
 * @returns An async generator yielding the non-empty decoded string chunks.
 */
async function* decodeAsyncIterable(
  source: AsyncIterable<Uint8Array>,
): AsyncGenerator<string> {
  const decoder = new TextDecoder();
  for await (const chunk of source) {
    const text = decoder.decode(chunk, { stream: true });
    // Nothing decodable yet (empty or partial sequence): skip the empty chunk.
    if (text) yield text;
  }
  // Flush any bytes still buffered by the decoder at end of input.
  const final = decoder.decode();
  if (final) yield final;
}