All files / xml / parse_stream.ts

100.00% Branches 5/5
100.00% Lines 31/31
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
 
 
 
 
 
 
 
 
 
 
x4
x4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
x4
x4
x4
x4
 
x45
x135
x45
 
x45
x139
x139
x72
x72
x45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
x4
x4
x4
x4
 
x11
x11
x11
 
 
x4
x4
 
x11
x11
x84
x28
 
x11
x11
x11





























































































































// Copyright 2018-2026 the Deno authors. MIT license.
// This module is browser compatible.

/**
 * Streaming XML parser with callback-based API for maximum throughput.
 *
 * @module
 */

import type { ParseStreamOptions, XmlEventCallbacks } from "./types.ts";
import { XmlTokenizer } from "./_tokenizer.ts";
import { XmlEventParser } from "./_parser.ts";

export type { ParseStreamOptions, XmlEventCallbacks } from "./types.ts";

/**
 * Parses a stream of XML text, reporting events through direct callbacks.
 *
 * This is the highest-throughput way to stream-parse XML: events are delivered
 * by calling the supplied callbacks directly, without creating intermediate
 * event objects. Choose it when throughput matters most and a callback-driven
 * API is acceptable.
 *
 * @example Collecting data from elements
 * ```ts
 * import { parseXmlStream } from "@std/xml/parse-stream";
 * import { assertEquals } from "@std/assert";
 *
 * const xml = `<root><item id="1">First</item><item id="2">Second</item></root>`;
 * const stream = ReadableStream.from([xml]);
 *
 * const items: string[] = [];
 * let currentText = "";
 * let inItem = false;
 *
 * await parseXmlStream(stream, {
 *   onStartElement(name) {
 *     if (name === "item") {
 *       inItem = true;
 *       currentText = "";
 *     }
 *   },
 *   onText(text) {
 *     if (inItem) currentText += text;
 *   },
 *   onEndElement(name) {
 *     if (name === "item") {
 *       items.push(currentText);
 *       inItem = false;
 *     }
 *   },
 * });
 *
 * assertEquals(items, ["First", "Second"]);
 * ```
 *
 * @param source The XML text to parse, as any AsyncIterable that yields string
 *               chunks (a ReadableStream qualifies).
 * @param callbacks Handlers invoked for XML events. Every handler is optional;
 *                  supply only the ones you need.
 * @param options Parsing options.
 * @returns A promise that resolves once the whole source has been parsed.
 */
export async function parseXmlStream(
  source: AsyncIterable<string>,
  callbacks: XmlEventCallbacks,
  options: ParseStreamOptions = {},
): Promise<void> {
  // The tokenizer only receives `trackPosition` (off unless requested); the
  // event parser receives the complete options object.
  const tokenizer = new XmlTokenizer({
    trackPosition: options.trackPosition ?? false,
  });
  const eventParser = new XmlEventParser(callbacks, options);

  // Feed each chunk through the tokenizer, which reports to the event parser.
  for await (const text of source) {
    tokenizer.process(text, eventParser);
  }

  // End of input: finalize the tokenizer first, then the event parser.
  tokenizer.finalize(eventParser);
  eventParser.finalize();
}

/**
 * Parses XML from a stream of bytes, reporting events through direct callbacks.
 *
 * A convenience wrapper over {@linkcode parseXmlStream} that decodes the bytes
 * to text before parsing. If the input is already a stream of strings, call
 * `parseXmlStream` directly instead.
 *
 * @example Basic usage
 * ```ts
 * import { parseXmlStreamFromBytes } from "@std/xml/parse-stream";
 * import { assertEquals } from "@std/assert";
 *
 * const xml = new TextEncoder().encode("<root>Hello</root>");
 * const stream = ReadableStream.from([xml]);
 *
 * let text = "";
 * await parseXmlStreamFromBytes(stream, {
 *   onText(t) { text += t; },
 * });
 *
 * assertEquals(text, "Hello");
 * ```
 *
 * @param source The XML byte stream to parse.
 * @param callbacks Handlers invoked for XML events.
 * @param options Parsing options.
 * @returns A promise that resolves once the whole source has been parsed.
 */
export function parseXmlStreamFromBytes(
  source: AsyncIterable<Uint8Array>,
  callbacks: XmlEventCallbacks,
  options: ParseStreamOptions = {},
): Promise<void> {
  // Decode lazily, chunk by chunk, and delegate to the text-stream parser.
  return parseXmlStream(decodeAsyncIterable(source), callbacks, options);
}

/** Lazily turns an AsyncIterable of byte chunks into one of decoded strings. */
async function* decodeAsyncIterable(
  source: AsyncIterable<Uint8Array>,
): AsyncGenerator<string> {
  // A single decoder is reused in streaming mode for every chunk, so a
  // multi-byte sequence split across two chunks is still decoded correctly.
  const utf8 = new TextDecoder();
  const streaming = { stream: true };

  for await (const bytes of source) {
    const text = utf8.decode(bytes, streaming);
    yield text;
  }

  // A final argument-less decode flushes whatever the decoder still buffers;
  // it is only emitted when it is non-empty.
  const tail = utf8.decode();
  if (tail.length > 0) {
    yield tail;
  }
}