1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122 |
x9
x9
x9
x9
x9
x9
x9
x9
x45
x9
x9
x9
x9
x9
x9
x361
x361
x9
x9
x9
x9
x9
x9
x9
x9
x9
x9
x22
x22
x22
x5
x5
x5
x5
x5
x5
x5
x5
x5
x22
x22
x22
x22
x22
x10
x10
x10
x10 |
|
// Copyright 2018-2026 the Deno authors. MIT license.
// This module is browser compatible.
/**
 * Object structure for a list of HTML entities: each key is the entity text
 * as it appears in markup (for example `&amp;`) and each value is the string
 * it decodes to.
 */
export type EntityList = Record<string, string>;

/**
 * `[raw character, entity]` pairs for the characters that `escape` rewrites.
 * The same pairs, inverted, seed the default list used for unescaping.
 */
const rawToEntityEntries = [
  ["&", "&amp;"],
  ["<", "&lt;"],
  [">", "&gt;"],
  ['"', "&quot;"],
  ["'", "&#39;"],
] as const;

/**
 * Entities decoded when the caller supplies no list of their own: the
 * inverse of {@linkcode rawToEntityEntries} plus two extra named entities
 * that are decoded but never produced when escaping.
 */
const defaultEntityList: EntityList = Object.fromEntries([
  ...rawToEntityEntries.map(([raw, entity]) => [entity, raw]),
  ["&apos;", "'"],
  ["&nbsp;", "\xa0"],
]);

/** Lookup from a raw character to the entity that replaces it. */
const rawToEntity = new Map<string, string>(rawToEntityEntries);

/** Character class matching any single raw character that has an entity. */
const rawRe = new RegExp(`[${[...rawToEntity.keys()].join("")}]`, "g");
/**
 * Replaces the characters `&`, `<`, `>`, `"` and `'` with HTML entities so
 * the result can be interpolated into HTML text content and quoted
 * attributes. Every other character is passed through untouched.
 *
 * @example Usage
 * ```ts
 * import { escape } from "@std/html/entities";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(escape("<>'&AA"), "&lt;&gt;&#39;&amp;AA");
 *
 * // Characters that don't need to be escaped will be left alone,
 * // even if named HTML entities exist for them.
 * assertEquals(escape("þð"), "þð");
 * ```
 *
 * @param str The string to escape.
 * @returns The escaped string.
 */
export function escape(str: string): string {
  // `rawRe` only matches characters that are keys of `rawToEntity`, so the
  // lookup always succeeds.
  const toEntity = (rawChar: string): string => rawToEntity.get(rawChar)!;
  return str.replaceAll(rawRe, toEntity);
}
/** Options for {@linkcode unescape}. */
export type UnescapeOptions = {
  /**
   * Entities to decode: keys are the entity text to look for, values are the
   * replacement strings.
   */
  entityList: EntityList;
};

/** Values `unescape` falls back to for options the caller does not supply. */
const defaultUnescapeOptions: UnescapeOptions = {
  entityList: defaultEntityList,
};
/** Largest valid Unicode code point; larger numeric entities decode to U+FFFD. */
const MAX_CODE_POINT = 0x10ffff;
/** Decimal numeric entity such as `&#65;`; captures the digits. */
const DEC_ENTITY_REGEXP = /&#([0-9]+);/g;
/** Hexadecimal numeric entity such as `&#x41;`; captures the hex digits. */
const HEX_ENTITY_REGEXP = /&#x(\p{AHex}+);/gu;
/** Characters that carry meaning in a regular expression pattern. */
const REGEXP_SPECIAL_CHARS = /[.*+?^${}()|[\]\\]/g;
/**
 * Compiled matcher per entity list, so each list object is only compiled
 * once. The matcher reflects the keys present when the list was first used.
 */
const entityListRegexCache = new WeakMap<EntityList, RegExp>();

/**
 * Builds one regular expression that matches any key of `entityList`, a
 * decimal numeric entity (capture group 1) or a hexadecimal numeric entity
 * (capture group 2).
 */
function buildEntityRegExp(entityList: EntityList): RegExp {
  const namedAlternatives = Object.keys(entityList)
    // An empty key would match at every position of the input.
    .filter((key) => key.length > 0)
    // Longest first so that e.g. `&amp;` wins over `&amp`.
    .sort((a, b) => b.length - a.length)
    // Keys are literal text, not patterns.
    .map((key) => key.replace(REGEXP_SPECIAL_CHARS, "\\$&"));

  const pattern = [
    ...namedAlternatives,
    DEC_ENTITY_REGEXP.source,
    HEX_ENTITY_REGEXP.source,
  ].join("|");

  // `u` is required by the `\p{AHex}` property escape in the hex pattern.
  return new RegExp(pattern, "gu");
}

/**
 * Unescapes HTML entities in text.
 *
 * Default options only handle `&<>'"` and numeric entities.
 *
 * The input is decoded in a single pass: text produced by decoding one
 * entity is never decoded again, so `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @example Basic usage
 * ```ts
 * import { unescape } from "@std/html/entities";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(unescape("&lt;&gt;&#39;&amp;AA"), "<>'&AA");
 * assertEquals(unescape("&thorn;&eth;"), "&thorn;&eth;");
 * ```
 *
 * @example Using a custom entity list
 *
 * This uses the full named entity list from the HTML spec (~47K un-minified)
 *
 * ```ts
 * import { unescape } from "@std/html/entities";
 * import entityList from "@std/html/named-entity-list.json" with { type: "json" };
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(unescape("&lt;&gt;&#39;&amp;AA", { entityList }), "<>'&AA");
 * ```
 *
 * @param str The string to unescape.
 * @param options Options for unescaping.
 * @returns The unescaped string.
 */
export function unescape(
  str: string,
  options: Partial<UnescapeOptions> = {},
): string {
  // `??` rather than object spread, so an explicit `entityList: undefined`
  // also falls back to the default list.
  const entityList = options.entityList ?? defaultUnescapeOptions.entityList;

  let entityRe = entityListRegexCache.get(entityList);
  if (!entityRe) {
    entityRe = buildEntityRegExp(entityList);
    entityListRegexCache.set(entityList, entityRe);
  }

  return str.replaceAll(
    entityRe,
    (match: string, dec: string | undefined, hex: string | undefined) => {
      if (dec !== undefined) return codePointStrToChar(dec, 10);
      if (hex !== undefined) return codePointStrToChar(hex, 16);
      // Neither numeric group captured, so `match` is one of the list's keys.
      return entityList[match] ?? match;
    },
  );
}

/**
 * Converts the digits of a numeric entity to the character they denote.
 *
 * @param codePointStr The digits, without the `&#` / `&#x` prefix and `;`.
 * @param radix The base the digits are written in (10 or 16).
 * @returns The character, or U+FFFD when the value exceeds the Unicode range.
 */
function codePointStrToChar(codePointStr: string, radix: number): string {
  const codePoint = parseInt(codePointStr, radix);
  return codePoint > MAX_CODE_POINT
    ? "\uFFFD"
    : String.fromCodePoint(codePoint);
}
|