1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122 |
 
 
 
 
 
 
x6
x24
x24
x24
x24
x18
x6
 
x6
x84
x24
x18
x6
 
x6
 
x18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
x6
x274
x274
 
 
 
 
x6
x6
x6
 
x6
 
x6
x6
 
x6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
x6
x6
x6
 
x112
 
x28
 
x28
x33
x33
x33
x33
x33
x33
x33
 
 
x33
x33
 
x28
x28
x28
x28
x28
 
x16
x16
 
x16
x16 |
|
// Copyright 2018-2025 the Deno authors. MIT license.
// This module is browser compatible.
/** Object structure for a list of HTML entities. */
export type EntityList = Record<string, string>;

/**
 * Every character that {@linkcode escape} rewrites, paired with the entity
 * that replaces it. This is the single source of truth from which the lookup
 * tables below are derived.
 */
const rawToEntityEntries = [
  ["&", "&amp;"],
  ["<", "&lt;"],
  [">", "&gt;"],
  ['"', "&quot;"],
  ["'", "&#39;"],
] as const;

/**
 * Entity → character table used by {@linkcode unescape} when the caller does
 * not supply one: the inverse of `rawToEntityEntries`, plus two named entities
 * that `escape` never emits but which are accepted on input.
 */
const defaultEntityList: EntityList = Object.fromEntries([
  ...rawToEntityEntries.map(([raw, entity]) => [entity, raw]),
  ["&apos;", "'"],
  ["&nbsp;", "\xa0"],
]);

/** Character → entity lookup used by {@linkcode escape}. */
const rawToEntity = new Map<string, string>(rawToEntityEntries);

/**
 * Character class matching any single character that needs escaping. None of
 * the five characters is special inside a character class, so the keys can be
 * joined without further quoting.
 */
const rawRe = new RegExp(`[${[...rawToEntity.keys()].join("")}]`, "g");
/**
 * Escapes text for safe interpolation into HTML text content and quoted
 * attributes.
 *
 * Only the characters `&`, `<`, `>`, `"` and `'` are rewritten; everything
 * else is passed through untouched.
 *
 * @example Usage
 * ```ts
 * import { escape } from "@std/html/entities";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(escape("<>'&AA"), "&lt;&gt;&#39;&amp;AA");
 *
 * // Characters that don't need to be escaped will be left alone,
 * // even if named HTML entities exist for them.
 * assertEquals(escape("þð"), "þð");
 * ```
 *
 * @param str The string to escape.
 * @returns The escaped string.
 */
export function escape(str: string): string {
  // `rawRe` only matches characters that are keys of `rawToEntity`, so the
  // lookup can never miss.
  const toEntity = (rawChar: string): string => rawToEntity.get(rawChar)!;
  return str.replaceAll(rawRe, toEntity);
}
/** Options for {@linkcode unescape}. */
export type UnescapeOptions = { entityList: EntityList };

/** Largest valid Unicode code point. */
const MAX_CODE_POINT = 0x10ffff;

/** Matches every character that has a syntactic meaning in a regular expression. */
const RX_REGEXP_SYNTAX = /[.*+?^${}()|[\]\\]/g;

/**
 * Pattern source for numeric character references. Capture group 1 holds the
 * digits of a decimal reference (`&#39;`), capture group 2 the digits of a
 * hexadecimal one (`&#x27;`).
 */
const NUMERIC_ENTITY_SOURCE = String.raw`&#([0-9]+);|&#x(\p{AHex}+);`;

/** Compiled pattern per entity list, so each list is only compiled once. */
const entityListRegexCache = new WeakMap<EntityList, RegExp>();

/**
 * Unescapes HTML entities in text.
 *
 * Default options only handle `&<>'"` and numeric entities.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded a second time: `&amp;#39;` becomes `&#39;`, not `'`.
 *
 * @example Basic usage
 * ```ts
 * import { unescape } from "@std/html/entities";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(unescape("&lt;&gt;&#39;&amp;AA"), "<>'&AA");
 * assertEquals(unescape("&thorn;&eth;"), "&thorn;&eth;");
 * ```
 *
 * @example Using a custom entity list
 *
 * This uses the full named entity list from the HTML spec (~47K un-minified)
 *
 * ```ts
 * import { unescape } from "@std/html/entities";
 * import entityList from "@std/html/named-entity-list.json" with { type: "json" };
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(unescape("&lt;&gt;&#39;&amp;AA", { entityList }), "<>'&AA");
 * ```
 *
 * @param str The string to unescape.
 * @param options Options for unescaping.
 * @returns The unescaped string.
 */
export function unescape(
  str: string,
  options: Partial<UnescapeOptions> = {},
): string {
  // `??` rather than object spread: an explicit `entityList: undefined` must
  // fall back to the default instead of overriding it.
  const entityList = options.entityList ?? defaultEntityList;

  let entityRe = entityListRegexCache.get(entityList);
  if (!entityRe) {
    entityRe = buildEntityRegExp(entityList);
    entityListRegexCache.set(entityList, entityRe);
  }

  return str.replaceAll(
    entityRe,
    (match: string, dec?: string, hex?: string): string => {
      if (dec !== undefined) return codePointStrToChar(dec, 10);
      if (hex !== undefined) return codePointStrToChar(hex, 16);
      // A named entity matched. Fall back to the matched text if the list was
      // mutated after its pattern was cached and the key no longer exists.
      return entityList[match] ?? match;
    },
  );
}

/**
 * Builds the pattern that matches every named entity of `entityList` as well
 * as decimal and hexadecimal character references.
 */
function buildEntityRegExp(entityList: EntityList): RegExp {
  const namedAlternatives = Object.keys(entityList)
    // An empty alternative would match at every position and shadow the
    // numeric alternatives that follow it.
    .filter((name) => name !== "")
    // Longest first, so `&amp;` wins over a shorter prefix such as `&amp`.
    .sort((a, b) => b.length - a.length)
    // Entity names are literal text, not pattern syntax.
    .map((name) => name.replace(RX_REGEXP_SYNTAX, "\\$&"));

  // Named entities come first so that they take precedence over the numeric
  // forms, as they did when they were replaced in a separate earlier pass.
  return new RegExp([...namedAlternatives, NUMERIC_ENTITY_SOURCE].join("|"), "gu");
}

/**
 * Converts the digits of a numeric character reference into the character
 * they denote.
 *
 * @param codePointStr Digits of the code point, without the `&#` / `&#x` prefix.
 * @param radix Base in which `codePointStr` is written (10 or 16).
 * @returns The character, or U+FFFD when the value lies beyond the last
 * Unicode code point (`String.fromCodePoint` would throw for such values).
 */
function codePointStrToChar(codePointStr: string, radix: number): string {
  const codePoint = parseInt(codePointStr, radix);
  return codePoint > MAX_CODE_POINT ? "�" : String.fromCodePoint(codePoint);
}
|