All files / cli / unicode_width.ts

100.00% Branches 15/15
100.00% Lines 29/29
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
 
 
 
 
x9
x3
 
x3
x233
x233
x233
x233
x233
 
x233
 
x233
x233
 
x3
x702
x702
 
x976
x976
 
x702
x742
x702
x1166
x936
x940
x940
 
x976
x976
x702
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
x3
x72
x24













































































// Copyright 2018-2025 the Deno authors. MIT license.
// This module is browser compatible.
// Ported from unicode_width rust crate, Copyright (c) 2015 The Rust Project Developers. MIT license.

import data from "./_data.json" with { type: "json" };
import { runLengthDecode } from "./_run_length.ts";

let tables: Uint8Array[] | null = null;
function lookupWidth(cp: number) {
  if (!tables) tables = data.tables.map(runLengthDecode);
  const t1Offset = tables[0]![(cp >> 13) & 0xff]!;
  const t2Offset = tables[1]![128 * t1Offset + ((cp >> 6) & 0x7f)]!;
  const packedWidths = tables[2]![16 * t2Offset + ((cp >> 2) & 0xf)]!;

  const width = (packedWidths >> (2 * (cp & 0b11))) & 0b11;

  return width === 3 ? 1 : width;
}

const cache = new Map<string, number | null>();
function charWidth(char: string) {
  if (cache.has(char)) return cache.get(char)!;

  const codePoint = char.codePointAt(0)!;
  let width: number | null = null;

  if (codePoint < 0x7f) {
    width = codePoint >= 0x20 ? 1 : codePoint === 0 ? 0 : null;
  } else if (codePoint >= 0xa0) {
    width = lookupWidth(codePoint);
  } else {
    width = null;
  }

  cache.set(char, width);
  return width;
}

/**
 * Calculate the physical width of a string in a TTY-like environment. This is
 * useful for cases such as calculating where a line-wrap will occur and
 * underlining strings.
 *
 * The physical width is given by the number of columns required to display
 * the string. The number of columns a given unicode character occupies can
 * vary depending on the character itself.
 *
 * @param str The string to measure.
 * @returns The unicode width of the string.
 *
 * @example Calculating the unicode width of a string
 * ```ts
 * import { unicodeWidth } from "@std/cli/unicode-width";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(unicodeWidth("hello world"), 11);
 * assertEquals(unicodeWidth("天地玄黃宇宙洪荒"), 16);
 * assertEquals(unicodeWidth("fullwidth"), 18);
 * ```
 *
 * @example Calculating the unicode width of a color-encoded string
 * ```ts
 * import { unicodeWidth } from "@std/cli/unicode-width";
 * import { stripAnsiCode } from "@std/fmt/colors";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(unicodeWidth(stripAnsiCode("\x1b[36mголубой\x1b[39m")), 7);
 * assertEquals(unicodeWidth(stripAnsiCode("\x1b[31m紅色\x1b[39m")), 4);
 * assertEquals(unicodeWidth(stripAnsiCode("\x1B]8;;https://deno.land\x07🦕\x1B]8;;\x07")), 2);
 * ```
 *
 * Use
 * {@linkcode https://jsr.io/@std/fmt/doc/colors/~/stripAnsiCode | stripAnsiCode}
 * to remove ANSI escape codes from a string before passing it to
 * {@linkcode unicodeWidth}.
 */
export function unicodeWidth(str: string): number {
  return [...str].map((ch) => charWidth(ch) ?? 0).reduce((a, b) => a + b, 0);
}