All files / text / levenshtein_distance.ts

100.00% Branches 15/15
100.00% Lines 98/98
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
 
 
x15
 
 
 
x15
 
x127
x127
x127
x127
x431
x431
x127
x127
x127
x127
x127
x636
x636
x636
x636
x636
x636
 
 
x636
x636
x636
x636
x636
x127
x431
x431
x127
x127
 
x24
x24
x24
 
x24
x24
 
x24
x24
x24
x24
x312
x312
x24
x24
x24
x338
x338
x338
x338
x338
x338
x338
x338
x338
x338
x338
x338
x338
x24
x312
x312
x24
x24
x24
x43
x43
x24
x24
x24
x24
x24
x338
x338
x338
x338
x338
x338
x338
x338
x338
x338
x338
x338
x338
x24
x43
x43
x24
x24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
x15
x483
x483
 
x161
x1048
x262
x161
x186
x186
x161
x161


































































































































// Copyright 2018-2025 the Deno authors. MIT license.
// This module is browser compatible.
const { ceil } = Math;

// This implements Myers' bit-vector algorithm as described here:
// https://dl.acm.org/doi/pdf/10.1145/316542.316550
const peq = new Uint32Array(0x110000);

function myers32(t: string[], p: string[]): number {
  const n = t.length;
  const m = p.length;
  for (let i = 0; i < m; i++) {
    peq[p[i]!.codePointAt(0)!]! |= 1 << i;
  }
  const last = m - 1;
  let pv = -1;
  let mv = 0;
  let score = m;
  for (let j = 0; j < n; j++) {
    const eq = peq[t[j]!.codePointAt(0)!]!;
    const xv = eq | mv;
    const xh = (((eq & pv) + pv) ^ pv) | eq;
    let ph = mv | ~(xh | pv);
    let mh = pv & xh;
    score += ((ph >>> last) & 1) - ((mh >>> last) & 1);
    // Set the horizontal delta in the first row to +1
    // because we are computing the distance between two full strings.
    ph = (ph << 1) | 1;
    mh = mh << 1;
    pv = mh | ~(xv | ph);
    mv = ph & xv;
  }
  for (let i = 0; i < m; i++) {
    peq[p[i]!.codePointAt(0)!] = 0;
  }
  return score;
}

function myersX(t: string[], p: string[]): number {
  const n = t.length;
  const m = p.length;
  // Initialize the horizontal deltas to +1.
  const h = new Int8Array(n).fill(1);
  const bmax = ceil(m / 32) - 1;
  // Process the blocks row by row so that we can use the fixed-size peq array.
  for (let b = 0; b < bmax; b++) {
    const start = b * 32;
    const end = (b + 1) * 32;
    for (let i = start; i < end; i++) {
      peq[p[i]!.codePointAt(0)!]! |= 1 << i;
    }
    let pv = -1;
    let mv = 0;
    for (let j = 0; j < n; j++) {
      const hin = h[j]!;
      let eq = peq[t[j]!.codePointAt(0)!]!;
      const xv = eq | mv;
      eq |= hin >>> 31;
      const xh = (((eq & pv) + pv) ^ pv) | eq;
      let ph = mv | ~(xh | pv);
      let mh = pv & xh;
      h[j] = (ph >>> 31) - (mh >>> 31);
      ph = (ph << 1) | (-hin >>> 31);
      mh = (mh << 1) | (hin >>> 31);
      pv = mh | ~(xv | ph);
      mv = ph & xv;
    }
    for (let i = start; i < end; i++) {
      peq[p[i]!.codePointAt(0)!] = 0;
    }
  }
  const start = bmax * 32;
  for (let i = start; i < m; i++) {
    peq[p[i]!.codePointAt(0)!]! |= 1 << i;
  }
  const last = m - 1;
  let pv = -1;
  let mv = 0;
  let score = m;
  for (let j = 0; j < n; j++) {
    const hin = h[j]!;
    let eq = peq[t[j]!.codePointAt(0)!]!;
    const xv = eq | mv;
    eq |= hin >>> 31;
    const xh = (((eq & pv) + pv) ^ pv) | eq;
    let ph = mv | ~(xh | pv);
    let mh = pv & xh;
    score += ((ph >>> last) & 1) - ((mh >>> last) & 1);
    ph = (ph << 1) | (-hin >>> 31);
    mh = (mh << 1) | (hin >>> 31);
    pv = mh | ~(xv | ph);
    mv = ph & xv;
  }
  for (let i = start; i < m; i++) {
    peq[p[i]!.codePointAt(0)!] = 0;
  }
  return score;
}

/**
 * Calculates the
 * {@link https://en.wikipedia.org/wiki/Levenshtein_distance | Levenshtein distance}
 * between two strings.
 *
 * > [!NOTE]
 * > The complexity of this function is O(m * n), where m and n are the lengths
 * > of the two strings. It's recommended to limit the length and validate input
 * > if arbitrarily accepting input.
 *
 * @example Usage
 * ```ts
 * import { levenshteinDistance } from "@std/text/levenshtein-distance";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(levenshteinDistance("aa", "bb"), 2);
 * ```
 * @param str1 The first string.
 * @param str2 The second string.
 * @returns The Levenshtein distance between the two strings.
 */
export function levenshteinDistance(str1: string, str2: string): number {
  let t = [...str1];
  let p = [...str2];

  if (t.length < p.length) {
    [p, t] = [t, p];
  }
  if (p.length === 0) {
    return t.length;
  }
  return p.length <= 32 ? myers32(t, p) : myersX(t, p);
}