All files / text / unstable_slugify.ts

100.00% Branches 12/12
100.00% Lines 29/29
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
 
 
 
x21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
x7
 
 
 
 
 
 
 
 
 
 
 
x7
 
 
 
 
 
 
 
 
 
 
 
x7
 
 
 
 
 
 
 
 
 
 
 
x7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
x7
x7
x7
 
 
x57
x57
 
x57
x57
x57
 
x57
x392
x842
x723
x939
x939
x392
 
x57
x57
x57
x57
x57
x57
x57
x57
x57




























































































































// Copyright 2018-2025 the Deno authors. MIT license.
// This module is browser compatible.

// Shared word-granularity segmenter (created once at module load with the
// "en-US" locale). `slugify` uses it to split its input into segments and
// relies on each segment's `isWordLike` flag to tell words from separators.
const wordSegmenter = new Intl.Segmenter("en-US", { granularity: "word" });

/**
 * Options for {@linkcode slugify}.
 *
 * Both properties are declared as required here; {@linkcode slugify} accepts
 * a `Partial` of this type, so callers may omit either of them.
 */
export type SlugifyOptions = {
  /**
   * The regular expression to use for stripping characters.
   * Every match is removed from the slug after the words have been joined
   * (and transliterated, if a `transliterate` function is given), while the
   * text is still in NFD form and before it is normalized back to NFC.
   * @default {typeof NON_WORD}
   */
  strip: RegExp;
  /**
   * The transliteration function to use for converting non-Latin text.
   * Called on each word in the input before joining them with dashes.
   * Each non-word segment of the input is replaced by `"-"` and passed
   * through this function as well.
   * @default {undefined}
   */
  transliterate: ((word: string) => string) | undefined;
};

/**
 * A regular expression for stripping non-word characters from slugs.
 *
 * Matches runs of characters that are not Unicode letters (`\p{L}`), marks
 * (`\p{M}`), numbers (`\p{N}`) or hyphens, so letters keep their diacritics.
 * This is the pattern {@linkcode slugify} falls back to when no `strip`
 * option is given.
 *
 * @example Usage
 * ```ts
 * import { NON_WORD, slugify } from "@std/text/unstable-slugify";
 * import { assertEquals } from "@std/assert";
 * assertEquals(slugify("déjà-vu", { strip: NON_WORD }), "déjà-vu");
 * assertEquals(slugify("Συστημάτων Γραφής", { strip: NON_WORD }), "συστημάτων-γραφής");
 * ```
 */
export const NON_WORD = /[^\p{L}\p{M}\p{N}\-]+/gu;
/**
 * A regular expression for stripping diacritics from slugs.
 *
 * Matches runs of characters that are not Unicode letters (`\p{L}`), numbers
 * (`\p{N}`) or hyphens. Unlike {@linkcode NON_WORD}, combining marks
 * (`\p{M}`) are not exempt; because {@linkcode slugify} normalizes its input
 * to NFD before stripping, diacritics are removed together with the other
 * non-word characters, regardless of script.
 *
 * @example Usage
 * ```ts
 * import { DIACRITICS, slugify } from "@std/text/unstable-slugify";
 * import { assertEquals } from "@std/assert";
 * assertEquals(slugify("déjà-vu", { strip: DIACRITICS }), "deja-vu");
 * assertEquals(slugify("Συστημάτων Γραφής", { strip: DIACRITICS }), "συστηματων-γραφης");
 * ```
 */
export const DIACRITICS = /[^\p{L}\p{N}\-]+/gu;
/**
 * A regular expression for stripping ASCII diacritics (but not other diacritics) from slugs.
 *
 * Matches two things: runs of combining marks (`\p{M}`) that directly follow
 * an ASCII letter (`a-z`, `A-Z`), and runs of characters that are not Unicode
 * letters, marks, numbers or hyphens. Marks that follow a non-ASCII letter
 * are left in place.
 *
 * @example Usage
 * ```ts
 * import { ASCII_DIACRITICS, slugify } from "@std/text/unstable-slugify";
 * import { assertEquals } from "@std/assert";
 * assertEquals(slugify("déjà-vu", { strip: ASCII_DIACRITICS }), "deja-vu");
 * assertEquals(slugify("Συστημάτων Γραφής", { strip: ASCII_DIACRITICS }), "συστημάτων-γραφής");
 * ```
 */
export const ASCII_DIACRITICS = /(?<=[a-zA-Z])\p{M}+|[^\p{L}\p{M}\p{N}\-]+/gu;
/**
 * A regular expression for stripping non-ASCII characters from slugs.
 *
 * Matches every single character other than the ASCII digits `0-9`, the
 * ASCII letters `a-z` / `A-Z` and the hyphen. Text with no such characters
 * is stripped entirely, in which case {@linkcode slugify} returns its `"-"`
 * fallback (see the second assertion below).
 *
 * @example Usage
 * ```ts
 * import { NON_ASCII, slugify } from "@std/text/unstable-slugify";
 * import { assertEquals } from "@std/assert";
 * assertEquals(slugify("déjà-vu", { strip: NON_ASCII }), "deja-vu");
 * assertEquals(slugify("Συστημάτων Γραφής", { strip: NON_ASCII }), "-");
 * ```
 */
export const NON_ASCII = /[^0-9a-zA-Z\-]/g;

/**
 * Converts a string into a {@link https://en.wikipedia.org/wiki/Clean_URL#Slug | slug}.
 *
 * The input is trimmed, normalized to NFD and lowercased, then split into
 * segments by a word-granularity `Intl.Segmenter`. Word-like segments are
 * kept and every other segment becomes a dash. The pieces are then
 * (optionally) transliterated, joined, stripped with `options.strip`,
 * normalized back to NFC, and finally runs of dashes are collapsed and a
 * leading/trailing dash is removed. If nothing is left, `"-"` is returned.
 *
 * `options.transliterate` is always called with exactly one argument (the
 * word). `options.strip` is applied to every match, whether or not the
 * supplied regular expression carries the `g` flag; the supplied `RegExp`
 * object itself is never mutated.
 *
 * @experimental **UNSTABLE**: New API, yet to be vetted.
 *
 * @param input The string that is going to be converted into a slug
 * @param options The options for the slugify function
 * @returns The string as a slug
 *
 * @example Basic usage
 * ```ts
 * import { slugify } from "@std/text/unstable-slugify";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(slugify("Hello, world!"), "hello-world");
 * assertEquals(slugify("Συστημάτων Γραφής"), "συστημάτων-γραφής");
 * ```
 *
 * @example With transliteration using a third-party library
 * ```ts no-assert
 * import { NON_ASCII, slugify } from "@std/text/unstable-slugify";
 * // example third-party transliteration library
 * import transliterate from "npm:any-ascii";
 *
 * slugify("Συστημάτων Γραφής", { transliterate, strip: NON_ASCII });
 * // => "sistimaton-grafis"
 * ```
 */
export function slugify(
  input: string,
  options?: Partial<SlugifyOptions>,
): string {
  const strip = options?.strip ?? NON_WORD;
  const transliterate = options?.transliterate;

  // Clone so the caller's regex (and its `lastIndex`) is left untouched, and
  // make sure the clone is global: `replaceAll` throws a `TypeError` when
  // handed a non-global regular expression.
  const stripRe = new RegExp(
    strip,
    strip.global ? strip.flags : `${strip.flags}g`,
  );

  const words: string[] = [];
  for (
    const { segment, isWordLike } of wordSegmenter.segment(
      input.trim().normalize("NFD").toLowerCase(),
    )
  ) {
    if (isWordLike) {
      words.push(segment);
    } else if (segment.length) {
      // Any separator (whitespace, punctuation, ...) is reduced to a dash.
      words.push("-");
    }
  }

  // Wrap the callback instead of passing it straight to `map`, which would
  // also hand it the index and the array as second and third arguments.
  // Transliterated words are joined with dashes because scripts written
  // without spaces yield adjacent word segments with no separator between.
  const joined = transliterate
    ? words.map((word) => transliterate(word)).join("-")
    : words.join("");

  return joined
    .replaceAll(stripRe, "")
    // Stripping happens on the decomposed (NFD) text; recompose afterwards.
    .normalize("NFC")
    .replaceAll(/-{2,}/g, "-")
    .replaceAll(/^-|-$/g, "") ||
    // Never return an empty slug.
    "-";
}