regress/
charclasses.rs

1use crate::codepointset::Interval;
2
3// Character classes like \d or \S.
4
5/// Construct an interval from an inclusive range of char.
6const fn r(first: char, last: char) -> Interval {
7    Interval {
8        first: first as u32,
9        last: last as u32,
10    }
11}
12
13/// Construct an interval from a single char.
14const fn r1(c: char) -> Interval {
15    Interval {
16        first: c as u32,
17        last: c as u32,
18    }
19}
20
21// Note all of these are sorted.
22
23/// ES9 21.2.2.6.1.
24pub const WORD_CHARS: [Interval; 4] = [r('0', '9'), r('A', 'Z'), r1('_'), r('a', 'z')];
25
26/// ES9 21.2.2.12
27pub const DIGITS: [Interval; 1] = [r('0', '9')];
28
29/// [`ES13 12.2 White Space`][spec]
30///
31/// [spec]: https://262.ecma-international.org/13.0/#prod-WhiteSpace
32pub const WHITESPACE: [Interval; 9] = [
33    // U+0009 - Character Tabulation - <TAB>
34    // U+000B - Line Tabulation      - <VT>
35    // U+000C - Form Feed (FF)       - <FF>
36    r('\u{0009}', '\u{000C}'),
37    // From unicode “Space_Separator” (`Zs`) category:
38    //
39    // U+0020 - Space - <SP>
40    r1('\u{0020}'),
41    // From unicode “Space_Separator” (`Zs`) category:
42    //
43    // U+00A0 - No-Break Space - <NBSP>
44    r1('\u{00A0}'),
45    // From unicode “Space_Separator” (`Zs`) category:
46    //
47    // U+1680 - Ogham Space Mark
48    r1('\u{1680}'),
49    // From unicode “Space_Separator” (`Zs`) category:
50    //
51    // U+2000 - En Quad
52    // U+2001 - Em Quad
53    // U+2002 - En Space
54    // U+2003 - Em Space
55    // U+2004 - Three-Per-Em Space
56    // U+2005 - Four-Per-Em Space
57    // U+2006 - Six-Per-Em Space
58    // U+2007 - Figure Space
59    // U+2008 - Punctuation Space
60    // U+2009 - Thin Space
61    // U+200A - Hair Space
62    r('\u{2000}', '\u{200A}'),
63    // From unicode “Space_Separator” (`Zs`) category:
64    //
65    // U+202F - Narrow No-Break Space - <NNBSP>
66    r1('\u{202F}'),
67    // From unicode “Space_Separator” (`Zs`) category:
68    //
69    // U+205F - Medium Mathematical Space - <MMSP>
70    r1('\u{205F}'),
71    // From unicode “Space_Separator” (`Zs`) category:
72    //
73    // U+3000 - Ideographic Space
74    r1('\u{3000}'),
75    // U+FEFF - ZERO WIDTH NO-BREAK SPACE - <ZWNBSP>
76    r1('\u{FEFF}'),
77];
78
79/// ES9 11.3
80pub const LINE_TERMINATOR: [Interval; 3] =
81    [r1('\u{000A}'), r1('\u{000D}'), r('\u{2028}', '\u{2029}')];