regress/charclasses.rs
1use crate::codepointset::Interval;
2
3// Character classes like \d or \S.
4
5/// Construct an interval from an inclusive range of char.
6const fn r(first: char, last: char) -> Interval {
7 Interval {
8 first: first as u32,
9 last: last as u32,
10 }
11}
12
13/// Construct an interval from a single char.
14const fn r1(c: char) -> Interval {
15 Interval {
16 first: c as u32,
17 last: c as u32,
18 }
19}
20
21// Note all of these are sorted.
22
23/// ES9 21.2.2.6.1.
24pub const WORD_CHARS: [Interval; 4] = [r('0', '9'), r('A', 'Z'), r1('_'), r('a', 'z')];
25
26/// ES9 21.2.2.12
27pub const DIGITS: [Interval; 1] = [r('0', '9')];
28
29/// [`ES13 12.2 White Space`][spec]
30///
31/// [spec]: https://262.ecma-international.org/13.0/#prod-WhiteSpace
32pub const WHITESPACE: [Interval; 9] = [
33 // U+0009 - Character Tabulation - <TAB>
34 // U+000B - Line Tabulation - <VT>
35 // U+000C - Form Feed (FF) - <FF>
36 r('\u{0009}', '\u{000C}'),
37 // From unicode “Space_Separator” (`Zs`) category:
38 //
39 // U+0020 - Space - <SP>
40 r1('\u{0020}'),
41 // From unicode “Space_Separator” (`Zs`) category:
42 //
43 // U+00A0 - No-Break Space - <NBSP>
44 r1('\u{00A0}'),
45 // From unicode “Space_Separator” (`Zs`) category:
46 //
47 // U+1680 - Ogham Space Mark
48 r1('\u{1680}'),
49 // From unicode “Space_Separator” (`Zs`) category:
50 //
51 // U+2000 - En Quad
52 // U+2001 - Em Quad
53 // U+2002 - En Space
54 // U+2003 - Em Space
55 // U+2004 - Three-Per-Em Space
56 // U+2005 - Four-Per-Em Space
57 // U+2006 - Six-Per-Em Space
58 // U+2007 - Figure Space
59 // U+2008 - Punctuation Space
60 // U+2009 - Thin Space
61 // U+200A - Hair Space
62 r('\u{2000}', '\u{200A}'),
63 // From unicode “Space_Separator” (`Zs`) category:
64 //
65 // U+202F - Narrow No-Break Space - <NNBSP>
66 r1('\u{202F}'),
67 // From unicode “Space_Separator” (`Zs`) category:
68 //
69 // U+205F - Medium Mathematical Space - <MMSP>
70 r1('\u{205F}'),
71 // From unicode “Space_Separator” (`Zs`) category:
72 //
73 // U+3000 - Ideographic Space
74 r1('\u{3000}'),
75 // U+FEFF - ZERO WIDTH NO-BREAK SPACE - <ZWNBSP>
76 r1('\u{FEFF}'),
77];
78
79/// ES9 11.3
80pub const LINE_TERMINATOR: [Interval; 3] =
81 [r1('\u{000A}'), r1('\u{000D}'), r('\u{2028}', '\u{2029}')];