-
-
Notifications
You must be signed in to change notification settings - Fork 670
/
Copy pathunicode-identifier.js
42 lines (38 loc) · 1.99 KB
/
unicode-identifier.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// see https://github.com/microsoft/TypeScript/blob/main/scripts/regenerate-unicode-identifier-parts.js
const MAX_UNICODE_CODEPOINT = 0x10FFFF;

// The patterns are built once at module level because the predicates below are
// evaluated for every code point above 'z'. Neither uses the `g`/`y` flag, so
// `.test()` keeps no state between calls.
// Other_ID_Start explicitly included for back compat - see http://www.unicode.org/reports/tr31/#Introduction
const ID_START_PATTERN = /[\p{ID_Start}\u{2118}\u{212E}\u{309B}\u{309C}]/u;
// Likewise for Other_ID_Continue
const ID_CONTINUE_PATTERN = /[\p{ID_Continue}\u{00B7}\u{0387}\u{19DA}\u{1369}\u{136A}\u{136B}\u{136C}\u{136D}\u{136E}\u{136F}\u{1370}\u{1371}]/u;

/**
 * @param {string} c - string to test (the scan below passes one code point at a time)
 * @returns {boolean} true when `c` contains an ID_Start / Other_ID_Start character
 */
const isStart = c => ID_START_PATTERN.test(c);

/**
 * @param {string} c - string to test (the scan below passes one code point at a time)
 * @returns {boolean} true when `c` contains an ID_Continue / Other_ID_Continue
 *   character, or anything accepted by `isStart`
 */
const isPart = c => ID_CONTINUE_PATTERN.test(c) || isStart(c);

// `starts` and `parts` are flat lists of inclusive ranges laid out as
// [from0, to0, from1, to1, ...]. The *Active flags record whether the scan is
// currently inside an open range of the corresponding list.
const parts = [];
let partsActive = false;
let startsActive = false;
const starts = [];

// Skip 0-9 (48..57), A-Z (65..90), a-z (97..122) - checked otherwise
for (let cp = 123; cp <= MAX_UNICODE_CODEPOINT; cp++) {
  const ch = String.fromCodePoint(cp);
  const start = isStart(ch);
  // Same value as isPart(ch), but reuses the start result instead of testing it twice.
  const part = start || ID_CONTINUE_PATTERN.test(ch);

  if (start !== startsActive) {
    // Opening a range records cp itself; closing one records the previous code point.
    starts.push(startsActive ? cp - 1 : cp);
    startsActive = start;
  }
  if (part !== partsActive) {
    parts.push(partsActive ? cp - 1 : cp);
    partsActive = part;
  }
}

// Close any range that is still open at the end of the code space.
if (startsActive) starts.push(MAX_UNICODE_CODEPOINT);
if (partsActive) parts.push(MAX_UNICODE_CODEPOINT);
/**
 * Renders a flat list of code points as the body of an array literal.
 *
 * The result begins with a block-comment header, then lists the values eight
 * per row. Each value is left-aligned in a field of at least six characters
 * and followed by ", ". The text ends with a newline.
 *
 * @param {number[]} cps - values to render, in output order
 * @returns {string} the formatted table text
 */
function tablify(cps) {
  let table = "/*\n| from ... to | from ... to | from ... to | from ... to |*/";
  for (let index = 0; index < cps.length; index += 1) {
    // Start a new row before every eighth value, including the first one.
    if (index % 8 === 0) {
      table += "\n ";
    }
    const cell = cps[index].toString().padEnd(6);
    table += `${cell}, `;
  }
  return `${table}\n`;
}
// Print the generated declarations to stdout: first the identifier-start
// table, then the identifier-part table, each followed by a blank line.
{
  /**
   * Writes one doc comment, the range array, and its Min/Max constants.
   *
   * @param {string} summary - tail of the doc comment, including the closing marker
   * @param {string} identifier - base name of the emitted constants
   * @param {number[]} ranges - flat list of range boundaries to emit
   */
  const printRangeTable = (summary, identifier, ranges) => {
    console.log(`/** Unicode ${process.versions.unicode} ${summary}`);
    console.log(`const ${identifier}: i32[] = [${tablify(ranges)}];`);
    console.log(`const ${identifier}Min = ${ranges[0]};`);
    console.log(`const ${identifier}Max = ${ranges[ranges.length - 1]};\n`);
  };

  printRangeTable("ID_Start/Other_ID_Start ranges */", "unicodeIdentifierStart", starts);
  printRangeTable(
    "ID_Continue/Other_ID_Continue + ID_Start/Other_ID_Start ranges*/",
    "unicodeIdentifierPart",
    parts,
  );
}