|
// see https://github.com/microsoft/TypeScript/blob/main/scripts/regenerate-unicode-identifier-parts.js

const MAX_UNICODE_CODEPOINT = 0x10FFFF;

// The patterns are compiled once here instead of inside the predicates.
// Other_ID_Start explicitly included for back compat - see http://www.unicode.org/reports/tr31/#Introduction
const ID_START_PATTERN = /[\p{ID_Start}\u{2118}\u{212E}\u{309B}\u{309C}]/u;
// Likewise for Other_ID_Continue
const ID_CONTINUE_PATTERN = /[\p{ID_Continue}\u{00B7}\u{0387}\u{19DA}\u{1369}\u{136A}\u{136B}\u{136C}\u{136D}\u{136E}\u{136F}\u{1370}\u{1371}]/u;

/**
 * @param {string} c - String to test (the scan passes one code point at a time).
 * @returns {boolean} True when `c` matches the identifier-start pattern.
 */
const isStart = (c) => ID_START_PATTERN.test(c);

/**
 * @param {string} c - String to test (the scan passes one code point at a time).
 * @returns {boolean} True when `c` matches the identifier-continue pattern
 *   or the identifier-start pattern.
 */
const isPart = (c) => ID_CONTINUE_PATTERN.test(c) || isStart(c);

// Both tables are flat lists of inclusive bounds: [from0, to0, from1, to1, ...].
// The *Active flags record whether the scan is currently inside a run.
const parts = [];
let partsActive = false;
let startsActive = false;
const starts = [];

// Skip 0-9 (48..57), A-Z (65..90), a-z (97..122) - checked otherwise
for (let cp = 123; cp <= MAX_UNICODE_CODEPOINT; cp++) {
  const ch = String.fromCodePoint(cp);
  const startHit = isStart(ch);
  // isPart accepts everything isStart accepts, so reuse the result when it is true.
  const partHit = startHit || isPart(ch);

  if (startHit !== startsActive) {
    // Entering a run records cp itself; leaving one records the previous code point.
    starts.push(startsActive ? cp - 1 : cp);
    startsActive = startHit;
  }
  if (partHit !== partsActive) {
    parts.push(partsActive ? cp - 1 : cp);
    partsActive = partHit;
  }
}

// Close any run that is still open when the scan reaches the last code point.
if (startsActive) starts.push(MAX_UNICODE_CODEPOINT);
if (partsActive) parts.push(MAX_UNICODE_CODEPOINT);
| 24 | + |
/**
 * Formats a flat list of numbers as the body of an array literal: a header
 * comment followed by rows of eight comma-terminated, left-aligned values.
 *
 * @param {number[]} cps - Values to print; the callers in this script pass
 *   alternating inclusive range bounds (from, to, from, to, ...).
 * @returns {string} The header comment, one line per eight values, and a
 *   trailing newline. An empty list yields just the header and the newline.
 */
function tablify(cps) {
  const VALUES_PER_ROW = 8;
  // NOTE(review): each header cell is 14 characters wide but a from/to pair
  // prints as 16 — presumably spacing in this literal was collapsed; confirm.
  const pieces = ["/*\n| from ... to | from ... to | from ... to | from ... to |*/"];
  for (let index = 0; index < cps.length; index++) {
    // Start a new row before every eighth value, including the first.
    if (index % VALUES_PER_ROW === 0) {
      pieces.push("\n ");
    }
    pieces.push(`${cps[index].toString().padEnd(6)}, `);
  }
  return `${pieces.join("")}\n`;
}
| 34 | + |
// Print the generated declarations to stdout: for each table a doc comment
// naming the Unicode version reported by this Node.js build, the `i32[]`
// array itself, and its first and last entries as Min/Max constants.
const unicodeVersion = process.versions.unicode;
const generatedLines = [
  `/** Unicode ${unicodeVersion} ID_Start/Other_ID_Start ranges */`,
  `const unicodeIdentifierStart: i32[] = [${tablify(starts)}];`,
  `const unicodeIdentifierStartMin = ${starts[0]};`,
  // The trailing "\n" leaves a blank line after each table's declarations.
  `const unicodeIdentifierStartMax = ${starts[starts.length - 1]};\n`,
  `/** Unicode ${unicodeVersion} ID_Continue/Other_ID_Continue + ID_Start/Other_ID_Start ranges*/`,
  `const unicodeIdentifierPart: i32[] = [${tablify(parts)}];`,
  `const unicodeIdentifierPartMin = ${parts[0]};`,
  `const unicodeIdentifierPartMax = ${parts[parts.length - 1]};\n`,
];
for (const line of generatedLines) {
  console.log(line);
}
0 commit comments