@@ -2,6 +2,8 @@ import fs from "fs"
22import type { DOMWindow } from "jsdom"
33import { JSDOM } from "jsdom"
44import { ESLint } from "eslint"
5+ import { getLatestUnicodeGeneralCategoryValues } from "./get-latest-unicode-general-category-values"
6+ import { getLatestUnicodeScriptValues } from "./get-latest-unicode-script-values"
57
68const DATA_SOURCES = [
79 {
@@ -43,8 +45,8 @@ const DATA_SOURCES = [
4345 url : "https://tc39.es/ecma262/multipage/text-processing.html" ,
4446 version : 2023 ,
4547 binProperties : "#table-binary-unicode-properties" ,
46- gcValues : "#table-unicode-general-category-values" ,
47- scValues : "#table-unicode-script-values" ,
48+ gcValues : getLatestUnicodeGeneralCategoryValues ,
49+ scValues : getLatestUnicodeScriptValues ,
4850 } ,
4951]
5052const FILE_PATH = "src/unicode/properties.ts"
@@ -96,13 +98,21 @@ type Datum = {
9698 } while ( window == null )
9799
98100 logger . log ( "Parsing tables" )
99- datum . binProperties = collectValues (
101+ datum . binProperties = await collectValues (
100102 window ,
101103 binProperties ,
102104 existing . binProperties ,
103105 )
104- datum . gcValues = collectValues ( window , gcValues , existing . gcValues )
105- datum . scValues = collectValues ( window , scValues , existing . scValues )
106+ datum . gcValues = await collectValues (
107+ window ,
108+ gcValues ,
109+ existing . gcValues ,
110+ )
111+ datum . scValues = await collectValues (
112+ window ,
113+ scValues ,
114+ existing . scValues ,
115+ )
106116
107117 logger . log ( "Done" )
108118 }
@@ -169,32 +179,55 @@ export function isValidLoneUnicodeProperty(version: number, value: string): bool
169179 process . exitCode = 1
170180} )
171181
/**
 * Gather the property values that are not yet present in `existingSet`.
 *
 * Values come either from a table in the parsed spec document (when
 * `idSelectorOrProvider` is a selector string) or from a caller-supplied
 * provider function (when it is a function).
 *
 * Side effect: every newly adopted value is added to `existingSet`.
 *
 * @param window The DOM window to query when a selector string is given.
 * @param idSelectorOrProvider A selector for the table that holds the values,
 * or a function returning an async iterable of values.
 * @param existingSet The values already known; newly found values are added.
 * @returns The newly adopted values in ascending order.
 * @throws {Error} If the selector matches no nodes, or if a value that was in
 * `existingSet` on entry is not produced by the source.
 */
async function collectValues(
    window: DOMWindow,
    idSelectorOrProvider: string | (() => AsyncIterable<string>),
    existingSet: Set<string>,
): Promise<string[]> {
    // Lazily yields the text of each `<code>` in the first column of the table.
    function* readTable(tableId: string): Iterable<string> {
        const selector = `${tableId} td:nth-child(1) code`
        const cells = window.document.querySelectorAll(selector)
        if (cells.length === 0) {
            throw new Error(`No nodes found for selector ${selector} `)
        }
        logger.log("%o nodes of %o were found.", cells.length, selector)
        for (const cell of Array.from(cells)) {
            yield cell.textContent ?? ""
        }
    }

    // Ascending comparison using the relational operators on strings.
    function ascending(a: string, b: string): number {
        if (a > b) {
            return 1
        }
        if (a < b) {
            return -1
        }
        return 0
    }

    // Snapshot of the known values; whatever remains after the scan was not
    // produced by the source.
    const unseen = new Set(existingSet)
    const adopted = new Set<string>()
    let total = 0

    let source: AsyncIterable<string> | Iterable<string>
    if (typeof idSelectorOrProvider === "function") {
        source = idSelectorOrProvider()
    } else {
        source = readTable(idSelectorOrProvider)
    }

    for await (const value of source) {
        total += 1
        unseen.delete(value)
        if (!existingSet.has(value)) {
            existingSet.add(value)
            adopted.add(value)
        }
    }

    if (unseen.size > 0) {
        throw new Error(`Missing values: ${Array.from(unseen).join(", ")} `)
    }

    logger.log(
        "%o adopted and %o ignored as duplication.",
        adopted.size,
        total - adopted.size,
    )

    return Array.from(adopted).sort(ascending)
}
199232
200233function makeClassDeclarationCode ( versions : string [ ] ) : string {
0 commit comments