unicode-rs
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
Lines changed: 5 additions & 4 deletions
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 0 additions & 3 deletions
diff --git a/scripts/unicode.py b/scripts/unicode.py
Lines changed: 23 additions & 12 deletions
@@ -17,7 +17,7 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Build
       run: cargo build --verbose
     - name: Run tests
@@ -28,14 +28,15 @@ jobs:
       run: cargo fmt --check
     - name: Check clippy
       run: cargo clippy --lib --tests
+
   regen:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - uses: actions/setup-python@v5
       with:
         python-version: '3.12'
     - name: Regen
-      run: cd scripts && python3 unicode.py
+      run: rm tests/NormalizationTest.txt && cd scripts && python3 unicode.py
     - name: Diff
-      run: diff src/tables.rs scripts/tables.rs
+      run: git update-index --refresh && git diff-index --quiet HEAD --
@@ -31,9 +31,6 @@ std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
 core = { version = "1.0", package = "rustc-std-workspace-core", optional = true }
 compiler_builtins = { version = "0.1", optional = true }
 
-[dev-dependencies]
-unicode-normalization = "0.1.23"
-
 [features]
 default = []
 rustc-dep-of-std = ['std', 'core', 'compiler_builtins']
 
@@ -27,9 +27,13 @@
 import os
 import re
 import sys
+import urllib.request
 from collections import defaultdict
 from itertools import batched
 
+UNICODE_VERSION = "15.1.0"
+"""The version of the Unicode data files to download."""
+
 NUM_CODEPOINTS = 0x110000
 """An upper bound for which `range(0, NUM_CODEPOINTS)` contains Unicode's codespace."""
 
@@ -61,24 +65,28 @@ class OffsetType(enum.IntEnum):
 
 If this is edited, you must ensure that `emit_module` reflects your changes."""
 
-MODULE_FILENAME = "tables.rs"
-"""The filename of the emitted Rust module (will be created in the working directory)"""
+MODULE_PATH = "../src/tables.rs"
+"""The path of the emitted Rust module (relative to the working directory)"""
 
 Codepoint = int
 BitPos = int
 
 
-def fetch_open(filename: str):
+def fetch_open(filename: str, local_prefix: str = ""):
     """Opens `filename` and return its corresponding file object. If `filename` isn't on disk,
-    fetches it from `http://www.unicode.org/Public/UNIDATA/`. Exits with code 1 on failure.
+    fetches it from `https://www.unicode.org/Public/`. Exits with code 1 on failure.
     """
     basename = os.path.basename(filename)
-    if not os.path.exists(basename):
-        os.system(f"curl -O http://www.unicode.org/Public/UNIDATA/{filename}")
+    localname = os.path.join(local_prefix, basename)
+    if not os.path.exists(localname):
+        urllib.request.urlretrieve(
+            f"https://www.unicode.org/Public/{UNICODE_VERSION}/ucd/{filename}",
+            localname,
+        )
     try:
-        return open(basename, encoding="utf-8")
+        return open(localname, encoding="utf-8")
     except OSError:
-        sys.stderr.write(f"cannot load {basename}")
+        sys.stderr.write(f"cannot load {localname}")
         sys.exit(1)
 
 
@@ -637,7 +645,7 @@ def emit_module(
         module.write("}\n")
 
 
-def main(module_filename: str):
+def main(module_path: str):
     """Obtain character data from the latest version of Unicode, transform it into a multi-level
     lookup table for character width, and write a Rust module utilizing that table to
-    `module_filename`.
+    `module_path`.
@@ -677,6 +685,9 @@ def main(module_filename: str):
     emoji_variations = load_variation_sequences()
     variation_table = make_variation_sequence_table(emoji_variations, width_map)
 
+    # Download normalization test file for use by tests
+    fetch_open("NormalizationTest.txt", "../tests/")
+
     print("------------------------")
     total_size = 0
     for i, table in enumerate(tables):
@@ -692,9 +703,9 @@ def main(module_filename: str):
     print("------------------------")
     print(f"  Total size: {total_size} bytes")
 
-    emit_module(module_filename, version, tables, variation_table)
-    print(f'Wrote to "{module_filename}"')
+    emit_module(module_path, version, tables, variation_table)
+    print(f'Wrote to "{module_path}"')
 
 
 if __name__ == "__main__":
-    main(MODULE_FILENAME)
+    main(MODULE_PATH)