Skip to content

Commit 78fb4a3

Browse files
committed
ci: stabilize schema verification, normalize schemas, and reduce noise
1 parent 1ac6742 commit 78fb4a3

File tree

3 files changed

+191
-83
lines changed

3 files changed

+191
-83
lines changed

.github/workflows/tidy3d-python-client-tests.yml

Lines changed: 53 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ jobs:
8989
echo "Draft: $DRAFT_STATE"
9090
echo "Review State: $REVIEW_STATE"
9191
echo "Git REF: $REF"
92-
echo "Input local: $INPUT_FAST"
93-
echo "Input remote: $INPUT_FULL"
92+
echo "Input local: $INPUT_LOCAL"
93+
echo "Input remote: $INPUT_REMOTE"
9494
echo "Approved: $APPROVED"
9595
9696
remote_tests=false
@@ -177,7 +177,7 @@ jobs:
177177
with:
178178
ref: ${{ github.event.pull_request.head.ref }}
179179
repository: ${{ github.event.pull_request.head.repo.full_name }}
180-
fetch-depth: '0'
180+
fetch-depth: 0
181181

182182
- name: git-config
183183
run: |
@@ -200,56 +200,68 @@ jobs:
200200
201201
- name: verify-committed-schema
202202
run: |
203-
echo "Regenerating schema to check if committed files are up-to-date..."
203+
set -euo pipefail
204+
echo "Regenerating docs-free canonical schemas into repo schemas/ ..."
204205
source $GITHUB_WORKSPACE/.venv/bin/activate
205206
python $GITHUB_WORKSPACE/scripts/regenerate_schema.py
206-
207-
cd $GITHUB_WORKSPACE
208-
echo "Checking for differences with HEAD commit..."
209-
git status
210-
git log
211-
git diff HEAD --name-status --exit-code -- $GITHUB_WORKSPACE/schemas
212-
echo "✅ Committed schema is up-to-date."
207+
208+
echo "Verifying committed schemas match generated output..."
# Plain `git diff` only compares tracked files, so a schema that the generator
# just created but that was never committed would go unnoticed. Registering
# untracked files under schemas/ as intent-to-add makes them show up as added.
git add --intent-to-add -- schemas
if ! git diff --name-status --exit-code -- schemas; then
  echo "❌ Committed schemas are not up-to-date. See diff above."
  exit 1
fi
echo "✅ Committed schemas are up-to-date."
213214
214215
- name: run-schema-diff
215216
id: schema-diff
216217
run: |
217-
set -e
218-
# Use git diff to compare the two directories and get a list of changes
219-
# The command exits with 0 if no changes, 1 if changes are found.
220-
# We use '|| true' to prevent the workflow from stopping here on failure.
221-
cd $GITHUB_WORKSPACE
222-
diff_output=$(git diff origin/develop --name-status -- $GITHUB_WORKSPACE/schemas || true)
218+
set -euo pipefail
219+
cd "$GITHUB_WORKSPACE"
223220
224-
# Check if there are any changes
225-
if [ -z "$diff_output" ]; then
226-
echo "✅ Schemas are up-to-date."
227-
echo "changed=false" >> $GITHUB_OUTPUT
221+
# Determine base repo/ref for PRs; default to current repo and 'develop' otherwise
222+
BASE_REPO="${{ github.event.pull_request.base.repo.full_name }}"
223+
BASE_REF="${{ github.event.pull_request.base.ref }}"
224+
if [ -z "$BASE_REPO" ]; then
225+
BASE_REPO="${{ github.repository }}"
226+
fi
227+
if [ -z "$BASE_REF" ]; then
228+
BASE_REF="develop"
229+
fi
230+
231+
echo "Fetching base branch $BASE_REPO@$BASE_REF..."
# '|| true' keeps the step re-runnable when the 'upstream' remote already exists.
git remote add upstream "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${BASE_REPO}.git" || true
# Fetch without --depth: the three-dot diff below needs the merge base of the
# base branch and HEAD. A --depth=1 fetch records the base tip as a shallow
# (parentless) commit, so no merge base can be found unless HEAD already
# contains that exact commit.
git fetch --no-tags --prune upstream "+refs/heads/${BASE_REF}:refs/remotes/upstream/${BASE_REF}"

# Name-status diff between base and head limited to schemas/.
# No '|| true' here: without --exit-code, git diff exits 0 even when files
# differ, so a non-zero status is a real error (e.g. "no merge base") and must
# fail the step instead of being reported as "no schema changes".
DIFF_OUTPUT=$(git diff --name-status "upstream/${BASE_REF}...HEAD" -- schemas)
237+
238+
if [ -z "$DIFF_OUTPUT" ]; then
239+
echo "✅ No schema changes relative to ${BASE_REF}."
240+
echo "changed=false" >> "$GITHUB_OUTPUT"
228241
exit 0
229242
fi
230243
231-
# If there are changes, create a Markdown table
232-
echo "❌ Schema has changed compared to develop! Please regenerate and commit the changes."
233-
echo "changed=true" >> $GITHUB_OUTPUT
244+
echo "Schema changes detected relative to ${BASE_REF}."
245+
echo "changed=true" >> "$GITHUB_OUTPUT"
234246
235-
# Write the report to the GitHub Step Summary
236-
echo "### Schema Change Summary" >> $GITHUB_STEP_SUMMARY
237-
echo "| Status | File |" >> $GITHUB_STEP_SUMMARY
238-
echo "|:---:|:---|" >> $GITHUB_STEP_SUMMARY
239-
240-
echo "$diff_output" | while read -r line; do
241-
status_code=$(echo "$line" | cut -f1)
242-
file_path=$(echo "$line" | cut -f2 | sed 's#tidy3d/schemas/##') # Clean up path
243-
244-
case $status_code in
245-
"A") status="Added 🟢";;
246-
"M") status="Modified 🟡";;
247-
"D") status="Removed 🔴";;
248-
*) status="Unknown";;
247+
# Summarize changes
248+
{
249+
echo "### Schema Change Summary"
250+
echo "| Status | File |"
251+
echo "|:---:|:---|"
252+
} >> "$GITHUB_STEP_SUMMARY"
253+
254+
while IFS=$'\t' read -r status file; do
255+
# Map short status to human-friendly
256+
case "$status" in
257+
A|AM) label="Added 🟢" ;;
258+
M|MM) label="Modified 🟡" ;;
259+
D) label="Removed 🔴" ;;
260+
R*) label="Renamed 🟠" ;;
261+
*) label="$status" ;;
249262
esac
250-
251-
echo "| $status | \`$file_path\` |" >> $GITHUB_STEP_SUMMARY
252-
done
263+
echo "| $label | \`$file\` |" >> "$GITHUB_STEP_SUMMARY"
264+
done <<< "$DIFF_OUTPUT"
253265
254266
- name: verify-allowed-changes
255267
if: steps.schema-diff.outputs.changed == 'true'

schemas/README.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
11
# Tidy3D Python Client API Schemas
22

3-
This directory contains the `jsonschema` of the Tidy3D API classes that have GUI support.
3+
This directory contains JSON Schemas for Tidy3D API classes with GUI support.
4+
5+
Schemas are intentionally docs-free: all `title`, `description`, and `units` fields are stripped to improve stability and make diffs meaningful. Output is canonicalized (sorted keys and order-insensitive arrays) so files are deterministic across Python versions.
6+
7+
## Regenerating Schemas
8+
9+
- Preferred command (stable output):
10+
`uv run -p 3.11 python scripts/regenerate_schema.py`
11+
- The generator always writes docs‑free, canonicalized schemas to `schemas/`.
12+
13+
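If you don’t have `uv`, use any Python (output is canonicalized to be deterministic; the script prints a warning on versions other than 3.11, which is what CI uses):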
14+
15+
`python scripts/regenerate_schema.py`

scripts/regenerate_schema.py

Lines changed: 125 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,119 @@
11
"""
2-
Generates and saves JSON schemas for key tidy3d data structures.
2+
Generate Tidy3D JSON Schemas (docs-free, deterministic).
33
4-
This script iterates through a predefined dictionary of Tidy3D classes,
5-
generates a Pydantic JSON schema for each, and saves it as a formatted
6-
JSON file in the 'schemas' directory. It's designed to be run as a
7-
standalone utility to update schema definitions.
4+
This utility exports JSON Schemas for key Tidy3D models and writes them into
5+
the repository `schemas/` directory, with two strict guarantees:
86
9-
All are GUI supported classes.
7+
- Documentation-free: remove all "title", "description", and "units" fields at every level.
8+
- Canonicalized: deterministically sort keys and certain lists for stable output
9+
across Python versions.
10+
11+
Note: The behavior is always docs-free and canonicalized.
1012
"""
1113

1214
from __future__ import annotations
1315

16+
import argparse
1417
import json
1518
import pathlib
1619
import sys
17-
18-
# Attempt to import necessary classes from tidy3d.
19-
try:
20-
from tidy3d import (
21-
EMESimulation,
22-
HeatChargeSimulation,
23-
HeatSimulation,
24-
ModeSimulation,
25-
Simulation,
26-
)
27-
from tidy3d.plugins.smatrix import TerminalComponentModeler
28-
except ImportError as e:
29-
print(
30-
f"Error: Failed to import from 'tidy3d'. Ensure it's installed. Details: {e}",
31-
file=sys.stderr,
32-
)
33-
sys.exit(1)
34-
20+
from typing import Any
3521

3622
# Define the output directory relative to this script's location.
3723
# Assumes the script is in a subdirectory like 'scripts' and 'schemas' is a sibling.
38-
SCHEMA_DIR = pathlib.Path(__file__).parent.parent / "schemas"
24+
DEFAULT_SCHEMA_DIR = pathlib.Path(__file__).parent.parent / "schemas"
3925

4026
# Dictionary mapping a clean name to the Pydantic model class.
4127
# This is the single source of truth for which schemas to export.
42-
export_api_schema_dictionary = {
43-
"Simulation": Simulation,
44-
"ModeSimulation": ModeSimulation,
45-
"EMESimulation": EMESimulation,
46-
"HeatSimulation": HeatSimulation,
47-
"HeatChargeSimulation": HeatChargeSimulation,
48-
"TerminalComponentModeler": TerminalComponentModeler,
49-
}
28+
export_api_schema_dictionary = None # populated lazily when generating
29+
30+
31+
def _stable_sort_key_for_schema_item(item: Any) -> str:
32+
"""Return a stable string key for sorting schema objects in anyOf/oneOf/allOf arrays.
33+
34+
Input is assumed to be already canonicalized and docs-free; we defensively drop
35+
top-level doc fields in case of partially processed inputs.
36+
"""
37+
try:
38+
if isinstance(item, dict):
39+
item = {k: v for k, v in item.items() if k not in {"title", "description", "units"}}
40+
return json.dumps(item, sort_keys=True, separators=(",", ":"))
41+
except Exception:
42+
return str(item)
43+
44+
45+
def _canonicalize(obj: Any) -> Any:
46+
"""Recursively canonicalize a schema object for deterministic, docs-free output.
47+
48+
Rules:
49+
- Drop all "description", "title", and "units" keys everywhere.
50+
- Sort all dict keys recursively.
51+
- Sort arrays that are order-insensitive: "required", "enum", and "type" (when list).
52+
- For "anyOf"/"oneOf"/"allOf", sort entries by a stable key after canonicalization.
53+
"""
54+
if isinstance(obj, dict):
55+
# Canonicalize nested values and drop doc keys
56+
canon: dict[str, Any] = {}
57+
for k, v in obj.items():
58+
if k in {"description", "title", "units"}:
59+
continue # drop docs
60+
canon[k] = _canonicalize(v)
61+
62+
# Normalize known order-insensitive array fields
63+
if isinstance(canon.get("required"), list):
64+
canon["required"] = sorted(set(canon["required"]))
65+
if isinstance(canon.get("enum"), list):
66+
try:
67+
canon["enum"] = sorted(canon["enum"], key=lambda x: json.dumps(x, sort_keys=True))
68+
except Exception:
69+
canon["enum"] = sorted(canon["enum"], key=str)
70+
if isinstance(canon.get("type"), list):
71+
canon["type"] = sorted(canon["type"], key=str)
72+
73+
# Combination keywords: ensure stable order using canonicalized entries
74+
for key in ("anyOf", "oneOf", "allOf"):
75+
if isinstance(canon.get(key), list):
76+
canon[key] = sorted(canon[key], key=_stable_sort_key_for_schema_item)
77+
78+
# Return dict with sorted keys
79+
return {k: canon[k] for k in sorted(canon.keys())}
80+
elif isinstance(obj, list):
81+
return [_canonicalize(x) for x in obj]
82+
else:
83+
return obj
84+
85+
86+
def _load_tidy3d_models():
    """Import the exported Tidy3D model classes and return them keyed by schema name.

    The imports live inside the function so that merely importing or inspecting
    this module does not pull in tidy3d. If anything goes wrong while importing,
    an error message is written to stderr and the process exits with status 1.
    """
    try:
        from tidy3d import (
            EMESimulation,
            HeatChargeSimulation,
            HeatSimulation,
            ModeSimulation,
            Simulation,
        )
        from tidy3d.plugins.smatrix import TerminalComponentModeler
    except Exception as import_error:
        message = (
            f"Error: Failed to import from 'tidy3d'. Ensure it's installed. Details: {import_error}"
        )
        print(message, file=sys.stderr)
        sys.exit(1)

    # Keyword order fixes the iteration order of the resulting mapping.
    return dict(
        Simulation=Simulation,
        ModeSimulation=ModeSimulation,
        EMESimulation=EMESimulation,
        HeatSimulation=HeatSimulation,
        HeatChargeSimulation=HeatChargeSimulation,
        TerminalComponentModeler=TerminalComponentModeler,
    )
50114

51115

52-
def generate_schemas():
116+
def generate_schemas(output_dir: pathlib.Path = DEFAULT_SCHEMA_DIR):
53117
"""
54118
Generates and saves a JSON schema for each class in the global dictionary.
55119
@@ -64,19 +128,22 @@ def generate_schemas():
64128
"""
65129
try:
66130
# Create the output directory if it doesn't exist.
67-
SCHEMA_DIR.mkdir(parents=True, exist_ok=True)
68-
print(f"Saving schemas to '{SCHEMA_DIR}/'")
131+
output_dir.mkdir(parents=True, exist_ok=True)
132+
print(f"Saving schemas to '{output_dir}/'")
69133

70-
for name, class_instance in export_api_schema_dictionary.items():
71-
output_path = SCHEMA_DIR / f"{name}.json"
134+
models = _load_tidy3d_models()
135+
for name, class_instance in models.items():
136+
output_path = output_dir / f"{name}.json"
72137
print(f" -> Generating schema for '{name}'...")
73138

74139
# Generate the schema dictionary from the class.
75140
schema_dict = class_instance.schema()
141+
schema_dict = _canonicalize(schema_dict)
76142

77143
# Write the schema to a file with pretty printing.
78-
with open(output_path, "w") as f:
79-
json.dump(schema_dict, f, indent=2)
144+
with open(output_path, "w", encoding="utf-8") as f:
145+
json.dump(schema_dict, f, indent=2, sort_keys=True, ensure_ascii=True)
146+
f.write("\n")
80147

81148
except OSError as e:
82149
print(
@@ -92,4 +159,21 @@ def generate_schemas():
92159

93160

94161
if __name__ == "__main__":
95-
generate_schemas()
162+
parser = argparse.ArgumentParser(description="Regenerate Tidy3D JSON Schemas")
163+
parser.add_argument(
164+
"--output-dir",
165+
type=pathlib.Path,
166+
default=DEFAULT_SCHEMA_DIR,
167+
help="Directory to write schema JSON files (default: repo 'schemas').",
168+
)
169+
args = parser.parse_args()
170+
171+
# Encourage use of a pinned Python for stable output
172+
if not (sys.version_info.major == 3 and sys.version_info.minor == 11):
173+
print(
174+
f"Warning: Running with Python {sys.version_info.major}.{sys.version_info.minor}. "
175+
"For stable schema output, prefer Python 3.11 (matches CI).",
176+
file=sys.stderr,
177+
)
178+
179+
generate_schemas(args.output_dir)

0 commit comments

Comments
 (0)