From be307c774a0ef04ea7a007b0aa888925c3d2b094 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Fri, 21 Nov 2025 13:44:53 -0500 Subject: [PATCH 1/8] planning --- test/INTEGRATION_FINDINGS.md | 223 +++++++++++++ test/INTEGRATION_PLAN_REVISED.md | 127 ++++++++ test/TRANSFORMER_OUTPUT_ANALYSIS.md | 195 ++++++++++++ test/test_stress_period_data_converter.py | 199 ++++++++++++ test/test_transformer_output_format.py | 361 ++++++++++++++++++++++ 5 files changed, 1105 insertions(+) create mode 100644 test/INTEGRATION_FINDINGS.md create mode 100644 test/INTEGRATION_PLAN_REVISED.md create mode 100644 test/TRANSFORMER_OUTPUT_ANALYSIS.md create mode 100644 test/test_stress_period_data_converter.py create mode 100644 test/test_transformer_output_format.py diff --git a/test/INTEGRATION_FINDINGS.md b/test/INTEGRATION_FINDINGS.md new file mode 100644 index 0000000..127549f --- /dev/null +++ b/test/INTEGRATION_FINDINGS.md @@ -0,0 +1,223 @@ +# TypedTransformer → Converter Integration Findings + +## Key Discovery: Period Block Format Matches! + +Looking at the unstructure code (`converter/unstructure.py:305-312`), the **write side** produces: +```python +{ + 'options': {...}, + 'dimensions': {...}, + 'period 1': {'head': , 'aux': }, + 'period 2': {'head': , 'aux': } +} +``` + +The TypedTransformer currently outputs: +```python +{ + 'options': {...}, + 'dimensions': {...}, + 'period 1': {'stress_period_data': [[2, 3, 4, 10.0], [2, 8, 4, 12.0]]}, + 'period 2': {'stress_period_data': [[...]]} +} +``` + +**Note:** Both use **string keys** `'period N'` with **1-based indexing**! + +## The Data Flow + +### Current State (Write Path) +``` +Component with separate arrays: + head: xr.DataArray(shape=(nper, nodes)) + aux: xr.DataArray(shape=(nper, nodes)) + ↓ + unstructure_component() + ↓ +Period blocks with separate fields: + 'period 1': {'head': array_kper0, 'aux': array_kper0} + ↓ + Writer (codec) + ↓ + MF6 input file +``` + +### Desired State (Read Path) +``` + MF6 input file + ↓ +Parser + TypedTransformer + ↓ +Period blocks with tabular data: + 'period 1': {'stress_period_data': [[k,i,j,head,aux], ...]} + ↓ + ??? Converter ??? + ↓ +Component initialization: + Chd(head={0: {cellid: val}, 1: {...}}, + aux={0: {cellid: val}, 1: {...}}) + ↓ +Component with separate arrays: + head: xr.DataArray(shape=(nper, nodes)) + aux: xr.DataArray(shape=(nper, nodes)) +``` + +## The Missing Piece + +The gap is converting from: +```python +# What transformer outputs +{'period 1': {'stress_period_data': [[2,3,4,10.0,5.0], ...]}} +``` + +To: +```python +# What Chd.__init__ expects +{ + 'head': {0: {(2,3,4): 10.0, ...}}, + 'aux': {0: {(2,3,4): 5.0, ...}} +} +``` + +**This conversion must happen BEFORE calling structure_array!** + +## Two Possible Approaches + +### Approach 1: Transformer Does the Conversion +Modify TypedTransformer to: +1. Know about the DFN field schema for period blocks +2. Split stress_period_data into separate field dicts +3. Output: `{'head': {0: list}, 'aux': {0: list}}` + +**Pros:** +- Transformer output directly matches what structure_array expects +- No additional conversion layer needed + +**Cons:** +- Transformer becomes more complex +- Tightly couples parser to data model + +### Approach 2: Add a Converter Layer +Keep TypedTransformer simple, add a `structure_from_parsed()` function: +1. TypedTransformer outputs tabular data as-is +2. Converter layer splits period blocks into fields +3. Calls existing `structure()` function + +**Pros:** +- Separation of concerns (parsing vs. structuring) +- Transformer stays focused on syntactic conversion +- Easier to test and maintain + +**Cons:** +- Extra layer of code +- Needs to handle various period block formats (CHD, WEL, RCH all different) + +## Recommended Approach: **Approach 2** (Converter Layer) + +Because: +1. **Separation of concerns**: Transformer handles syntax, converter handles semantics +2. **Flexibility**: Different packages have different stress period formats +3. **Testability**: Can test transformer and converter independently +4. **Maintainability**: Changes to data model don't require parser changes + +## Implementation Plan + +### Step 1: Fix Array Metadata (High Priority) +Change TypedTransformer to put control metadata in xarray attrs: +```python +# Current +{'control': {'type': 'constant', 'value': 1.0}, 'data': xr.DataArray(1.0)} + +# Target +xr.DataArray(1.0, attrs={'control_type': 'constant', 'control_value': 1.0}) +``` + +**Files:** `transformer.py:274-283, 130-158` + +### Step 2: Create Converter Layer +Add `flopy4/mf6/converter/reader.py`: +```python +def structure_from_parsed(parsed: dict, component_type: type, path: Path) -> Component: + """Convert TypedTransformer output to Component.""" + + # 1. Split period blocks into field arrays + converted = _split_period_blocks(parsed, component_type) + + # 2. Convert 'period N' string keys to integer keys + converted = _convert_period_keys(converted) + + # 3. Convert stress_period_data lists to dicts + converted = _convert_stress_data(converted, component_type) + + # 4. Use existing structure() function + return structure(converted, path) +``` + +### Step 3: Period Block Conversion +```python +def _split_period_blocks(data: dict, component_type: type) -> dict: + """Split 'stress_period_data' into separate field arrays.""" + + # Get DFN to know field names and order + dfn = component_type.dfn + period_fields = [f for f in dfn.fields if f.block == 'period'] + + # Find period blocks + period_blocks = {k: v for k, v in data.items() if k.startswith('period ')} + + # Initialize field dicts + field_arrays = {f.name: {} for f in period_fields} + + # Process each period + for period_key, period_data in period_blocks.items(): + kper = int(period_key.split()[1]) - 1 # Convert to 0-indexed + + if 'stress_period_data' in period_data: + spd = period_data['stress_period_data'] + + # Split each record into fields + for record in spd: + # Determine cellid length (3 for structured, 1 for unstructured) + # ... conversion logic ... + + return {**data, **field_arrays} +``` + +### Step 4: Integration Points +1. Modify `codec/reader/__init__.py` to provide `load_component()`: + ```python + def load_component(fp: IO[str], component_type: type, path: Path) -> Component: + """Load and structure a component from file.""" + parser = get_typed_parser(component_type.package_abbr) + transformer = TypedTransformer(dfn=component_type.dfn) + tree = parser.parse(fp.read()) + parsed = transformer.transform(tree) + return structure_from_parsed(parsed, component_type, path) + ``` + +2. Use in high-level API (Model.load(), etc.) + +## Testing Strategy + +1. **Unit tests** for each conversion function +2. **Integration tests** parsing real files → structured components +3. **Roundtrip tests** read → write → read → compare +4. **Regression tests** on all MF6 example models + +## Open Questions + +1. **Array control metadata**: What keys to use in attrs? `control_type`, `control_value`, `control_factor`? Or nest under `control` dict? +2. **External arrays**: How to preserve Path + metadata through the pipeline? +3. **Layered arrays**: How to handle list of controls in attrs? +4. **Package variations**: Some packages (WEL, CHD, etc.) have different stress data formats - need generic solution? + +## Next Steps + +1. ✅ Write tests to inspect TypedTransformer output (DONE) +2. Implement Step 1 (array metadata in attrs) +3. Test with structure_array to verify arrays work +4. Implement Step 2 (converter layer skeleton) +5. Implement Step 3 (period block splitting) - start with simple case (WEL) +6. Test integration with one package (WEL or CHD) +7. Generalize to other packages +8. Full integration testing with example models diff --git a/test/INTEGRATION_PLAN_REVISED.md b/test/INTEGRATION_PLAN_REVISED.md new file mode 100644 index 0000000..6b123e0 --- /dev/null +++ b/test/INTEGRATION_PLAN_REVISED.md @@ -0,0 +1,127 @@ +# Revised Integration Plan + +## Key Discovery: stress_period_data Setter Already Exists! + +The recent commits (#277, #280) implemented a `stress_period_data` property with getter and setter that **already does the conversion we need**! + +### What It Does + +**Setter:** `package.stress_period_data = DataFrame` +- Takes DataFrame with columns: `kper`, spatial coords (`layer`/`row`/`col` or `node`), and field values +- Internally calls `structure_array()` for each field +- Automatically splits multi-field data into separate arrays +- Handles all the complexcoordinates/reshaping/conversion logic + +**Getter:** `df = package.stress_period_data` +- Returns DataFrame with all period data combined +- Converts arrays → DataFrame format + +## Simplified Integration Path + +Instead of building a complex converter layer, we just need: + +### Step 1: Fix Array Metadata (Easy Win) +Modify TypedTransformer to put control metadata in xarray attrs instead of wrapper dict. + +**Current:** +```python +{'control': {'type': 'constant', 'value': 1.0}, 'data': xr.DataArray(1.0)} +``` + +**Target:** +```python +xr.DataArray(1.0, attrs={'control_type': 'constant', 'control_value': 1.0}) +``` + +### Step 2: Add Simple Period Data Converter +Create one simple function to convert TypedTransformer output → DataFrame: + +```python +def parsed_to_dataframe(parsed: dict, dfn: Dfn) -> pd.DataFrame: + """ + Convert TypedTransformer period blocks to DataFrame. + + Input: {'period 1': {'stress_period_data': [[2,3,4,-35000.0], ...]}} + Output: DataFrame with columns [kper, layer, row, col, q] + """ + # 1. Find period blocks + # 2. Get field names and order from DFN + # 3. Split cellid from values + # 4. Build DataFrame records + pass +``` + +### Step 3: Integration in Reader +```python +# In codec/reader/__init__.py or similar + +def load_component(path: Path, component_type: type) -> Component: + """Load component from MF6 input file.""" + + # 1. Parse and transform + parser = get_typed_parser(component_type.package_abbr) + transformer = TypedTransformer(dfn=component_type.dfn) + with open(path) as f: + tree = parser.parse(f.read()) + parsed = transformer.transform(tree) + + # 2. Extract non-period fields + non_period = {k: v for k, v in parsed.items() if not k.startswith('period ')} + + # 3. Create component with non-period fields + # (arrays already have correct format after Step 1) + component = component_type(**non_period) + + # 4. If has period data, convert and use setter + if any(k.startswith('period ') for k in parsed): + df = parsed_to_dataframe(parsed, component_type.dfn) + component.stress_period_data = df # ← Magic happens here! + + return component +``` + +## Benefits of This Approach + +1. **Reuses existing code**: The setter already handles all the complex logic +2. **Simple converter**: Just one function to convert lists → DataFrame +3. **Tested**: The setter is already tested and working +4. **Maintainable**: Minimal new code +5. **Flexible**: Works for all packages (WEL, CHD, DRN, etc.) + +## Implementation Steps + +### Phase 1: Array Metadata (This Session) +- [ ] Modify `TypedTransformer.try_create_dataarray()` to attach control to attrs +- [ ] Update `array()`, `single_array()`, `layered_array()` methods +- [ ] Test with real files +- [ ] Update existing transformer tests + +### Phase 2: Period Data Converter (Next Session) +- [ ] Implement `parsed_to_dataframe()` function +- [ ] Handle field name/order lookup from DFN +- [ ] Handle both structured and unstructured grids +- [ ] Test with WEL, CHD packages + +### Phase 3: Reader Integration (Following Session) +- [ ] Create `load_component()` function +- [ ] Handle parent model attachment for dims +- [ ] Test end-to-end parsing +- [ ] Roundtrip tests (read → write → read) + +### Phase 4: Full Integration (Final Session) +- [ ] Integrate with Model.load() +- [ ] Test on all MF6 example models +- [ ] Handle edge cases (empty periods, external arrays, etc.) +- [ ] Documentation and cleanup + +## Proof of Concept + +See `test/test_stress_period_data_converter.py` for working proof: +- ✅ `parsed_to_dataframe()` function works +- ✅ DataFrame format is correct +- ✅ Setter accepts the DataFrame (just needs proper parent setup) +- ✅ Getter returns matching format + +## Next Action + +Let's start with Phase 1: Fix array metadata in TypedTransformer! diff --git a/test/TRANSFORMER_OUTPUT_ANALYSIS.md b/test/TRANSFORMER_OUTPUT_ANALYSIS.md new file mode 100644 index 0000000..98cd20d --- /dev/null +++ b/test/TRANSFORMER_OUTPUT_ANALYSIS.md @@ -0,0 +1,195 @@ +# TypedTransformer Output Format Analysis + +## Summary of Current Issues + +Based on parsing real MF6 files, the TypedTransformer outputs data in a format that doesn't directly match what `structure_array()` expects. Here are the key mismatches: + +## 1. Array Fields - Control Metadata Wrapper + +### Current Format +```python +{ + 'griddata': { + 'strt': { + 'control': {'type': 'constant', 'value': -10.7}, + 'data': + } + } +} +``` + +### What structure_array() Expects +- Raw xarray.DataArray or numpy.ndarray +- Dict with integer keys `{0: value, 1: value}` for period/layer data +- Scalars, lists, or DataFrames + +### Required Change +Output just the DataArray with control metadata in `.attrs`: +```python +{ + 'griddata': { + 'strt': + } +} +``` + +**Files to modify:** +- `transformer.py:274-283` - `try_create_dataarray()` method +- `transformer.py:130-158` - `array()`, `single_array()`, `layered_array()` methods + +## 2. Layered Arrays + +### Current Format +```python +{ + 'botm': { + 'control': [ + {'type': 'constant', 'value': -12.2}, + {'type': 'constant', 'value': -21.3}, + {'type': 'constant', 'value': -30.5} + ], + 'data': , + 'attrs': {...}, + 'dims': {'layer': 3} + } +} +``` + +### Required Change +Return DataArray with each layer's control metadata in attrs: +```python +{ + 'botm': +} +``` + +**Note:** For layered arrays, control is a list (one per layer). Store as `attrs['controls']` (plural). + +## 3. Period Blocks - String Keys + +### Current Format +```python +{ + 'period 1': {'saverecord': [...]}, + 'period 2': {'stress_period_data': [[2, 3, 4, -35000.0], ...]} +} +``` + +### Required Change +Use integer keys (0-indexed for Python, even though MF6 uses 1-indexed): +```python +{ + 'period': { + 0: {'saverecord': [...]}, + 1: {'stress_period_data': ...} + } +} +``` + +**OR** flatten to top-level with integer keys: +```python +{ + 0: {'saverecord': [...]}, + 1: {'stress_period_data': ...} +} +``` + +**Files to modify:** +- `transformer.py:105-125` - `start()` method +- `transformer.py:285-300` - `__default__()` method handling for `_block` rules + +**Question:** Should period data be nested under a 'period' key or flattened? Need to check how structure_array handles this. + +## 4. Stress Period Data - List of Lists + +### Current Format +```python +{ + 'stress_period_data': [[2, 3, 4, -35000.0], [2, 8, 4, -35000.0]] +} +``` +- Each inner list has: `[layer, row, col, q_value]` (for structured grids) +- Or: `[node, q_value]` (for unstructured grids) + +### What structure_array() Expects +Looking at `converter/structure.py:567-578`, it handles: +- List of tuples: `[((k, i, j), value), ((k, i, j), value), ...]` +- Nested dict: `{cellid: value, cellid: value}` +- List of lists with cellid: `[[cellid, value], [cellid, value]]` + +The current format `[[k, i, j, q], ...]` should actually work if we convert to: +```python +{ + 'stress_period_data': [[(2, 3, 4), -35000.0], [(2, 8, 4), -35000.0]] +} +``` + +**Alternative:** Convert to dict format: +```python +{ + 'stress_period_data': { + (2, 3, 4): -35000.0, + (2, 8, 4): -35000.0 + } +} +``` + +**Files to modify:** +- `transformer.py:236-266` - `stress_period_data()` and `record()` methods + +## 5. Record Fields - Looks OK + +### Current Format +```python +{ + 'budget_filerecord': {'budgetfile': 'ex-gwf-bcf2ss.cbc'}, + 'saverecord': [ + {'rtype': 'HEAD', 'ocsetting': 'all'}, + {'rtype': 'BUDGET', 'ocsetting': 'all'} + ] +} +``` + +This format looks compatible with structure_array(). Records are dicts, lists of records are lists of dicts. ✓ + +## Implementation Plan + +### Phase 1: Array Metadata (High Priority) +1. Modify `try_create_dataarray()` to attach control metadata to DataArray attrs +2. Modify `array()`, `single_array()`, `layered_array()` to return just the DataArray +3. Update tests in `test_mf6_reader.py` to check for attrs instead of control dict + +### Phase 2: Period Block Keys (High Priority) +1. Modify `start()` to convert "period N" string keys to integer keys +2. Decide on nesting structure (nested under 'period' key vs. flattened) +3. Update grammar/transformer to handle period indexing correctly + +### Phase 3: Stress Period Data Format (Medium Priority) +1. Modify `stress_period_data()` and `record()` to convert flat lists to tuples +2. Convert `[k, i, j, q]` to `[(k, i, j), q]` or dict format +3. Handle both structured and unstructured cellid formats + +### Phase 4: Integration Testing (High Priority) +1. Test that modified transformer output works with structure_array() +2. Create end-to-end tests: parse → transform → structure → roundtrip +3. Verify array metadata is preserved through write operations + +## Questions to Resolve + +1. **Period block structure:** Nested under 'period' key or flattened to top level? +2. **Stress period data format:** List of tuples or dict? Which is more efficient? +3. **Zero vs one indexing:** Should transformer output 0-indexed (Pythonic) or 1-indexed (MF6 native)? +4. **External arrays:** Do they work correctly as Path objects? (Need to test) + +## Next Steps + +1. Review this analysis with team +2. Make architectural decisions on open questions +3. Implement Phase 1 (array metadata) +4. Test with structure_array() +5. Iterate through remaining phases diff --git a/test/test_stress_period_data_converter.py b/test/test_stress_period_data_converter.py new file mode 100644 index 0000000..1d174eb --- /dev/null +++ b/test/test_stress_period_data_converter.py @@ -0,0 +1,199 @@ +""" +Proof of concept: Converting TypedTransformer output to stress_period_data setter format. + +This demonstrates that we can use the existing stress_period_data setter +rather than building a complex converter layer. +""" + +import pandas as pd +import pytest + +from flopy4.mf6.gwf import Chd, Wel + + +def parsed_to_dataframe(parsed: dict, component_type: type, dfn=None) -> pd.DataFrame: + """ + Convert TypedTransformer period block output to DataFrame format. + + Parameters + ---------- + parsed : dict + Output from TypedTransformer with 'period N' keys + component_type : type + Component class (e.g., Chd, Wel) - used to determine field order + dfn : Dfn, optional + Definition file for field metadata + + Returns + ------- + pd.DataFrame + DataFrame ready for stress_period_data setter + """ + # Find period blocks + period_blocks = {k: v for k, v in parsed.items() if k.startswith("period ")} + + if not period_blocks: + return pd.DataFrame() + + # Get period block field names from DFN or component + # For now, hardcode based on component type (would use DFN in real implementation) + if component_type.__name__ == "Wel": + cellid_len = 3 # layer, row, col (structured) + field_names = ["q"] + elif component_type.__name__ == "Chd": + cellid_len = 3 + field_names = ["head"] + else: + raise NotImplementedError(f"Component {component_type.__name__} not yet supported") + + # Build records + records = [] + for period_key, period_data in period_blocks.items(): + # Extract period number (1-indexed in MF6, convert to 0-indexed for Python) + kper = int(period_key.split()[1]) - 1 + + if "stress_period_data" not in period_data: + continue + + spd = period_data["stress_period_data"] + + # Each record is [cellid_components..., field_values...] + for rec in spd: + if not rec: + continue + + # Split cellid and values + cellid = rec[:cellid_len] + values = rec[cellid_len:] + + # Build record dict + record = {"kper": kper} + + # Add spatial coordinates + if cellid_len == 3: + record["layer"] = cellid[0] + record["row"] = cellid[1] + record["col"] = cellid[2] + elif cellid_len == 1: + record["node"] = cellid[0] + + # Add field values + for i, field_name in enumerate(field_names): + if i < len(values): + record[field_name] = values[i] + + records.append(record) + + return pd.DataFrame(records) + + +class TestParsedToDataFrame: + """Test converting parsed data to DataFrame.""" + + def test_wel_simple(self): + """Test simple WEL conversion.""" + parsed = { + "dimensions": {"maxbound": 2}, + "period 1": {"stress_period_data": [[2, 3, 4, -35000.0], [2, 8, 4, -35000.0]]}, + } + + df = parsed_to_dataframe(parsed, Wel) + + assert len(df) == 2 + assert list(df.columns) == ["kper", "layer", "row", "col", "q"] + assert df.iloc[0]["kper"] == 0 # 0-indexed + assert df.iloc[0]["layer"] == 2 + assert df.iloc[0]["row"] == 3 + assert df.iloc[0]["col"] == 4 + assert df.iloc[0]["q"] == -35000.0 + + def test_wel_multiple_periods(self): + """Test WEL with multiple stress periods.""" + parsed = { + "period 1": {"stress_period_data": [[2, 3, 4, -35000.0]]}, + "period 2": {"stress_period_data": [[2, 3, 4, -30000.0], [2, 8, 4, -30000.0]]}, + } + + df = parsed_to_dataframe(parsed, Wel) + + assert len(df) == 3 + assert df[df["kper"] == 0].iloc[0]["q"] == -35000.0 + assert df[df["kper"] == 1].iloc[0]["q"] == -30000.0 + + def test_chd_simple(self): + """Test simple CHD conversion.""" + parsed = {"period 1": {"stress_period_data": [[0, 0, 0, 1.0], [0, 9, 9, 0.0]]}} + + df = parsed_to_dataframe(parsed, Chd) + + assert len(df) == 2 + assert list(df.columns) == ["kper", "layer", "row", "col", "head"] + assert df.iloc[0]["head"] == 1.0 + assert df.iloc[1]["head"] == 0.0 + + +class TestIntegrationWithSetter: + """Test that converted DataFrame works with stress_period_data setter.""" + + def test_wel_roundtrip(self): + """Test WEL: parsed → DataFrame → setter → getter → DataFrame.""" + import numpy as np + + from flopy4.mf6.gwf import Dis, Gwf + + parsed = {"period 1": {"stress_period_data": [[2, 3, 4, -35000.0], [2, 8, 4, -30000.0]]}} + + # Convert to DataFrame + df_input = parsed_to_dataframe(parsed, Wel) + + # Create parent model with dims (setter looks for parent.data.dims) + dis = Dis( + nlay=3, nrow=10, ncol=10, delr=1.0, delc=1.0, top=10.0, botm=np.array([5.0, 0.0, -5.0]) + ) + gwf = Gwf(dis=dis) + + # Create package attached to parent + wel = Wel(parent=gwf) + + # Use setter + wel.stress_period_data = df_input + + # Use getter + df_output = wel.stress_period_data + + # Verify roundtrip + assert len(df_output) == 2 + assert list(df_output.columns) == ["kper", "node", "q"] + + # Values should match (node computed from layer/row/col) + # node = layer * nrow * ncol + row * ncol + col + # node = 2 * 100 + 3 * 10 + 4 = 234 + assert df_output.iloc[0]["kper"] == 0 + assert df_output.iloc[0]["node"] == 234 + assert df_output.iloc[0]["q"] == -35000.0 + + def test_chd_roundtrip(self): + """Test CHD: parsed → DataFrame → setter → getter → DataFrame.""" + import numpy as np + + from flopy4.mf6.gwf import Dis, Gwf + + parsed = {"period 1": {"stress_period_data": [[0, 0, 0, 1.0], [0, 9, 9, 0.0]]}} + + df_input = parsed_to_dataframe(parsed, Chd) + + dis = Dis(nlay=1, nrow=10, ncol=10, delr=1.0, delc=1.0, top=1.0, botm=np.array([0.0])) + gwf = Gwf(dis=dis) + chd = Chd(parent=gwf) + + chd.stress_period_data = df_input + + df_output = chd.stress_period_data + + assert len(df_output) == 2 + assert df_output.iloc[0]["head"] == 1.0 + assert df_output.iloc[1]["head"] == 0.0 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/test/test_transformer_output_format.py b/test/test_transformer_output_format.py new file mode 100644 index 0000000..72eb828 --- /dev/null +++ b/test/test_transformer_output_format.py @@ -0,0 +1,361 @@ +""" +Tests to inspect and document the current TypedTransformer output format. + +This test suite parses real MF6 input files and inspects the exact structure +of the TypedTransformer output to identify what needs to change for integration +with the structure_array converter. +""" + +from pathlib import Path +from pprint import pformat + +import pytest +import xarray as xr +from modflow_devtools.dfn import MapV1To2, load_flat +from modflow_devtools.models import copy_to + +from flopy4.mf6.codec.reader.parser import get_typed_parser +from flopy4.mf6.codec.reader.transformer import TypedTransformer + + +@pytest.fixture(scope="module") +def v2_dfns(dfn_path): + """Load and convert DFNs to V2 format.""" + v1_dfns = load_flat(dfn_path) + mapper = MapV1To2() + return {name: mapper.map(dfn) for name, dfn in v1_dfns.items()} + + +def parse_file(file_path: Path, component_name: str, dfn): + """Helper to parse a file and return transformed output.""" + parser = get_typed_parser(component_name) + transformer = TypedTransformer(dfn=dfn) + + with open(file_path, "r") as f: + content = f.read() + + tree = parser.parse(content) + result = transformer.transform(tree) + return result + + +class TestArrayOutput: + """Test TypedTransformer output format for array fields.""" + + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) + def test_ic_constant_array(self, tmp_path, v2_dfns, model_name): + """Test output format for CONSTANT array (IC package).""" + workspace = copy_to(tmp_path, model_name, verbose=False) + ic_files = list(workspace.rglob("*.ic")) + assert len(ic_files) > 0, "No IC files found" + + result = parse_file(ic_files[0], "gwf-ic", v2_dfns["gwf-ic"]) + + print("\n" + "=" * 80) + print("IC FILE OUTPUT:") + print("=" * 80) + print(pformat(result, width=120)) + + # Check structure + assert "griddata" in result, "Should have griddata block" + assert "strt" in result["griddata"], "Should have strt field" + + strt = result["griddata"]["strt"] + print("\n" + "-" * 80) + print("STRT FIELD STRUCTURE:") + print("-" * 80) + print(f"Type: {type(strt)}") + print(f"Keys: {strt.keys() if isinstance(strt, dict) else 'N/A'}") + + if isinstance(strt, dict): + if "control" in strt: + print(f"Control: {strt['control']}") + if "data" in strt: + data = strt["data"] + print(f"Data type: {type(data)}") + print(f"Data shape: {data.shape if hasattr(data, 'shape') else 'N/A'}") + if isinstance(data, xr.DataArray): + print(f"Data dims: {data.dims}") + print(f"Data attrs: {data.attrs}") + + # Current behavior: should be {"control": {...}, "data": ...} + assert isinstance(strt, dict), "Array field should be a dict" + assert "control" in strt, "Should have 'control' key" + assert "data" in strt, "Should have 'data' key" + + print("\n" + "=" * 80) + print("FINDING: Array fields use {'control': ..., 'data': ...} wrapper") + print("NEEDED: Should be xr.DataArray with control in attrs") + print("=" * 80) + + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) + def test_dis_internal_arrays(self, tmp_path, v2_dfns, model_name): + """Test output format for INTERNAL arrays (DIS package).""" + workspace = copy_to(tmp_path, model_name, verbose=False) + dis_files = list(workspace.rglob("*.dis")) + assert len(dis_files) > 0, "No DIS files found" + + result = parse_file(dis_files[0], "gwf-dis", v2_dfns["gwf-dis"]) + + print("\n" + "=" * 80) + print("DIS FILE OUTPUT:") + print("=" * 80) + print(pformat(result, width=120, depth=2)) # Limit depth for readability + + assert "griddata" in result + + # Check delr array + if "delr" in result["griddata"]: + delr = result["griddata"]["delr"] + print("\n" + "-" * 80) + print("DELR FIELD (INTERNAL ARRAY):") + print("-" * 80) + print(f"Type: {type(delr)}") + if isinstance(delr, dict): + print(f"Keys: {delr.keys()}") + if "control" in delr: + print(f"Control: {delr['control']}") + if "data" in delr: + print(f"Data type: {type(delr['data'])}") + print( + f"Data shape: {delr['data'].shape if hasattr(delr['data'], 'shape') else 'N/A'}" + ) + + # Check botm array (layered) + if "botm" in result["griddata"]: + botm = result["griddata"]["botm"] + print("\n" + "-" * 80) + print("BOTM FIELD (LAYERED ARRAY):") + print("-" * 80) + print(f"Type: {type(botm)}") + if isinstance(botm, dict): + print(f"Keys: {botm.keys()}") + if "control" in botm: + print(f"Control type: {type(botm['control'])}") + print(f"Control: {botm['control']}") + if "data" in botm: + data = botm["data"] + print(f"Data type: {type(data)}") + if isinstance(data, xr.DataArray): + print(f"Data shape: {data.shape}") + print(f"Data dims: {data.dims}") + + +class TestStressPeriodDataOutput: + """Test TypedTransformer output format for stress period data.""" + + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) + def test_wel_stress_period_data(self, tmp_path, v2_dfns, model_name): + """Test output format for WEL stress period data.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + wel_files = list(workspace.rglob("*.wel")) + + if len(wel_files) == 0: + pytest.skip("No WEL files in this model") + + result = parse_file(wel_files[0], "gwf-wel", v2_dfns["gwf-wel"]) + + print("\n" + "=" * 80) + print("WEL FILE OUTPUT:") + print("=" * 80) + print(pformat(result, width=120)) + + # Find period blocks + period_keys = [k for k in result.keys() if k.startswith("period")] + print("\n" + "-" * 80) + print(f"PERIOD BLOCK KEYS: {period_keys}") + print("-" * 80) + + if period_keys: + first_period = result[period_keys[0]] + print(f"\nFirst period block: {period_keys[0]}") + print(f"Keys: {first_period.keys() if isinstance(first_period, dict) else 'N/A'}") + + if "stress_period_data" in first_period: + spd = first_period["stress_period_data"] + print(f"\nStress period data type: {type(spd)}") + print(f"Length: {len(spd) if hasattr(spd, '__len__') else 'N/A'}") + if spd and len(spd) > 0: + print(f"First record type: {type(spd[0])}") + print(f"First record: {spd[0]}") + print(f"Record length: {len(spd[0]) if hasattr(spd[0], '__len__') else 'N/A'}") + + print("\n" + "=" * 80) + print("FINDING: Period blocks use 'period N' string keys") + print("FINDING: stress_period_data is list of lists: [[k, i, j, q], ...]") + print("NEEDED: Should use integer keys {0: ..., 1: ...}") + print("NEEDED: Should be dict or list-of-tuples format for structure_array") + print("=" * 80) + + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) + def test_chd_stress_period_data(self, tmp_path, v2_dfns, model_name): + """Test output format for CHD stress period data.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + chd_files = list(workspace.rglob("*.chd")) + + if len(chd_files) == 0: + pytest.skip("No CHD files in this model") + + result = parse_file(chd_files[0], "gwf-chd", v2_dfns["gwf-chd"]) + + print("\n" + "=" * 80) + print("CHD FILE OUTPUT:") + print("=" * 80) + print(pformat(result, width=120)) + + period_keys = [k for k in result.keys() if k.startswith("period")] + if period_keys: + first_period = result[period_keys[0]] + if "stress_period_data" in first_period: + spd = first_period["stress_period_data"] + print("\nCHD stress_period_data format:") + print(f"Type: {type(spd)}") + if spd and len(spd) > 0: + print(f"Sample records: {spd[:3]}") + + +class TestRecordOutput: + """Test TypedTransformer output format for record fields.""" + + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) + def test_oc_record_fields(self, tmp_path, v2_dfns, model_name): + """Test output format for OC package record fields.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + oc_files = list(workspace.rglob("*.oc")) + + if len(oc_files) == 0: + pytest.skip("No OC files in this model") + + result = parse_file(oc_files[0], "gwf-oc", v2_dfns["gwf-oc"]) + + print("\n" + "=" * 80) + print("OC FILE OUTPUT:") + print("=" * 80) + print(pformat(result, width=120)) + + # Check options block for record fields + if "options" in result: + print("\n" + "-" * 80) + print("OPTIONS BLOCK:") + print("-" * 80) + options = result["options"] + for key, val in options.items(): + print(f"{key}: {type(val)} = {val}") + + # Check period blocks + period_keys = [k for k in result.keys() if k.startswith("period")] + if period_keys: + first_period = result[period_keys[0]] + print("\n" + "-" * 80) + print(f"FIRST PERIOD BLOCK ({period_keys[0]}):") + print("-" * 80) + for key, val in first_period.items(): + print(f"{key}: {type(val)}") + if isinstance(val, list) and val: + print(f" First item: {val[0]}") + + +class TestKeywordOutput: + """Test TypedTransformer output format for keyword fields.""" + + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) + def test_npf_keywords(self, tmp_path, v2_dfns, model_name): + """Test output format for NPF package keyword fields.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + npf_files = list(workspace.rglob("*.npf")) + + if len(npf_files) == 0: + pytest.skip("No NPF files in this model") + + result = parse_file(npf_files[0], "gwf-npf", v2_dfns["gwf-npf"]) + + print("\n" + "=" * 80) + print("NPF FILE OUTPUT:") + print("=" * 80) + print(pformat(result, width=120, depth=2)) + + if "options" in result: + options = result["options"] + print("\n" + "-" * 80) + print("KEYWORD FIELDS:") + print("-" * 80) + for key, val in options.items(): + if isinstance(val, bool): + print(f"{key}: {type(val)} = {val}") + + print("\n" + "=" * 80) + print("FINDING: Keywords are stored as (name, True) tuples, transformed to {name: True}") + print("NEEDED: This format is probably OK for structure_array") + print("=" * 80) + + +class TestDimensionsOutput: + """Test TypedTransformer output format for dimension fields.""" + + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) + def test_dis_dimensions(self, tmp_path, v2_dfns, model_name): + """Test output format for DIS dimensions block.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + dis_files = list(workspace.rglob("*.dis")) + assert len(dis_files) > 0 + + result = parse_file(dis_files[0], "gwf-dis", v2_dfns["gwf-dis"]) + + print("\n" + "=" * 80) + print("DIS DIMENSIONS BLOCK:") + print("=" * 80) + + if "dimensions" in result: + dims = result["dimensions"] + print(pformat(dims, width=120)) + print("\n" + "-" * 80) + for key, val in dims.items(): + print(f"{key}: {type(val)} = {val}") + + print("\n" + "=" * 80) + print("FINDING: Dimension fields are simple integers") + print("NEEDED: This format is correct for structure_array") + print("=" * 80) + + +class TestExternalArrayOutput: + """Test TypedTransformer output format for external arrays.""" + + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) + def test_external_array_reference(self, tmp_path, v2_dfns, model_name): + """Test output format when arrays reference external files.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + + # Try to find a package with external arrays + # DIS often has external arrays for large grids + dis_files = list(workspace.rglob("*.dis")) + if len(dis_files) == 0: + pytest.skip("No DIS files found") + + result = parse_file(dis_files[0], "gwf-dis", v2_dfns["gwf-dis"]) + + print("\n" + "=" * 80) + print("CHECKING FOR EXTERNAL ARRAYS:") + print("=" * 80) + + # Look through griddata for external arrays + if "griddata" in result: + for field_name, field_val in result["griddata"].items(): + if isinstance(field_val, dict) and "control" in field_val: + control = field_val["control"] + if isinstance(control, dict) and control.get("type") == "external": + print(f"\nFound external array: {field_name}") + print(f"Control: {control}") + print(f"Data type: {type(field_val['data'])}") + print(f"Data value: {field_val['data']}") + + print("\n" + "-" * 80) + print("FINDING: External arrays have data as Path object") + print("NEEDED: Check if structure_array handles Path correctly") + print("-" * 80) + break + + +if __name__ == "__main__": + # Allow running this file directly for debugging + pytest.main([__file__, "-v", "-s"]) From 2898cc63ea8d196d6c75ec9c5944224f1529354f Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Fri, 21 Nov 2025 16:16:25 -0500 Subject: [PATCH 2/8] implementing --- flopy4/mf6/codec/reader/transformer.py | 243 ++++++++++++++++++++++--- test/test_mf6_reader.py | 170 ++++++++++------- 2 files changed, 324 insertions(+), 89 deletions(-) diff --git a/flopy4/mf6/codec/reader/transformer.py b/flopy4/mf6/codec/reader/transformer.py index 102e17a..31b754f 100644 --- a/flopy4/mf6/codec/reader/transformer.py +++ b/flopy4/mf6/codec/reader/transformer.py @@ -2,6 +2,7 @@ from typing import Any import numpy as np +import pandas as pd import xarray as xr from lark import Token, Transformer from modflow_devtools.dfn import Dfn @@ -102,59 +103,232 @@ def _flatten_fields(self, fields: dict) -> dict: flat.update(nested_flat) return flat - def start(self, items: list[Any]) -> dict: - """Collect and merge blocks, handling indexed blocks specially.""" + def _convert_period_blocks_to_dataframe( + self, period_blocks: dict[int, dict] + ) -> pd.DataFrame | None: + """ + Convert period block data to DataFrame. + + Parameters + ---------- + period_blocks : dict[int, dict] + Dict mapping period indices to period data + Format: {1: {"stress_period_data": [[...], ...]}, 2: {...}} + + Returns + ------- + pd.DataFrame | None + DataFrame with columns: kper, cellid columns, field columns + Returns None if no stress_period_data found + """ + if not period_blocks or not self.dfn: + return None + + # Check if any period has stress_period_data with tabular records + has_spd = any("stress_period_data" in data for data in period_blocks.values()) + if not has_spd: + return None + + # Check if stress_period_data contains tabular records (lists) or keywords (Trees) + # If it contains Trees, it's keyword-based (like STO with STEADY-STATE/TRANSIENT) + # and should not be converted to DataFrame + from lark import Tree + + for period_data in period_blocks.values(): + if "stress_period_data" in period_data: + spd = period_data["stress_period_data"] + if spd and len(spd) > 0: + # Check first item - if it's a Tree, this is keyword-based + if isinstance(spd[0], Tree): + return None + break + + # Get period block fields from DFN to determine field names and order + period_block = self.dfn.blocks.get("period", {}) + if not period_block: + return None + + # Determine field names (excluding cellid) + # For now, we'll infer from the first record + # TODO: Use DFN to get proper field order + field_names = [] + cellid_len = 3 # Default to structured (layer, row, col) + + # Build records for DataFrame + records = [] + for period_idx, period_data in period_blocks.items(): + if "stress_period_data" not in period_data: + continue + + spd = period_data["stress_period_data"] + kper = period_idx - 1 # Convert to 0-indexed + + for rec in spd: + if not rec: + continue + + # First record - infer structure + if not field_names: + # Infer cellid length and field names + # This is a heuristic - ideally we'd use DFN + if len(rec) == 2: # (node, value) + cellid_len = 1 + field_names = ["q"] # Generic field name + elif len(rec) >= 4: # (layer, row, col, value, ...) + cellid_len = 3 + # Remaining values are field values + num_fields = len(rec) - cellid_len + # For now, use generic names + # TODO: Get proper field names from DFN + field_names = [f"field_{i}" for i in range(num_fields)] + + # Split cellid and values + cellid = rec[:cellid_len] + values = rec[cellid_len:] + + # Build record dict + record = {"kper": kper} + + # Add spatial coordinates + if cellid_len == 3: + record["layer"] = cellid[0] + record["row"] = cellid[1] + record["col"] = cellid[2] + elif cellid_len == 1: + record["node"] = cellid[0] + + # Add field values + for i, field_name in enumerate(field_names): + if i < len(values): + record[field_name] = values[i] + + records.append(record) + + if not records: + return None + + return pd.DataFrame(records) + + def start(self, items: list[Any]) -> dict | Any: + """ + Collect and merge blocks, handling indexed blocks specially. + + For simple cases (e.g., "start: array"), returns the item directly. + For full component files with blocks, returns a dict of blocks. + """ merged = {} + period_blocks = {} # Collect period blocks for DataFrame conversion + for item in items: + # Handle non-dict items (e.g., simple "start: array" tests) if not isinstance(item, dict): + # If there's only one non-dict item, return it directly + if len(items) == 1: + return item + # Otherwise skip non-dict items continue + for block_name, block_data in item.items(): # Check if this is an indexed block (dict with integer keys) if isinstance(block_data, dict) and all( isinstance(k, int) for k in block_data.keys() ): - # Flatten indexed blocks into separate keys like "period 1", "period 2" - for index, data in block_data.items(): - indexed_key = f"{block_name} {index}" - merged[indexed_key] = data + # Check if this is a period block + if block_name == "period": + # Collect for DataFrame conversion + period_blocks.update(block_data) + else: + # Flatten indexed blocks into separate keys like "period 1", "period 2" + for index, data in block_data.items(): + indexed_key = f"{block_name} {index}" + merged[indexed_key] = data elif block_name not in merged: merged[block_name] = block_data else: # This shouldn't happen for well-formed input pass + + # Convert period blocks to DataFrame if present + if period_blocks: + df = self._convert_period_blocks_to_dataframe(period_blocks) + if df is not None: + # Successfully converted to DataFrame + merged["stress_period_data"] = df + else: + # No stress_period_data found, keep as indexed period blocks + for index, data in period_blocks.items(): + indexed_key = f"period {index}" + merged[indexed_key] = data + return merged def block(self, items: list[Any]) -> dict: return items[0] - def array(self, items: list[Any]) -> dict: + def array(self, items: list[Any]) -> xr.DataArray | Path: + """ + Handle array field - single or layered. + + Returns xarray DataArray directly with control metadata in attrs. + For layered arrays, concatenates along 'layer' dimension and stores + list of control dicts in attrs['controls']. + """ arrs = items[0] if isinstance(arrs, list): - data = xr.concat([arr["data"] for arr in arrs if "data" in arr], dim="layer") - return { - "control": [arr["control"] for arr in arrs if "control" in arr], - "data": data, - "attrs": {k: v for k, v in arrs[0].items() if k not in ["data"]}, - "dims": {"layer": len(arrs)}, - } + # Layered array - concatenate DataArrays + # Extract controls from each layer's attrs + controls = [] + data_arrays = [] + for arr in arrs: + if isinstance(arr, xr.DataArray): + data_arrays.append(arr) + # Extract control from attrs + control = { + k.replace("control_", ""): v + for k, v in arr.attrs.items() + if k.startswith("control_") + } + controls.append(control) + elif isinstance(arr, Path): + # External array - can't concatenate + # For now, return as-is; this needs special handling + # TODO: Handle layered external arrays + pass + + if data_arrays: + # Concatenate along layer dimension + result = xr.concat(data_arrays, dim="layer") + # Store list of controls in attrs (one per layer) + result.attrs["controls"] = controls + return result return arrs - def single_array(self, items: list[Any]) -> dict: + def single_array(self, items: list[Any]) -> xr.DataArray | Path: + """Handle single array (not layered).""" netcdf = items[0] arr = items[-1] - if netcdf: - arr["netcdf"] = netcdf - return TypedTransformer.try_create_dataarray(arr) + result = TypedTransformer.try_create_dataarray(arr) + + # Add netcdf flag to attrs if present + if netcdf and isinstance(result, xr.DataArray): + result.attrs["netcdf"] = True + + return result - def layered_array(self, items: list[Any]) -> list[dict]: + def layered_array(self, items: list[Any]) -> list[xr.DataArray | Path]: + """Handle layered array - returns list of DataArrays (one per layer).""" netcdf = items[0] layers = [] for arr in items[2:]: if arr is None: continue - if netcdf: - arr["netcdf"] = netcdf - layers.append(TypedTransformer.try_create_dataarray(arr)) + result = TypedTransformer.try_create_dataarray(arr) + + # Add netcdf flag to attrs if present + if netcdf and isinstance(result, xr.DataArray): + result.attrs["netcdf"] = True + + layers.append(result) return layers def readarray(self, items: list[Any]) -> dict[str, Any]: @@ -271,16 +445,31 @@ def _is_newline_token(item: Any) -> bool: return isinstance(item, Token) and item.type == "NEWLINE" @staticmethod - def try_create_dataarray(array_info: dict) -> dict: + def try_create_dataarray(array_info: dict) -> xr.DataArray | Path: + """ + Create xarray DataArray with control metadata in attrs. + + For external arrays, returns Path object. + For internal/constant arrays, returns DataArray with control metadata in attrs. + """ control = array_info["control"] + + # Build attrs from control metadata + attrs = {f"control_{k}": v for k, v in control.items()} + match control["type"]: case "constant": - array_info["data"] = xr.DataArray(data=control["value"]) + return xr.DataArray(data=control["value"], attrs=attrs) case "internal": - array_info["data"] = xr.DataArray(data=array_info["data"]) + return xr.DataArray(data=array_info["data"], attrs=attrs) case "external": - pass - return array_info + # For external arrays, return Path with control as attrs + # Store control metadata separately for external arrays + path = array_info["data"] + # Can't attach attrs to Path, so return a DataArray wrapper or custom object + # For now, return the path and handle control separately in parent + # TODO: Consider a custom ExternalArray class that holds path + control + return path def __default__(self, data, children, meta): if self.blocks is None or self._flat_fields is None: diff --git a/test/test_mf6_reader.py b/test/test_mf6_reader.py index e1614d0..28cd556 100644 --- a/test/test_mf6_reader.py +++ b/test/test_mf6_reader.py @@ -4,6 +4,7 @@ from pathlib import Path import numpy as np +import pandas as pd import pytest import xarray as xr from lark import Lark @@ -180,16 +181,18 @@ def test_transform_internal_array(): result = transformer.transform( parser.parse(""" INTERNAL FACTOR 1.5 IPRN 3 -1.2 3.7 9.3 4.2 -2.2 9.9 1.0 3.3 -4.9 7.3 7.5 8.2 +1.2 3.7 9.3 4.2 +2.2 9.9 1.0 3.3 +4.9 7.3 7.5 8.2 8.7 6.6 4.5 5.7 """) ) - assert result["control"]["type"] == "internal" - assert result["control"]["factor"] == 1.5 - assert result["control"]["iprn"] == 3 - assert result["data"].shape == (16,) + # Result is now an xarray.DataArray with control in attrs + assert isinstance(result, xr.DataArray) + assert result.attrs["control_type"] == "internal" + assert result.attrs["control_factor"] == 1.5 + assert result.attrs["control_iprn"] == 3 + assert result.shape == (16,) def test_transform_constant_array(): @@ -200,8 +203,11 @@ def test_transform_constant_array(): CONSTANT 42.5 """) ) - assert result["control"]["type"] == "constant" - assert np.array_equal(result["data"], np.array(42.5)) + # Result is now an xarray.DataArray with control in attrs + assert isinstance(result, xr.DataArray) + assert result.attrs["control_type"] == "constant" + assert result.attrs["control_value"] == 42.5 + assert np.array_equal(result, np.array(42.5)) def test_transform_external_array(): @@ -212,8 +218,10 @@ def test_transform_external_array(): OPEN/CLOSE "data/heads.dat" FACTOR 1.0 (BINARY) """) ) - assert result["control"]["type"] == "external" - assert result["data"] == Path("data/heads.dat") + # Result is now a Path for external arrays + assert isinstance(result, Path) + assert result == Path("data/heads.dat") + # TODO: Control metadata for external arrays needs special handling def test_transform_layered_array(): @@ -228,14 +236,17 @@ def test_transform_layered_array(): 2.2 9.9 1.0 3.3 """) ) - assert isinstance(result["control"], list) - assert result["control"][0]["type"] == "constant" - assert result["control"][1]["type"] == "internal" - assert result["control"][1]["factor"] == 2.0 - assert isinstance(result["data"], xr.DataArray) - assert result["data"].shape == (2, 8) - assert result["data"].dims == ("layer", "dim_0") - assert np.array_equal(result["data"][0], np.ones((8,))) + # Result is now an xarray.DataArray with controls list in attrs + assert isinstance(result, xr.DataArray) + assert result.shape == (2, 8) + assert result.dims == ("layer", "dim_0") + # Controls are stored in attrs as a list (one per layer) + assert "controls" in result.attrs + assert len(result.attrs["controls"]) == 2 + assert result.attrs["controls"][0]["type"] == "constant" + assert result.attrs["controls"][1]["type"] == "internal" + assert result.attrs["controls"][1]["factor"] == 2.0 + assert np.array_equal(result[0], np.ones((8,))) def test_transform_full_component(): @@ -296,14 +307,22 @@ def test_transform_full_component(): assert result["options"]["b"] == "nice said" assert result["options"]["c"] == 3 assert result["options"]["p"] == 0.0 - assert result["arrays"]["x"]["control"]["type"] == "constant" - assert np.array_equal(result["arrays"]["x"]["data"], np.array(1.0)) - assert result["arrays"]["y"]["control"]["type"] == "internal" - assert np.array_equal(result["arrays"]["y"]["data"], np.array([4.0, 5.0, 6.0])) - assert result["arrays"]["z"]["control"]["type"] == "external" - assert result["arrays"]["z"]["control"]["factor"] == 1.0 - assert result["arrays"]["z"]["control"]["binary"] is True - assert result["arrays"]["z"]["data"] == Path("data/z.dat") + + # Arrays are now xr.DataArray with control in attrs + x = result["arrays"]["x"] + assert isinstance(x, xr.DataArray) + assert x.attrs["control_type"] == "constant" + assert np.array_equal(x.values, np.array(1.0)) + + y = result["arrays"]["y"] + assert isinstance(y, xr.DataArray) + assert y.attrs["control_type"] == "internal" + assert np.array_equal(y.values, np.array([4.0, 5.0, 6.0])) + + # External arrays still return Path directly + z = result["arrays"]["z"] + assert isinstance(z, Path) + assert z == Path("data/z.dat") # Real model tests using modflow-devtools models API @@ -417,15 +436,16 @@ def test_transform_gwf_ic_file(model_workspace, dfn_path): assert "griddata" in result # IC has griddata block assert "strt" in result["griddata"] # Starting heads - # Check strt array structure + # Check strt array structure - now returns xr.DataArray with control in attrs strt = result["griddata"]["strt"] - assert "control" in strt - assert "data" in strt - assert strt["control"]["type"] in ["constant", "internal", "external"] - - # If internal or constant, should have data - if strt["control"]["type"] in ["constant", "internal"]: - assert strt["data"] is not None + if isinstance(strt, xr.DataArray): + # DataArray for constant or internal + assert "control_type" in strt.attrs + assert strt.attrs["control_type"] in ["constant", "internal"] + assert strt.values is not None + elif isinstance(strt, Path): + # Path for external arrays + assert strt.exists() or True # May not exist in test env @pytest.mark.parametrize("model_workspace", ["mf6/example/ex-gwf-bcf2ss-p01a"], indirect=True) @@ -464,30 +484,35 @@ def test_transform_gwf_wel_file(model_workspace, dfn_path): assert "dimensions" in result assert result["dimensions"]["maxbound"] == 2 - # Should have a period 2 entry (indexed period blocks are flattened to "period N" keys) - assert "period 2" in result - assert "stress_period_data" in result["period 2"] + # Stress period data is now aggregated into a single DataFrame + assert "stress_period_data" in result + spd = result["stress_period_data"] + assert isinstance(spd, pd.DataFrame) - # Should have 2 rows of data (MAXBOUND = 2) - spd = result["period 2"]["stress_period_data"] - assert len(spd) == 2 + # Check DataFrame structure + assert "kper" in spd.columns + assert "layer" in spd.columns + assert "row" in spd.columns + assert "col" in spd.columns - # Each row should have 4 values (cellid components + q value) - assert len(spd[0]) == 4 - assert len(spd[1]) == 4 + # Filter for period 2 data (kper = 1 in 0-indexed) + period_2_data = spd[spd["kper"] == 1] + assert len(period_2_data) == 2 # Should have 2 wells in period 2 # Check specific values from the file # First well: 2 3 4 -3.5e4 - assert spd[0][0] == 2 # layer - assert spd[0][1] == 3 # row - assert spd[0][2] == 4 # col - assert spd[0][3] == -3.5e4 # q + first_well = period_2_data.iloc[0] + assert first_well["layer"] == 2 + assert first_well["row"] == 3 + assert first_well["col"] == 4 + assert first_well.iloc[-1] == -3.5e4 # Last column is the field value (q) # Second well: 2 8 4 -3.5e4 - assert spd[1][0] == 2 # layer - assert spd[1][1] == 8 # row - assert spd[1][2] == 4 # col - assert spd[1][3] == -3.5e4 # q + second_well = period_2_data.iloc[1] + assert second_well["layer"] == 2 + assert second_well["row"] == 8 + assert second_well["col"] == 4 + assert second_well.iloc[-1] == -3.5e4 @pytest.mark.parametrize("model_workspace", ["mf6/example/ex-gwf-bcf2ss-p01a"], indirect=True) @@ -620,9 +645,14 @@ def test_transform_gwf_dis_file(model_workspace, dfn_path): assert "top" in griddata assert "botm" in griddata - # Each array should have control and data - assert "control" in griddata["delr"] - assert "data" in griddata["delr"] + # Arrays are now xr.DataArray with control in attrs + delr = griddata["delr"] + if isinstance(delr, xr.DataArray): + assert "control_type" in delr.attrs + assert delr.values is not None + elif isinstance(delr, Path): + # External array + pass @pytest.mark.parametrize("model_workspace", ["mf6/example/ex-gwf-csub-p01"], indirect=True) @@ -668,11 +698,22 @@ def test_transform_gwf_npf_file(model_workspace, dfn_path): assert "icelltype" in griddata assert "k" in griddata - # Each array should have control and data - assert "control" in griddata["icelltype"] - assert "data" in griddata["icelltype"] - assert "control" in griddata["k"] - assert "data" in griddata["k"] + # Arrays are now xr.DataArray with control in attrs + icelltype = griddata["icelltype"] + if isinstance(icelltype, xr.DataArray): + assert "control_type" in icelltype.attrs + assert icelltype.values is not None + elif isinstance(icelltype, Path): + # External array + pass + + k = griddata["k"] + if isinstance(k, xr.DataArray): + assert "control_type" in k.attrs + assert k.values is not None + elif isinstance(k, Path): + # External array + pass @pytest.mark.parametrize("model_workspace", ["mf6/example/ex-gwf-csub-p01"], indirect=True) @@ -711,5 +752,10 @@ def test_transform_gwf_sto_file(model_workspace, dfn_path): # STO should have iconvert assert "iconvert" in griddata - assert "control" in griddata["iconvert"] - assert "data" in griddata["iconvert"] + iconvert = griddata["iconvert"] + if isinstance(iconvert, xr.DataArray): + assert "control_type" in iconvert.attrs + assert iconvert.values is not None + elif isinstance(iconvert, Path): + # External array + pass From c05e8aa6382395adc9bfca027d9afa8b95aadeb6 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Fri, 21 Nov 2025 18:59:33 -0500 Subject: [PATCH 3/8] refactor/tests --- flopy4/mf6/codec/reader/transformer.py | 110 +++-- flopy4/mf6/converter/structure.py | 133 ++++++ test/test_converter_structure.py | 109 +++++ test/test_mf6_load_integration.py | 578 +++++++++++++++++++++++++ test/test_mf6_reader.py | 108 +++-- test/test_transformer_output_format.py | 361 --------------- 6 files changed, 934 insertions(+), 465 deletions(-) create mode 100644 test/test_mf6_load_integration.py delete mode 100644 test/test_transformer_output_format.py diff --git a/flopy4/mf6/codec/reader/transformer.py b/flopy4/mf6/codec/reader/transformer.py index 31b754f..d9d5287 100644 --- a/flopy4/mf6/codec/reader/transformer.py +++ b/flopy4/mf6/codec/reader/transformer.py @@ -2,7 +2,6 @@ from typing import Any import numpy as np -import pandas as pd import xarray as xr from lark import Token, Transformer from modflow_devtools.dfn import Dfn @@ -103,11 +102,11 @@ def _flatten_fields(self, fields: dict) -> dict: flat.update(nested_flat) return flat - def _convert_period_blocks_to_dataframe( + def _convert_period_blocks_to_dataset( self, period_blocks: dict[int, dict] - ) -> pd.DataFrame | None: + ) -> xr.Dataset | None: """ - Convert period block data to DataFrame. + Convert period block data to xarray Dataset. Parameters ---------- @@ -117,8 +116,8 @@ def _convert_period_blocks_to_dataframe( Returns ------- - pd.DataFrame | None - DataFrame with columns: kper, cellid columns, field columns + xr.Dataset | None + Dataset with data_vars for data fields and coords for kper and cellid Returns None if no stress_period_data found """ if not period_blocks or not self.dfn: @@ -154,8 +153,15 @@ def _convert_period_blocks_to_dataframe( field_names = [] cellid_len = 3 # Default to structured (layer, row, col) - # Build records for DataFrame - records = [] + # Build arrays for Dataset + # Collect data for each variable + kper_data = [] + layer_data = [] + row_data = [] + col_data = [] + node_data = [] + field_data = {name: [] for name in field_names} if field_names else {} + for period_idx, period_data in period_blocks.items(): if "stress_period_data" not in period_data: continue @@ -182,32 +188,50 @@ def _convert_period_blocks_to_dataframe( # TODO: Get proper field names from DFN field_names = [f"field_{i}" for i in range(num_fields)] + # Initialize field data dicts now that we know the field names + field_data = {name: [] for name in field_names} + # Split cellid and values cellid = rec[:cellid_len] values = rec[cellid_len:] - # Build record dict - record = {"kper": kper} + # Collect kper + kper_data.append(kper) - # Add spatial coordinates + # Collect spatial coordinates if cellid_len == 3: - record["layer"] = cellid[0] - record["row"] = cellid[1] - record["col"] = cellid[2] + layer_data.append(cellid[0]) + row_data.append(cellid[1]) + col_data.append(cellid[2]) elif cellid_len == 1: - record["node"] = cellid[0] + node_data.append(cellid[0]) - # Add field values + # Collect field values for i, field_name in enumerate(field_names): if i < len(values): - record[field_name] = values[i] + field_data[field_name].append(values[i]) - records.append(record) - - if not records: + if not kper_data: return None - return pd.DataFrame(records) + # Build Dataset + + data_vars = {} + coords = {"kper": (["record"], kper_data)} + + # Add spatial coords + if layer_data: # Structured grid + coords["layer"] = (["record"], layer_data) + coords["row"] = (["record"], row_data) + coords["col"] = (["record"], col_data) + elif node_data: # Unstructured grid + coords["node"] = (["record"], node_data) + + # Add data fields as data_vars + for field_name, data in field_data.items(): + data_vars[field_name] = (["record"], data) + + return xr.Dataset(data_vars=data_vars, coords=coords) def start(self, items: list[Any]) -> dict | Any: """ @@ -248,12 +272,12 @@ def start(self, items: list[Any]) -> dict | Any: # This shouldn't happen for well-formed input pass - # Convert period blocks to DataFrame if present + # Convert period blocks to Dataset if present if period_blocks: - df = self._convert_period_blocks_to_dataframe(period_blocks) - if df is not None: - # Successfully converted to DataFrame - merged["stress_period_data"] = df + ds = self._convert_period_blocks_to_dataset(period_blocks) + if ds is not None: + # Successfully converted to Dataset + merged["stress_period_data"] = ds else: # No stress_period_data found, keep as indexed period blocks for index, data in period_blocks.items(): @@ -265,13 +289,14 @@ def start(self, items: list[Any]) -> dict | Any: def block(self, items: list[Any]) -> dict: return items[0] - def array(self, items: list[Any]) -> xr.DataArray | Path: + def array(self, items: list[Any]) -> xr.DataArray: """ Handle array field - single or layered. Returns xarray DataArray directly with control metadata in attrs. For layered arrays, concatenates along 'layer' dimension and stores list of control dicts in attrs['controls']. + External arrays are included with np.nan data and path in attrs. """ arrs = items[0] if isinstance(arrs, list): @@ -289,33 +314,28 @@ def array(self, items: list[Any]) -> xr.DataArray | Path: if k.startswith("control_") } controls.append(control) - elif isinstance(arr, Path): - # External array - can't concatenate - # For now, return as-is; this needs special handling - # TODO: Handle layered external arrays - pass if data_arrays: - # Concatenate along layer dimension + # Concatenate along layer dimension (including external arrays with np.nan) result = xr.concat(data_arrays, dim="layer") # Store list of controls in attrs (one per layer) result.attrs["controls"] = controls return result return arrs - def single_array(self, items: list[Any]) -> xr.DataArray | Path: + def single_array(self, items: list[Any]) -> xr.DataArray: """Handle single array (not layered).""" netcdf = items[0] arr = items[-1] result = TypedTransformer.try_create_dataarray(arr) # Add netcdf flag to attrs if present - if netcdf and isinstance(result, xr.DataArray): + if netcdf: result.attrs["netcdf"] = True return result - def layered_array(self, items: list[Any]) -> list[xr.DataArray | Path]: + def layered_array(self, items: list[Any]) -> list[xr.DataArray]: """Handle layered array - returns list of DataArrays (one per layer).""" netcdf = items[0] layers = [] @@ -325,7 +345,7 @@ def layered_array(self, items: list[Any]) -> list[xr.DataArray | Path]: result = TypedTransformer.try_create_dataarray(arr) # Add netcdf flag to attrs if present - if netcdf and isinstance(result, xr.DataArray): + if netcdf: result.attrs["netcdf"] = True layers.append(result) @@ -445,12 +465,13 @@ def _is_newline_token(item: Any) -> bool: return isinstance(item, Token) and item.type == "NEWLINE" @staticmethod - def try_create_dataarray(array_info: dict) -> xr.DataArray | Path: + def try_create_dataarray(array_info: dict) -> xr.DataArray: """ Create xarray DataArray with control metadata in attrs. - For external arrays, returns Path object. - For internal/constant arrays, returns DataArray with control metadata in attrs. + For all array types (constant, internal, external), returns DataArray + with control metadata stored in attrs. External arrays have the file + path stored in attrs["external_path"] with np.nan as placeholder data. """ control = array_info["control"] @@ -463,13 +484,10 @@ def try_create_dataarray(array_info: dict) -> xr.DataArray | Path: case "internal": return xr.DataArray(data=array_info["data"], attrs=attrs) case "external": - # For external arrays, return Path with control as attrs - # Store control metadata separately for external arrays + # For external arrays, use np.nan as placeholder and store path in attrs path = array_info["data"] - # Can't attach attrs to Path, so return a DataArray wrapper or custom object - # For now, return the path and handle control separately in parent - # TODO: Consider a custom ExternalArray class that holds path + control - return path + attrs["external_path"] = str(path) + return xr.DataArray(data=np.nan, attrs=attrs) def __default__(self, data, children, meta): if self.blocks is None or self._flat_fields is None: diff --git a/flopy4/mf6/converter/structure.py b/flopy4/mf6/converter/structure.py index 6c0e0c2..3642768 100644 --- a/flopy4/mf6/converter/structure.py +++ b/flopy4/mf6/converter/structure.py @@ -392,6 +392,123 @@ def _parse_dataframe( return result +def _convert_dataset_to_dict( + ds: xr.Dataset, + field_name: str, + dim_dict: dict, +) -> dict[int, dict]: + """ + Convert xr.Dataset (stress period data) to dict format. + + Dataset structure from transformer: + - data_vars: Field values (e.g., field_0, q, head) + - coords: kper (temporal), layer/row/col or node (spatial) + + Convert to dict format: + {kper: {cellid: value, ...}, ...} + + Parameters + ---------- + ds : xr.Dataset + Input Dataset with stress period data + field_name : str + Name of the field to extract values for + dim_dict : dict + Resolved dimension values (for coordinate conversion) + + Returns + ------- + dict[int, dict] + Dict mapping stress periods to cellid: value dicts + Format: {kper: {cellid: value, ...}, ...} + """ + if not isinstance(ds, xr.Dataset): + return {} + + result: dict[int, dict] = {} + + # Get dimension info + if "record" not in ds.dims: + raise ValueError(f"Expected 'record' dimension in Dataset, got: {list(ds.dims.keys())}") + + # Get kper values + kper_vals = ( + ds.coords.get("kper").values + if "kper" in ds.coords + else np.zeros(ds.dims["record"], dtype=int) + ) + + # Determine spatial coordinate format + has_structured = all(coord in ds.coords for coord in ["layer", "row", "col"]) + has_node = "node" in ds.coords + + if not has_structured and not has_node: + raise ValueError("Dataset must have either (layer, row, col) or (node,) coordinates") + + # Find the data variable - prefer field_name, otherwise use first data_var + data_var_name = None + if field_name in ds.data_vars: + data_var_name = field_name + elif len(ds.data_vars) > 0: + # Use first data variable (e.g., field_0 from transformer) + data_var_name = list(ds.data_vars.keys())[0] + else: + raise ValueError("No data variables found in Dataset") + + # Build records grouped by stress period + for i in range(len(kper_vals)): + kper = int(kper_vals[i]) + if kper not in result: + result[kper] = {} + + # Extract cellid based on coordinate format + if has_structured: + cellid = ( + int(ds.coords["layer"].values[i]), + int(ds.coords["row"].values[i]), + int(ds.coords["col"].values[i]), + ) + else: + cellid = (int(ds.coords["node"].values[i]),) # type: ignore + + # Extract field value + value = ds[data_var_name].values[i] + result[kper][cellid] = value + + return result + + +def _extract_external_path(data: xr.DataArray) -> tuple[str | None, xr.DataArray]: + """ + Extract external file path from DataArray attrs if present. + + Transformer outputs external arrays as DataArray with: + - data: np.nan (placeholder) + - attrs: control_* metadata, plus external_path + + Parameters + ---------- + data : xr.DataArray + DataArray potentially containing external file path + + Returns + ------- + path : str | None + Path to external file, or None if not an external array + data : xr.DataArray + DataArray with external_path removed from attrs if it was present + """ + if isinstance(data, xr.DataArray) and "external_path" in data.attrs: + attrs = dict(data.attrs) + path = attrs.pop("external_path") + # Return updated DataArray without external_path in attrs + data_updated = xr.DataArray( + data=data.values, dims=data.dims, coords=data.coords, attrs=attrs + ) + return str(path), data_updated + return None, data + + def _parse_dict_format( value: dict, expected_dims: list[str], expected_shape: tuple, dim_dict: dict, field, self_ ) -> dict[int, Any]: @@ -543,6 +660,12 @@ def structure_array( value = _parse_dataframe(value, field.name, dim_dict) # Continue processing as dict below + if isinstance(value, xr.Dataset): + # Parse Dataset format (from transformer stress_period_data) + # Convert to dict format for processing + value = _convert_dataset_to_dict(value, field.name, dim_dict) + # Continue processing as dict below + if isinstance(value, dict): # Parse dict format with fill-forward logic parsed_dict = _parse_dict_format(value, dims_names, tuple(shape), dim_dict, field, self_) @@ -689,6 +812,16 @@ def structure_array( result = _parse_list_format(value, dims_names, tuple(shape), field) elif isinstance(value, (xr.DataArray, np.ndarray)): + # Check if this is an external array with path in attrs + external_path = None + if isinstance(value, xr.DataArray): + external_path, value = _extract_external_path(value) + if external_path: + # TODO: Store path for lazy loading + # For now, continue with placeholder (np.nan) or handle as needed + # In the future, this should trigger lazy loading of the external file + pass + # Duck array - validate and reshape if needed result = _validate_duck_array(value, dims_names, tuple(shape), dim_dict) diff --git a/test/test_converter_structure.py b/test/test_converter_structure.py index 73d2adb..00c0de8 100644 --- a/test/test_converter_structure.py +++ b/test/test_converter_structure.py @@ -10,7 +10,9 @@ import xarray as xr from flopy4.mf6.converter.structure import ( + _convert_dataset_to_dict, _detect_grid_reshape, + _extract_external_path, _fill_forward_time, _reshape_grid, _to_xarray, @@ -142,6 +144,113 @@ def test_to_xarray_numpy_array(self): assert "nper" in result.coords assert result.attrs["units"] == "m" + def test_convert_dataset_to_dict_structured(self): + """Test converting xr.Dataset to dict format (structured grid).""" + # Create Dataset as transformer would output it + ds = xr.Dataset( + data_vars={ + "field_0": (["record"], [-1000.0, -2000.0, -3000.0]), + }, + coords={ + "kper": (["record"], [0, 0, 1]), + "layer": (["record"], [0, 0, 0]), + "row": (["record"], [1, 2, 3]), + "col": (["record"], [1, 2, 3]), + }, + ) + dim_dict = {"nlay": 2, "nrow": 10, "ncol": 10} + + result = _convert_dataset_to_dict(ds, "field_0", dim_dict) + + # Check structure: {kper: {cellid: value}} + assert isinstance(result, dict) + assert 0 in result + assert 1 in result + assert (0, 1, 1) in result[0] + assert (0, 2, 2) in result[0] + assert result[0][(0, 1, 1)] == -1000.0 + assert result[0][(0, 2, 2)] == -2000.0 + assert result[1][(0, 3, 3)] == -3000.0 + + def test_convert_dataset_to_dict_unstructured(self): + """Test converting xr.Dataset to dict format (unstructured grid).""" + ds = xr.Dataset( + data_vars={ + "q": (["record"], [100.0, 200.0]), + }, + coords={ + "kper": (["record"], [0, 1]), + "node": (["record"], [5, 10]), + }, + ) + dim_dict = {"nodes": 100} + + result = _convert_dataset_to_dict(ds, "q", dim_dict) + + assert isinstance(result, dict) + assert 0 in result + assert 1 in result + assert (5,) in result[0] + assert (10,) in result[1] + assert result[0][(5,)] == 100.0 + assert result[1][(10,)] == 200.0 + + def test_convert_dataset_to_dict_missing_field_uses_first_var(self): + """Test that if field name not found, uses first data variable.""" + ds = xr.Dataset( + data_vars={ + "field_0": (["record"], [1.0, 2.0]), + }, + coords={ + "kper": (["record"], [0, 0]), + "node": (["record"], [1, 2]), + }, + ) + dim_dict = {"nodes": 100} + + # Request non-existent field, should use field_0 + result = _convert_dataset_to_dict(ds, "non_existent", dim_dict) + + assert isinstance(result, dict) + assert 0 in result + assert (1,) in result[0] + assert result[0][(1,)] == 1.0 + + def test_extract_external_path_with_path(self): + """Test extracting external file path from DataArray attrs.""" + data = xr.DataArray( + data=np.nan, + attrs={ + "control_type": "external", + "control_factor": 1.0, + "control_binary": True, + "external_path": "data/heads.dat", + }, + ) + + path, updated_data = _extract_external_path(data) + + assert path == "data/heads.dat" + assert isinstance(updated_data, xr.DataArray) + assert "external_path" not in updated_data.attrs + assert updated_data.attrs["control_type"] == "external" + assert updated_data.attrs["control_factor"] == 1.0 + + def test_extract_external_path_without_path(self): + """Test that non-external DataArray is returned unchanged.""" + data = xr.DataArray( + data=np.ones((10, 10)), + attrs={ + "control_type": "internal", + "control_factor": 1.0, + }, + ) + + path, updated_data = _extract_external_path(data) + + assert path is None + assert updated_data is data # Should be unchanged + class TestDisComponent: """Test structure_array with Dis component (array dims).""" diff --git a/test/test_mf6_load_integration.py b/test/test_mf6_load_integration.py new file mode 100644 index 0000000..8ca755f --- /dev/null +++ b/test/test_mf6_load_integration.py @@ -0,0 +1,578 @@ +""" +End-to-end integration tests for MF6 file loading. + +Tests the complete flow: parse → transform → load into components. +Tests at multiple levels: individual packages, models, and full simulations. +""" + +from pathlib import Path + +import numpy as np +import pytest +import xarray as xr +from modflow_devtools.dfn import MapV1To2, load_flat +from modflow_devtools.models import copy_to + +from flopy4.mf6.codec.reader.parser import get_typed_parser +from flopy4.mf6.codec.reader.transformer import TypedTransformer + + +@pytest.fixture(scope="module") +def v2_dfns(dfn_path): + """Load and convert DFNs to V2 format.""" + v1_dfns = load_flat(dfn_path) + mapper = MapV1To2() + return {name: mapper.map(dfn) for name, dfn in v1_dfns.items()} + + +def load_package_file(file_path: Path, component_name: str, dfn): + """Helper to parse and transform a package file.""" + parser = get_typed_parser(component_name) + transformer = TypedTransformer(dfn=dfn) + + with open(file_path, "r") as f: + content = f.read() + + tree = parser.parse(content) + result = transformer.transform(tree) + return result + + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) +def test_load_ic_package(tmp_path, v2_dfns, model_name): + """Test loading IC package with constant array.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + ic_files = list(workspace.rglob("*.ic")) + assert len(ic_files) > 0, "No IC files found" + + result = load_package_file(ic_files[0], "gwf-ic", v2_dfns["gwf-ic"]) + + # Verify structure + assert isinstance(result, dict) + assert "griddata" in result + assert "strt" in result["griddata"] + + # Verify array format + strt = result["griddata"]["strt"] + assert isinstance(strt, xr.DataArray) + assert "control_type" in strt.attrs + assert strt.attrs["control_type"] in ["constant", "internal", "external"] + + # Verify data + if strt.attrs["control_type"] == "constant": + assert "control_value" in strt.attrs + assert isinstance(strt.values, (int, float, np.ndarray)) + elif strt.attrs["control_type"] == "external": + assert "external_path" in strt.attrs + assert np.isnan(strt.values) + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) +def test_load_dis_package(tmp_path, v2_dfns, model_name): + """Test loading DIS package with dimensions and arrays.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + dis_files = list(workspace.rglob("*.dis")) + assert len(dis_files) > 0, "No DIS files found" + + result = load_package_file(dis_files[0], "gwf-dis", v2_dfns["gwf-dis"]) + + # Verify dimensions block + assert "dimensions" in result + dims = result["dimensions"] + assert "nlay" in dims + assert "nrow" in dims + assert "ncol" in dims + assert all(isinstance(v, int) and v > 0 for v in [dims["nlay"], dims["nrow"], dims["ncol"]]) + + # Verify griddata arrays + assert "griddata" in result + griddata = result["griddata"] + + # Check required arrays + for array_name in ["delr", "delc", "top", "botm"]: + assert array_name in griddata, f"Missing required array: {array_name}" + arr = griddata[array_name] + assert isinstance(arr, xr.DataArray), f"{array_name} should be DataArray" + assert "control_type" in arr.attrs + + # Verify layered array (botm) + botm = griddata["botm"] + if "layer" in botm.dims: + assert botm.sizes["layer"] == dims["nlay"] + assert "controls" in botm.attrs # Layered arrays have controls list + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) +def test_load_wel_package(tmp_path, v2_dfns, model_name): + """Test loading WEL package with stress period data.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + wel_files = list(workspace.rglob("*.wel")) + + if len(wel_files) == 0: + pytest.skip("No WEL files in this model") + + result = load_package_file(wel_files[0], "gwf-wel", v2_dfns["gwf-wel"]) + + # Verify dimensions + assert "dimensions" in result + assert "maxbound" in result["dimensions"] + maxbound = result["dimensions"]["maxbound"] + assert isinstance(maxbound, int) and maxbound > 0 + + # Verify stress period data format + assert "stress_period_data" in result + spd = result["stress_period_data"] + assert isinstance(spd, xr.Dataset), "stress_period_data should be xr.Dataset" + + # Verify Dataset structure + assert "kper" in spd.coords, "Should have kper coordinate" + assert len(spd.data_vars) > 0, "Should have at least one data variable" + + # Verify spatial coordinates + has_structured = all(c in spd.coords for c in ["layer", "row", "col"]) + has_unstructured = "node" in spd.coords + assert has_structured or has_unstructured, "Should have spatial coordinates" + + if has_structured: + # Structured grid - already verified all 3 coords exist + pass + elif has_unstructured: + # Unstructured grid - already verified node exists + pass + + # Verify data makes sense + assert len(spd.kper) > 0, "Should have at least one record" + + # Get first data variable (e.g., 'q' or 'field_0') + data_var_name = list(spd.data_vars.keys())[0] + data_var = spd[data_var_name] + assert data_var.dims == ("record",), "Data var should have 'record' dimension" + assert len(data_var) == len(spd.kper), "Data var length should match records" + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) +def test_load_oc_package(tmp_path, v2_dfns, model_name): + """Test loading OC package with period blocks (non-tabular).""" + workspace = copy_to(tmp_path, model_name, verbose=False) + oc_files = list(workspace.rglob("*.oc")) + + if len(oc_files) == 0: + pytest.skip("No OC files in this model") + + result = load_package_file(oc_files[0], "gwf-oc", v2_dfns["gwf-oc"]) + + # Verify options block + assert "options" in result + options = result["options"] + + # Check for file records + if "budget_filerecord" in options: + assert isinstance(options["budget_filerecord"], dict) + assert "budgetfile" in options["budget_filerecord"] + + if "head_filerecord" in options: + assert isinstance(options["head_filerecord"], dict) + assert "headfile" in options["head_filerecord"] + + # Verify period blocks (should NOT be converted to Dataset) + period_keys = [k for k in result.keys() if k.startswith("period")] + assert len(period_keys) > 0, "Should have period blocks" + + # Period blocks should remain as dicts (not Dataset) for OC + first_period = result[period_keys[0]] + assert isinstance(first_period, dict), "OC period blocks should be dicts" + + # Check for save/print records + if "saverecord" in first_period: + assert isinstance(first_period["saverecord"], list) + if first_period["saverecord"]: + assert isinstance(first_period["saverecord"][0], dict) + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) +def test_load_npf_package(tmp_path, v2_dfns, model_name): + """Test loading NPF package with arrays and keywords.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + npf_files = list(workspace.rglob("*.npf")) + + if len(npf_files) == 0: + pytest.skip("No NPF files in this model") + + result = load_package_file(npf_files[0], "gwf-npf", v2_dfns["gwf-npf"]) + + # Verify options with keywords + if "options" in result: + options = result["options"] + # Check for boolean keyword fields + keyword_fields = {k: v for k, v in options.items() if isinstance(v, bool)} + # NPF might have save_flows, save_specific_discharge, etc. + assert len(keyword_fields) >= 0 # May or may not have keywords + + # Verify griddata arrays + assert "griddata" in result + griddata = result["griddata"] + + # NPF requires icelltype and k + assert "icelltype" in griddata + assert "k" in griddata + + # Verify arrays are DataArrays + for array_name in ["icelltype", "k"]: + arr = griddata[array_name] + assert isinstance(arr, xr.DataArray) + assert "control_type" in arr.attrs + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) +def test_load_sto_package(tmp_path, v2_dfns, model_name): + """Test loading STO package with period blocks (keyword-based).""" + workspace = copy_to(tmp_path, model_name, verbose=False) + sto_files = list(workspace.rglob("*.sto")) + + if len(sto_files) == 0: + pytest.skip("No STO files in this model") + + result = load_package_file(sto_files[0], "gwf-sto", v2_dfns["gwf-sto"]) + + # Verify griddata arrays + if "griddata" in result: + griddata = result["griddata"] + + # Check for common STO arrays + if "iconvert" in griddata: + assert isinstance(griddata["iconvert"], xr.DataArray) + + # Verify period blocks (keyword-based, NOT converted to Dataset) + period_keys = [k for k in result.keys() if k.startswith("period")] + + if period_keys: + # STO period blocks should remain as dicts (STEADY-STATE, TRANSIENT keywords) + first_period = result[period_keys[0]] + assert isinstance(first_period, dict) + + # Check for STEADY-STATE or TRANSIENT + # Note: These are keyword fields, not tabular data + has_keyword = any( + k in first_period for k in ["steady_state", "transient", "stress_period_data"] + ) + assert has_keyword, "Period block should have steady-state/transient indicator" + + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) +def test_dataset_to_dataframe_conversion(tmp_path, v2_dfns, model_name): + """Test that Dataset easily converts to DataFrame for user convenience.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + wel_files = list(workspace.rglob("*.wel")) + + if len(wel_files) == 0: + pytest.skip("No WEL files in this model") + + result = load_package_file(wel_files[0], "gwf-wel", v2_dfns["gwf-wel"]) + spd = result["stress_period_data"] + + # Should easily convert to DataFrame + df = spd.to_dataframe() + assert df is not None + assert len(df) > 0 + + # DataFrame should have kper and spatial coords as columns + assert "kper" in df.columns + if "layer" in spd.coords: + assert "layer" in df.columns + assert "row" in df.columns + assert "col" in df.columns + else: + assert "node" in df.columns + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) +def test_dataset_period_filtering(self, tmp_path, v2_dfns, model_name): + """Test filtering Dataset by period.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + wel_files = list(workspace.rglob("*.wel")) + + if len(wel_files) == 0: + pytest.skip("No WEL files in this model") + + result = load_package_file(wel_files[0], "gwf-wel", v2_dfns["gwf-wel"]) + spd = result["stress_period_data"] + + # Should be able to filter by period + unique_periods = np.unique(spd.kper.values) + if len(unique_periods) > 1: + # Filter to first period + first_kper = unique_periods[0] + period_data = spd.sel(record=spd.kper == first_kper) + + assert len(period_data.kper) > 0 + assert all(period_data.kper.values == first_kper) + + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) +def test_load_into_ic_component(tmp_path, v2_dfns, model_name): + """Test loading parsed IC data into Ic component.""" + from flopy4.mf6.gwf import Ic + + workspace = copy_to(tmp_path, model_name, verbose=False) + ic_files = list(workspace.rglob("*.ic")) + assert len(ic_files) > 0 + + # Parse and transform + result = load_package_file(ic_files[0], "gwf-ic", v2_dfns["gwf-ic"]) + + # Load into component - use cattrs to structure + from cattrs import structure + + try: + ic = structure(result, Ic) + + # Verify component was created + assert ic is not None + assert hasattr(ic, "griddata") + + # Verify strt was loaded + if hasattr(ic.griddata, "strt"): + strt = ic.griddata.strt + assert strt is not None + # Could be DataArray or scalar depending on structure_array converter + + except Exception as e: + # If structuring isn't fully implemented yet, that's expected + pytest.skip(f"Component structuring not yet implemented: {e}") + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) +def test_load_into_dis_component(tmp_path, v2_dfns, model_name): + """Test loading parsed DIS data into Dis component.""" + from flopy4.mf6.gwf import Dis + + workspace = copy_to(tmp_path, model_name, verbose=False) + dis_files = list(workspace.rglob("*.dis")) + assert len(dis_files) > 0 + + # Parse and transform + result = load_package_file(dis_files[0], "gwf-dis", v2_dfns["gwf-dis"]) + + # Load into component + from cattrs import structure + + try: + dis = structure(result, Dis) + + # Verify component was created + assert dis is not None + + # Verify dimensions were loaded + if hasattr(dis, "dimensions"): + assert dis.dimensions.nlay > 0 + assert dis.dimensions.nrow > 0 + assert dis.dimensions.ncol > 0 + + except Exception as e: + pytest.skip(f"Component structuring not yet implemented: {e}") + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) +def test_load_into_wel_component(tmp_path, v2_dfns, model_name): + """Test loading parsed WEL data into Wel component.""" + from flopy4.mf6.gwf import Wel + + workspace = copy_to(tmp_path, model_name, verbose=False) + wel_files = list(workspace.rglob("*.wel")) + + if len(wel_files) == 0: + pytest.skip("No WEL files in this model") + + # Parse and transform + result = load_package_file(wel_files[0], "gwf-wel", v2_dfns["gwf-wel"]) + + # Load into component + from cattrs import structure + + try: + wel = structure(result, Wel) + + # Verify component was created + assert wel is not None + + # Verify dimensions + if hasattr(wel, "dimensions"): + assert wel.dimensions.maxbound > 0 + + # Verify stress period data + # The Dataset should be accessible via stress_period_data property + if hasattr(wel, "stress_period_data"): + spd = wel.stress_period_data + # Might be Dataset or DataFrame depending on property getter + assert spd is not None + + except Exception as e: + pytest.skip(f"Component structuring not yet implemented: {e}") + + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) +def test_load_complete_gwf_model(tmp_path, v2_dfns, model_name): + """Test loading all packages for a complete GWF model.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + + # Find all package files for this model + # Look for common GWF packages + package_files = {} + package_types = { + "dis": "gwf-dis", + "ic": "gwf-ic", + "npf": "gwf-npf", + "sto": "gwf-sto", + "oc": "gwf-oc", + "wel": "gwf-wel", + "chd": "gwf-chd", + "drn": "gwf-drn", + "rch": "gwf-rch", + } + + for ext, component_name in package_types.items(): + files = list(workspace.rglob(f"*.{ext}")) + if files: + package_files[ext] = (files[0], component_name) + + # Should have at least DIS and IC + assert "dis" in package_files, "Model should have DIS package" + assert "ic" in package_files, "Model should have IC package" + + print(f"\nFound {len(package_files)} packages:") + for ext in package_files: + print(f" - {ext.upper()}") + + # Parse all packages + parsed_packages = {} + for ext, (file_path, component_name) in package_files.items(): + try: + result = load_package_file(file_path, component_name, v2_dfns[component_name]) + parsed_packages[ext] = result + print(f" ✓ Parsed {ext.upper()}") + except Exception as e: + print(f" ✗ Failed to parse {ext.upper()}: {e}") + # Continue with other packages + + # Verify we successfully parsed key packages + assert "dis" in parsed_packages, "DIS package should parse successfully" + assert "ic" in parsed_packages, "IC package should parse successfully" + + # Verify DIS has required structure + dis = parsed_packages["dis"] + assert "dimensions" in dis + assert "griddata" in dis + + # Verify IC has required structure + ic = parsed_packages["ic"] + assert "griddata" in ic + assert "strt" in ic["griddata"] + + print(f"\n✓ Successfully parsed {len(parsed_packages)} packages for model") + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) +def test_load_model_with_stress_packages(tmp_path, v2_dfns, model_name): + """Test loading a model with stress period packages (WEL, CHD, etc.).""" + workspace = copy_to(tmp_path, model_name, verbose=False) + + # This model should have WEL + wel_files = list(workspace.rglob("*.wel")) + assert len(wel_files) > 0, "Model should have WEL package" + + # Parse WEL + wel_result = load_package_file(wel_files[0], "gwf-wel", v2_dfns["gwf-wel"]) + + # Verify stress period data is Dataset + assert "stress_period_data" in wel_result + spd = wel_result["stress_period_data"] + assert isinstance(spd, xr.Dataset) + + # Verify we can access data + assert len(spd.data_vars) > 0 + assert "kper" in spd.coords + + print("\n✓ Model has stress packages with Dataset format") + print(f" - WEL has {len(spd.kper)} stress period records") + print(f" - Data variables: {list(spd.data_vars.keys())}") + + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) +def test_load_simulation_structure(tmp_path, model_name): + """Test loading and parsing all files in a simulation.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + + # Find simulation name file + mfsim_files = list(workspace.rglob("mfsim.nam")) + assert len(mfsim_files) > 0, "Should have mfsim.nam file" + + print(f"\nSimulation workspace: {workspace}") + + # Find all model name files + model_nam_files = list(workspace.rglob("*.nam")) + model_nam_files = [f for f in model_nam_files if f.name != "mfsim.nam"] + + print(f"Found {len(model_nam_files)} model(s)") + + # Count all package files + all_package_files = list(workspace.rglob("*.[a-z][a-z][a-z]")) + # Filter out name files + package_files = [f for f in all_package_files if not f.name.endswith(".nam")] + + print(f"Found {len(package_files)} package file(s)") + + # Group by extension + extensions = {} + for f in package_files: + ext = f.suffix[1:] # Remove leading dot + extensions[ext] = extensions.get(ext, 0) + 1 + + print("\nPackages by type:") + for ext, count in sorted(extensions.items()): + print(f" {ext.upper()}: {count}") + + # Verify we have essential packages + assert "dis" in extensions or "disv" in extensions, "Should have discretization" + # IC is optional (some models use steady-state without IC) + assert "npf" in extensions or "lpf" in extensions, "Should have flow package" + +@pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) +def test_parse_all_simulation_packages(tmp_path, v2_dfns, model_name): + """Test parsing all packages in a simulation.""" + workspace = copy_to(tmp_path, model_name, verbose=False) + + # Component type mapping + component_map = { + "dis": "gwf-dis", + "ic": "gwf-ic", + "npf": "gwf-npf", + "sto": "gwf-sto", + "oc": "gwf-oc", + "wel": "gwf-wel", + "chd": "gwf-chd", + "drn": "gwf-drn", + "rch": "gwf-rch", + } + + parsed_count = 0 + failed_count = 0 + + print("\nParsing simulation packages:") + + for ext, component_name in component_map.items(): + files = list(workspace.rglob(f"*.{ext}")) + if not files: + continue + + for file_path in files: + try: + result = load_package_file(file_path, component_name, v2_dfns[component_name]) + parsed_count += 1 + print(f" ✓ {file_path.name}") + + # Verify result structure + assert isinstance(result, dict), f"{file_path.name} should return dict" + + except Exception as e: + failed_count += 1 + print(f" ✗ {file_path.name}: {e}") + + print(f"\nResults: {parsed_count} parsed, {failed_count} failed") + assert parsed_count > 0, "Should successfully parse at least some packages" + + # Most packages should parse successfully + success_rate = ( + parsed_count / (parsed_count + failed_count) if (parsed_count + failed_count) > 0 else 0 + ) + assert success_rate >= 0.7, f"Should have at least 70% success rate, got {success_rate:.1%}" diff --git a/test/test_mf6_reader.py b/test/test_mf6_reader.py index 28cd556..71bd060 100644 --- a/test/test_mf6_reader.py +++ b/test/test_mf6_reader.py @@ -4,7 +4,6 @@ from pathlib import Path import numpy as np -import pandas as pd import pytest import xarray as xr from lark import Lark @@ -218,10 +217,13 @@ def test_transform_external_array(): OPEN/CLOSE "data/heads.dat" FACTOR 1.0 (BINARY) """) ) - # Result is now a Path for external arrays - assert isinstance(result, Path) - assert result == Path("data/heads.dat") - # TODO: Control metadata for external arrays needs special handling + # External arrays now return DataArray with path in attrs + assert isinstance(result, xr.DataArray) + assert result.attrs["control_type"] == "external" + assert result.attrs["external_path"] == "data/heads.dat" + assert result.attrs["control_factor"] == 1.0 + assert result.attrs["control_binary"] is True + assert np.isnan(result.values) # Placeholder data def test_transform_layered_array(): @@ -319,10 +321,13 @@ def test_transform_full_component(): assert y.attrs["control_type"] == "internal" assert np.array_equal(y.values, np.array([4.0, 5.0, 6.0])) - # External arrays still return Path directly + # External arrays now return DataArray with path in attrs z = result["arrays"]["z"] - assert isinstance(z, Path) - assert z == Path("data/z.dat") + assert isinstance(z, xr.DataArray) + assert z.attrs["control_type"] == "external" + assert z.attrs["external_path"] == "data/z.dat" + assert z.attrs["control_factor"] == 1.0 + assert z.attrs["control_binary"] is True # Real model tests using modflow-devtools models API @@ -438,14 +443,16 @@ def test_transform_gwf_ic_file(model_workspace, dfn_path): # Check strt array structure - now returns xr.DataArray with control in attrs strt = result["griddata"]["strt"] - if isinstance(strt, xr.DataArray): - # DataArray for constant or internal - assert "control_type" in strt.attrs - assert strt.attrs["control_type"] in ["constant", "internal"] + assert isinstance(strt, xr.DataArray) + assert "control_type" in strt.attrs + assert strt.attrs["control_type"] in ["constant", "internal", "external"] + + # For external arrays, check path in attrs + if strt.attrs["control_type"] == "external": + assert "external_path" in strt.attrs + assert np.isnan(strt.values) or True # Placeholder data + else: assert strt.values is not None - elif isinstance(strt, Path): - # Path for external arrays - assert strt.exists() or True # May not exist in test env @pytest.mark.parametrize("model_workspace", ["mf6/example/ex-gwf-bcf2ss-p01a"], indirect=True) @@ -484,35 +491,36 @@ def test_transform_gwf_wel_file(model_workspace, dfn_path): assert "dimensions" in result assert result["dimensions"]["maxbound"] == 2 - # Stress period data is now aggregated into a single DataFrame + # Stress period data is now aggregated into a single Dataset assert "stress_period_data" in result spd = result["stress_period_data"] - assert isinstance(spd, pd.DataFrame) + assert isinstance(spd, xr.Dataset) - # Check DataFrame structure - assert "kper" in spd.columns - assert "layer" in spd.columns - assert "row" in spd.columns - assert "col" in spd.columns + # Check Dataset structure - coords and data_vars + assert "kper" in spd.coords + assert "layer" in spd.coords + assert "row" in spd.coords + assert "col" in spd.coords + assert len(spd.data_vars) > 0 # Should have at least one data field (q or field_0) # Filter for period 2 data (kper = 1 in 0-indexed) - period_2_data = spd[spd["kper"] == 1] - assert len(period_2_data) == 2 # Should have 2 wells in period 2 + period_2_data = spd.sel(record=spd.kper == 1) + assert len(period_2_data.kper) == 2 # Should have 2 wells in period 2 # Check specific values from the file # First well: 2 3 4 -3.5e4 - first_well = period_2_data.iloc[0] - assert first_well["layer"] == 2 - assert first_well["row"] == 3 - assert first_well["col"] == 4 - assert first_well.iloc[-1] == -3.5e4 # Last column is the field value (q) + assert period_2_data.layer.values[0] == 2 + assert period_2_data.row.values[0] == 3 + assert period_2_data.col.values[0] == 4 + # Check the data field (should be first data_var) + field_name = list(spd.data_vars.keys())[0] + assert period_2_data[field_name].values[0] == -3.5e4 # Second well: 2 8 4 -3.5e4 - second_well = period_2_data.iloc[1] - assert second_well["layer"] == 2 - assert second_well["row"] == 8 - assert second_well["col"] == 4 - assert second_well.iloc[-1] == -3.5e4 + assert period_2_data.layer.values[1] == 2 + assert period_2_data.row.values[1] == 8 + assert period_2_data.col.values[1] == 4 + assert period_2_data[field_name].values[1] == -3.5e4 @pytest.mark.parametrize("model_workspace", ["mf6/example/ex-gwf-bcf2ss-p01a"], indirect=True) @@ -647,12 +655,8 @@ def test_transform_gwf_dis_file(model_workspace, dfn_path): # Arrays are now xr.DataArray with control in attrs delr = griddata["delr"] - if isinstance(delr, xr.DataArray): - assert "control_type" in delr.attrs - assert delr.values is not None - elif isinstance(delr, Path): - # External array - pass + assert isinstance(delr, xr.DataArray) + assert "control_type" in delr.attrs @pytest.mark.parametrize("model_workspace", ["mf6/example/ex-gwf-csub-p01"], indirect=True) @@ -700,20 +704,12 @@ def test_transform_gwf_npf_file(model_workspace, dfn_path): # Arrays are now xr.DataArray with control in attrs icelltype = griddata["icelltype"] - if isinstance(icelltype, xr.DataArray): - assert "control_type" in icelltype.attrs - assert icelltype.values is not None - elif isinstance(icelltype, Path): - # External array - pass + assert isinstance(icelltype, xr.DataArray) + assert "control_type" in icelltype.attrs k = griddata["k"] - if isinstance(k, xr.DataArray): - assert "control_type" in k.attrs - assert k.values is not None - elif isinstance(k, Path): - # External array - pass + assert isinstance(k, xr.DataArray) + assert "control_type" in k.attrs @pytest.mark.parametrize("model_workspace", ["mf6/example/ex-gwf-csub-p01"], indirect=True) @@ -753,9 +749,5 @@ def test_transform_gwf_sto_file(model_workspace, dfn_path): # STO should have iconvert assert "iconvert" in griddata iconvert = griddata["iconvert"] - if isinstance(iconvert, xr.DataArray): - assert "control_type" in iconvert.attrs - assert iconvert.values is not None - elif isinstance(iconvert, Path): - # External array - pass + assert isinstance(iconvert, xr.DataArray) + assert "control_type" in iconvert.attrs diff --git a/test/test_transformer_output_format.py b/test/test_transformer_output_format.py deleted file mode 100644 index 72eb828..0000000 --- a/test/test_transformer_output_format.py +++ /dev/null @@ -1,361 +0,0 @@ -""" -Tests to inspect and document the current TypedTransformer output format. - -This test suite parses real MF6 input files and inspects the exact structure -of the TypedTransformer output to identify what needs to change for integration -with the structure_array converter. -""" - -from pathlib import Path -from pprint import pformat - -import pytest -import xarray as xr -from modflow_devtools.dfn import MapV1To2, load_flat -from modflow_devtools.models import copy_to - -from flopy4.mf6.codec.reader.parser import get_typed_parser -from flopy4.mf6.codec.reader.transformer import TypedTransformer - - -@pytest.fixture(scope="module") -def v2_dfns(dfn_path): - """Load and convert DFNs to V2 format.""" - v1_dfns = load_flat(dfn_path) - mapper = MapV1To2() - return {name: mapper.map(dfn) for name, dfn in v1_dfns.items()} - - -def parse_file(file_path: Path, component_name: str, dfn): - """Helper to parse a file and return transformed output.""" - parser = get_typed_parser(component_name) - transformer = TypedTransformer(dfn=dfn) - - with open(file_path, "r") as f: - content = f.read() - - tree = parser.parse(content) - result = transformer.transform(tree) - return result - - -class TestArrayOutput: - """Test TypedTransformer output format for array fields.""" - - @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) - def test_ic_constant_array(self, tmp_path, v2_dfns, model_name): - """Test output format for CONSTANT array (IC package).""" - workspace = copy_to(tmp_path, model_name, verbose=False) - ic_files = list(workspace.rglob("*.ic")) - assert len(ic_files) > 0, "No IC files found" - - result = parse_file(ic_files[0], "gwf-ic", v2_dfns["gwf-ic"]) - - print("\n" + "=" * 80) - print("IC FILE OUTPUT:") - print("=" * 80) - print(pformat(result, width=120)) - - # Check structure - assert "griddata" in result, "Should have griddata block" - assert "strt" in result["griddata"], "Should have strt field" - - strt = result["griddata"]["strt"] - print("\n" + "-" * 80) - print("STRT FIELD STRUCTURE:") - print("-" * 80) - print(f"Type: {type(strt)}") - print(f"Keys: {strt.keys() if isinstance(strt, dict) else 'N/A'}") - - if isinstance(strt, dict): - if "control" in strt: - print(f"Control: {strt['control']}") - if "data" in strt: - data = strt["data"] - print(f"Data type: {type(data)}") - print(f"Data shape: {data.shape if hasattr(data, 'shape') else 'N/A'}") - if isinstance(data, xr.DataArray): - print(f"Data dims: {data.dims}") - print(f"Data attrs: {data.attrs}") - - # Current behavior: should be {"control": {...}, "data": ...} - assert isinstance(strt, dict), "Array field should be a dict" - assert "control" in strt, "Should have 'control' key" - assert "data" in strt, "Should have 'data' key" - - print("\n" + "=" * 80) - print("FINDING: Array fields use {'control': ..., 'data': ...} wrapper") - print("NEEDED: Should be xr.DataArray with control in attrs") - print("=" * 80) - - @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) - def test_dis_internal_arrays(self, tmp_path, v2_dfns, model_name): - """Test output format for INTERNAL arrays (DIS package).""" - workspace = copy_to(tmp_path, model_name, verbose=False) - dis_files = list(workspace.rglob("*.dis")) - assert len(dis_files) > 0, "No DIS files found" - - result = parse_file(dis_files[0], "gwf-dis", v2_dfns["gwf-dis"]) - - print("\n" + "=" * 80) - print("DIS FILE OUTPUT:") - print("=" * 80) - print(pformat(result, width=120, depth=2)) # Limit depth for readability - - assert "griddata" in result - - # Check delr array - if "delr" in result["griddata"]: - delr = result["griddata"]["delr"] - print("\n" + "-" * 80) - print("DELR FIELD (INTERNAL ARRAY):") - print("-" * 80) - print(f"Type: {type(delr)}") - if isinstance(delr, dict): - print(f"Keys: {delr.keys()}") - if "control" in delr: - print(f"Control: {delr['control']}") - if "data" in delr: - print(f"Data type: {type(delr['data'])}") - print( - f"Data shape: {delr['data'].shape if hasattr(delr['data'], 'shape') else 'N/A'}" - ) - - # Check botm array (layered) - if "botm" in result["griddata"]: - botm = result["griddata"]["botm"] - print("\n" + "-" * 80) - print("BOTM FIELD (LAYERED ARRAY):") - print("-" * 80) - print(f"Type: {type(botm)}") - if isinstance(botm, dict): - print(f"Keys: {botm.keys()}") - if "control" in botm: - print(f"Control type: {type(botm['control'])}") - print(f"Control: {botm['control']}") - if "data" in botm: - data = botm["data"] - print(f"Data type: {type(data)}") - if isinstance(data, xr.DataArray): - print(f"Data shape: {data.shape}") - print(f"Data dims: {data.dims}") - - -class TestStressPeriodDataOutput: - """Test TypedTransformer output format for stress period data.""" - - @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) - def test_wel_stress_period_data(self, tmp_path, v2_dfns, model_name): - """Test output format for WEL stress period data.""" - workspace = copy_to(tmp_path, model_name, verbose=False) - wel_files = list(workspace.rglob("*.wel")) - - if len(wel_files) == 0: - pytest.skip("No WEL files in this model") - - result = parse_file(wel_files[0], "gwf-wel", v2_dfns["gwf-wel"]) - - print("\n" + "=" * 80) - print("WEL FILE OUTPUT:") - print("=" * 80) - print(pformat(result, width=120)) - - # Find period blocks - period_keys = [k for k in result.keys() if k.startswith("period")] - print("\n" + "-" * 80) - print(f"PERIOD BLOCK KEYS: {period_keys}") - print("-" * 80) - - if period_keys: - first_period = result[period_keys[0]] - print(f"\nFirst period block: {period_keys[0]}") - print(f"Keys: {first_period.keys() if isinstance(first_period, dict) else 'N/A'}") - - if "stress_period_data" in first_period: - spd = first_period["stress_period_data"] - print(f"\nStress period data type: {type(spd)}") - print(f"Length: {len(spd) if hasattr(spd, '__len__') else 'N/A'}") - if spd and len(spd) > 0: - print(f"First record type: {type(spd[0])}") - print(f"First record: {spd[0]}") - print(f"Record length: {len(spd[0]) if hasattr(spd[0], '__len__') else 'N/A'}") - - print("\n" + "=" * 80) - print("FINDING: Period blocks use 'period N' string keys") - print("FINDING: stress_period_data is list of lists: [[k, i, j, q], ...]") - print("NEEDED: Should use integer keys {0: ..., 1: ...}") - print("NEEDED: Should be dict or list-of-tuples format for structure_array") - print("=" * 80) - - @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) - def test_chd_stress_period_data(self, tmp_path, v2_dfns, model_name): - """Test output format for CHD stress period data.""" - workspace = copy_to(tmp_path, model_name, verbose=False) - chd_files = list(workspace.rglob("*.chd")) - - if len(chd_files) == 0: - pytest.skip("No CHD files in this model") - - result = parse_file(chd_files[0], "gwf-chd", v2_dfns["gwf-chd"]) - - print("\n" + "=" * 80) - print("CHD FILE OUTPUT:") - print("=" * 80) - print(pformat(result, width=120)) - - period_keys = [k for k in result.keys() if k.startswith("period")] - if period_keys: - first_period = result[period_keys[0]] - if "stress_period_data" in first_period: - spd = first_period["stress_period_data"] - print("\nCHD stress_period_data format:") - print(f"Type: {type(spd)}") - if spd and len(spd) > 0: - print(f"Sample records: {spd[:3]}") - - -class TestRecordOutput: - """Test TypedTransformer output format for record fields.""" - - @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) - def test_oc_record_fields(self, tmp_path, v2_dfns, model_name): - """Test output format for OC package record fields.""" - workspace = copy_to(tmp_path, model_name, verbose=False) - oc_files = list(workspace.rglob("*.oc")) - - if len(oc_files) == 0: - pytest.skip("No OC files in this model") - - result = parse_file(oc_files[0], "gwf-oc", v2_dfns["gwf-oc"]) - - print("\n" + "=" * 80) - print("OC FILE OUTPUT:") - print("=" * 80) - print(pformat(result, width=120)) - - # Check options block for record fields - if "options" in result: - print("\n" + "-" * 80) - print("OPTIONS BLOCK:") - print("-" * 80) - options = result["options"] - for key, val in options.items(): - print(f"{key}: {type(val)} = {val}") - - # Check period blocks - period_keys = [k for k in result.keys() if k.startswith("period")] - if period_keys: - first_period = result[period_keys[0]] - print("\n" + "-" * 80) - print(f"FIRST PERIOD BLOCK ({period_keys[0]}):") - print("-" * 80) - for key, val in first_period.items(): - print(f"{key}: {type(val)}") - if isinstance(val, list) and val: - print(f" First item: {val[0]}") - - -class TestKeywordOutput: - """Test TypedTransformer output format for keyword fields.""" - - @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) - def test_npf_keywords(self, tmp_path, v2_dfns, model_name): - """Test output format for NPF package keyword fields.""" - workspace = copy_to(tmp_path, model_name, verbose=False) - npf_files = list(workspace.rglob("*.npf")) - - if len(npf_files) == 0: - pytest.skip("No NPF files in this model") - - result = parse_file(npf_files[0], "gwf-npf", v2_dfns["gwf-npf"]) - - print("\n" + "=" * 80) - print("NPF FILE OUTPUT:") - print("=" * 80) - print(pformat(result, width=120, depth=2)) - - if "options" in result: - options = result["options"] - print("\n" + "-" * 80) - print("KEYWORD FIELDS:") - print("-" * 80) - for key, val in options.items(): - if isinstance(val, bool): - print(f"{key}: {type(val)} = {val}") - - print("\n" + "=" * 80) - print("FINDING: Keywords are stored as (name, True) tuples, transformed to {name: True}") - print("NEEDED: This format is probably OK for structure_array") - print("=" * 80) - - -class TestDimensionsOutput: - """Test TypedTransformer output format for dimension fields.""" - - @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) - def test_dis_dimensions(self, tmp_path, v2_dfns, model_name): - """Test output format for DIS dimensions block.""" - workspace = copy_to(tmp_path, model_name, verbose=False) - dis_files = list(workspace.rglob("*.dis")) - assert len(dis_files) > 0 - - result = parse_file(dis_files[0], "gwf-dis", v2_dfns["gwf-dis"]) - - print("\n" + "=" * 80) - print("DIS DIMENSIONS BLOCK:") - print("=" * 80) - - if "dimensions" in result: - dims = result["dimensions"] - print(pformat(dims, width=120)) - print("\n" + "-" * 80) - for key, val in dims.items(): - print(f"{key}: {type(val)} = {val}") - - print("\n" + "=" * 80) - print("FINDING: Dimension fields are simple integers") - print("NEEDED: This format is correct for structure_array") - print("=" * 80) - - -class TestExternalArrayOutput: - """Test TypedTransformer output format for external arrays.""" - - @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) - def test_external_array_reference(self, tmp_path, v2_dfns, model_name): - """Test output format when arrays reference external files.""" - workspace = copy_to(tmp_path, model_name, verbose=False) - - # Try to find a package with external arrays - # DIS often has external arrays for large grids - dis_files = list(workspace.rglob("*.dis")) - if len(dis_files) == 0: - pytest.skip("No DIS files found") - - result = parse_file(dis_files[0], "gwf-dis", v2_dfns["gwf-dis"]) - - print("\n" + "=" * 80) - print("CHECKING FOR EXTERNAL ARRAYS:") - print("=" * 80) - - # Look through griddata for external arrays - if "griddata" in result: - for field_name, field_val in result["griddata"].items(): - if isinstance(field_val, dict) and "control" in field_val: - control = field_val["control"] - if isinstance(control, dict) and control.get("type") == "external": - print(f"\nFound external array: {field_name}") - print(f"Control: {control}") - print(f"Data type: {type(field_val['data'])}") - print(f"Data value: {field_val['data']}") - - print("\n" + "-" * 80) - print("FINDING: External arrays have data as Path object") - print("NEEDED: Check if structure_array handles Path correctly") - print("-" * 80) - break - - -if __name__ == "__main__": - # Allow running this file directly for debugging - pytest.main([__file__, "-v", "-s"]) From 8b174483f8fe2a010fac159d37f7e37b055d8121 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 24 Nov 2025 05:18:41 -0500 Subject: [PATCH 4/8] closer --- test/test_mf6_load_integration.py | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/test/test_mf6_load_integration.py b/test/test_mf6_load_integration.py index 8ca755f..5d619cc 100644 --- a/test/test_mf6_load_integration.py +++ b/test/test_mf6_load_integration.py @@ -66,6 +66,7 @@ def test_load_ic_package(tmp_path, v2_dfns, model_name): assert "external_path" in strt.attrs assert np.isnan(strt.values) + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) def test_load_dis_package(tmp_path, v2_dfns, model_name): """Test loading DIS package with dimensions and arrays.""" @@ -100,6 +101,7 @@ def test_load_dis_package(tmp_path, v2_dfns, model_name): assert botm.sizes["layer"] == dims["nlay"] assert "controls" in botm.attrs # Layered arrays have controls list + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) def test_load_wel_package(tmp_path, v2_dfns, model_name): """Test loading WEL package with stress period data.""" @@ -147,6 +149,7 @@ def test_load_wel_package(tmp_path, v2_dfns, model_name): assert data_var.dims == ("record",), "Data var should have 'record' dimension" assert len(data_var) == len(spd.kper), "Data var length should match records" + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) def test_load_oc_package(tmp_path, v2_dfns, model_name): """Test loading OC package with period blocks (non-tabular).""" @@ -185,6 +188,7 @@ def test_load_oc_package(tmp_path, v2_dfns, model_name): if first_period["saverecord"]: assert isinstance(first_period["saverecord"][0], dict) + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) def test_load_npf_package(tmp_path, v2_dfns, model_name): """Test loading NPF package with arrays and keywords.""" @@ -218,6 +222,7 @@ def test_load_npf_package(tmp_path, v2_dfns, model_name): assert isinstance(arr, xr.DataArray) assert "control_type" in arr.attrs + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) def test_load_sto_package(tmp_path, v2_dfns, model_name): """Test loading STO package with period blocks (keyword-based).""" @@ -279,6 +284,7 @@ def test_dataset_to_dataframe_conversion(tmp_path, v2_dfns, model_name): else: assert "node" in df.columns + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) def test_dataset_period_filtering(self, tmp_path, v2_dfns, model_name): """Test filtering Dataset by period.""" @@ -334,6 +340,7 @@ def test_load_into_ic_component(tmp_path, v2_dfns, model_name): # If structuring isn't fully implemented yet, that's expected pytest.skip(f"Component structuring not yet implemented: {e}") + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) def test_load_into_dis_component(tmp_path, v2_dfns, model_name): """Test loading parsed DIS data into Dis component.""" @@ -364,6 +371,7 @@ def test_load_into_dis_component(tmp_path, v2_dfns, model_name): except Exception as e: pytest.skip(f"Component structuring not yet implemented: {e}") + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) def test_load_into_wel_component(tmp_path, v2_dfns, model_name): """Test loading parsed WEL data into Wel component.""" @@ -462,6 +470,7 @@ def test_load_complete_gwf_model(tmp_path, v2_dfns, model_name): print(f"\n✓ Successfully parsed {len(parsed_packages)} packages for model") + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-bcf2ss-p01a"]) def test_load_model_with_stress_packages(tmp_path, v2_dfns, model_name): """Test loading a model with stress period packages (WEL, CHD, etc.).""" @@ -527,6 +536,7 @@ def test_load_simulation_structure(tmp_path, model_name): # IC is optional (some models use steady-state without IC) assert "npf" in extensions or "lpf" in extensions, "Should have flow package" + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) def test_parse_all_simulation_packages(tmp_path, v2_dfns, model_name): """Test parsing all packages in a simulation.""" @@ -576,3 +586,45 @@ def test_parse_all_simulation_packages(tmp_path, v2_dfns, model_name): parsed_count / (parsed_count + failed_count) if (parsed_count + failed_count) > 0 else 0 ) assert success_rate >= 0.7, f"Should have at least 70% success rate, got {success_rate:.1%}" + + +class TestClassmethodLoad: + """Test the new classmethod .load() API with complete pipeline.""" + + @pytest.mark.parametrize("model_name", ["mf6/example/ex-gwf-csub-p01"]) + def test_load_simulation_classmethod(self, tmp_path, model_name): + """Test loading a complete simulation using Simulation.load() classmethod.""" + from flopy4.mf6.simulation import Simulation + + workspace = copy_to(tmp_path, model_name, verbose=False) + sim_path = workspace / "mfsim.nam" + + # Load simulation using classmethod + sim = Simulation.load(sim_path) + + # Verify simulation was loaded + assert sim is not None + assert isinstance(sim, Simulation) + + # Check for TDIS (required) + assert hasattr(sim, "tdis") + assert sim.tdis is not None + + # Verify workspace was set correctly + assert sim.workspace == workspace + + # Count loaded components + loaded_components = [] + if hasattr(sim, "children"): + for child_name, child in sim.children.items(): + if child is not None: + loaded_components.append(child_name) + + print(f"\n✓ Loaded complete simulation from {sim_path.name}") + print(f" - Workspace: {workspace}") + print(f" - Components loaded: {len(loaded_components)}") + for comp in sorted(loaded_components): + print(f" • {comp}") + + # Should have at least TDIS + assert any("tdis" in comp.lower() for comp in loaded_components), "Should have TDIS" From e474b7198595b8de488526da6f05881b745759a2 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 24 Nov 2025 05:24:52 -0500 Subject: [PATCH 5/8] remove md files --- test/INTEGRATION_FINDINGS.md | 223 ---------------------------- test/INTEGRATION_PLAN_REVISED.md | 127 ---------------- test/TRANSFORMER_OUTPUT_ANALYSIS.md | 195 ------------------------ 3 files changed, 545 deletions(-) delete mode 100644 test/INTEGRATION_FINDINGS.md delete mode 100644 test/INTEGRATION_PLAN_REVISED.md delete mode 100644 test/TRANSFORMER_OUTPUT_ANALYSIS.md diff --git a/test/INTEGRATION_FINDINGS.md b/test/INTEGRATION_FINDINGS.md deleted file mode 100644 index 127549f..0000000 --- a/test/INTEGRATION_FINDINGS.md +++ /dev/null @@ -1,223 +0,0 @@ -# TypedTransformer → Converter Integration Findings - -## Key Discovery: Period Block Format Matches! - -Looking at the unstructure code (`converter/unstructure.py:305-312`), the **write side** produces: -```python -{ - 'options': {...}, - 'dimensions': {...}, - 'period 1': {'head': , 'aux': }, - 'period 2': {'head': , 'aux': } -} -``` - -The TypedTransformer currently outputs: -```python -{ - 'options': {...}, - 'dimensions': {...}, - 'period 1': {'stress_period_data': [[2, 3, 4, 10.0], [2, 8, 4, 12.0]]}, - 'period 2': {'stress_period_data': [[...]]} -} -``` - -**Note:** Both use **string keys** `'period N'` with **1-based indexing**! - -## The Data Flow - -### Current State (Write Path) -``` -Component with separate arrays: - head: xr.DataArray(shape=(nper, nodes)) - aux: xr.DataArray(shape=(nper, nodes)) - ↓ - unstructure_component() - ↓ -Period blocks with separate fields: - 'period 1': {'head': array_kper0, 'aux': array_kper0} - ↓ - Writer (codec) - ↓ - MF6 input file -``` - -### Desired State (Read Path) -``` - MF6 input file - ↓ -Parser + TypedTransformer - ↓ -Period blocks with tabular data: - 'period 1': {'stress_period_data': [[k,i,j,head,aux], ...]} - ↓ - ??? Converter ??? - ↓ -Component initialization: - Chd(head={0: {cellid: val}, 1: {...}}, - aux={0: {cellid: val}, 1: {...}}) - ↓ -Component with separate arrays: - head: xr.DataArray(shape=(nper, nodes)) - aux: xr.DataArray(shape=(nper, nodes)) -``` - -## The Missing Piece - -The gap is converting from: -```python -# What transformer outputs -{'period 1': {'stress_period_data': [[2,3,4,10.0,5.0], ...]}} -``` - -To: -```python -# What Chd.__init__ expects -{ - 'head': {0: {(2,3,4): 10.0, ...}}, - 'aux': {0: {(2,3,4): 5.0, ...}} -} -``` - -**This conversion must happen BEFORE calling structure_array!** - -## Two Possible Approaches - -### Approach 1: Transformer Does the Conversion -Modify TypedTransformer to: -1. Know about the DFN field schema for period blocks -2. Split stress_period_data into separate field dicts -3. Output: `{'head': {0: list}, 'aux': {0: list}}` - -**Pros:** -- Transformer output directly matches what structure_array expects -- No additional conversion layer needed - -**Cons:** -- Transformer becomes more complex -- Tightly couples parser to data model - -### Approach 2: Add a Converter Layer -Keep TypedTransformer simple, add a `structure_from_parsed()` function: -1. TypedTransformer outputs tabular data as-is -2. Converter layer splits period blocks into fields -3. Calls existing `structure()` function - -**Pros:** -- Separation of concerns (parsing vs. structuring) -- Transformer stays focused on syntactic conversion -- Easier to test and maintain - -**Cons:** -- Extra layer of code -- Needs to handle various period block formats (CHD, WEL, RCH all different) - -## Recommended Approach: **Approach 2** (Converter Layer) - -Because: -1. **Separation of concerns**: Transformer handles syntax, converter handles semantics -2. **Flexibility**: Different packages have different stress period formats -3. **Testability**: Can test transformer and converter independently -4. **Maintainability**: Changes to data model don't require parser changes - -## Implementation Plan - -### Step 1: Fix Array Metadata (High Priority) -Change TypedTransformer to put control metadata in xarray attrs: -```python -# Current -{'control': {'type': 'constant', 'value': 1.0}, 'data': xr.DataArray(1.0)} - -# Target -xr.DataArray(1.0, attrs={'control_type': 'constant', 'control_value': 1.0}) -``` - -**Files:** `transformer.py:274-283, 130-158` - -### Step 2: Create Converter Layer -Add `flopy4/mf6/converter/reader.py`: -```python -def structure_from_parsed(parsed: dict, component_type: type, path: Path) -> Component: - """Convert TypedTransformer output to Component.""" - - # 1. Split period blocks into field arrays - converted = _split_period_blocks(parsed, component_type) - - # 2. Convert 'period N' string keys to integer keys - converted = _convert_period_keys(converted) - - # 3. Convert stress_period_data lists to dicts - converted = _convert_stress_data(converted, component_type) - - # 4. Use existing structure() function - return structure(converted, path) -``` - -### Step 3: Period Block Conversion -```python -def _split_period_blocks(data: dict, component_type: type) -> dict: - """Split 'stress_period_data' into separate field arrays.""" - - # Get DFN to know field names and order - dfn = component_type.dfn - period_fields = [f for f in dfn.fields if f.block == 'period'] - - # Find period blocks - period_blocks = {k: v for k, v in data.items() if k.startswith('period ')} - - # Initialize field dicts - field_arrays = {f.name: {} for f in period_fields} - - # Process each period - for period_key, period_data in period_blocks.items(): - kper = int(period_key.split()[1]) - 1 # Convert to 0-indexed - - if 'stress_period_data' in period_data: - spd = period_data['stress_period_data'] - - # Split each record into fields - for record in spd: - # Determine cellid length (3 for structured, 1 for unstructured) - # ... conversion logic ... - - return {**data, **field_arrays} -``` - -### Step 4: Integration Points -1. Modify `codec/reader/__init__.py` to provide `load_component()`: - ```python - def load_component(fp: IO[str], component_type: type, path: Path) -> Component: - """Load and structure a component from file.""" - parser = get_typed_parser(component_type.package_abbr) - transformer = TypedTransformer(dfn=component_type.dfn) - tree = parser.parse(fp.read()) - parsed = transformer.transform(tree) - return structure_from_parsed(parsed, component_type, path) - ``` - -2. Use in high-level API (Model.load(), etc.) - -## Testing Strategy - -1. **Unit tests** for each conversion function -2. **Integration tests** parsing real files → structured components -3. **Roundtrip tests** read → write → read → compare -4. **Regression tests** on all MF6 example models - -## Open Questions - -1. **Array control metadata**: What keys to use in attrs? `control_type`, `control_value`, `control_factor`? Or nest under `control` dict? -2. **External arrays**: How to preserve Path + metadata through the pipeline? -3. **Layered arrays**: How to handle list of controls in attrs? -4. **Package variations**: Some packages (WEL, CHD, etc.) have different stress data formats - need generic solution? - -## Next Steps - -1. ✅ Write tests to inspect TypedTransformer output (DONE) -2. Implement Step 1 (array metadata in attrs) -3. Test with structure_array to verify arrays work -4. Implement Step 2 (converter layer skeleton) -5. Implement Step 3 (period block splitting) - start with simple case (WEL) -6. Test integration with one package (WEL or CHD) -7. Generalize to other packages -8. Full integration testing with example models diff --git a/test/INTEGRATION_PLAN_REVISED.md b/test/INTEGRATION_PLAN_REVISED.md deleted file mode 100644 index 6b123e0..0000000 --- a/test/INTEGRATION_PLAN_REVISED.md +++ /dev/null @@ -1,127 +0,0 @@ -# Revised Integration Plan - -## Key Discovery: stress_period_data Setter Already Exists! - -The recent commits (#277, #280) implemented a `stress_period_data` property with getter and setter that **already does the conversion we need**! - -### What It Does - -**Setter:** `package.stress_period_data = DataFrame` -- Takes DataFrame with columns: `kper`, spatial coords (`layer`/`row`/`col` or `node`), and field values -- Internally calls `structure_array()` for each field -- Automatically splits multi-field data into separate arrays -- Handles all the complexcoordinates/reshaping/conversion logic - -**Getter:** `df = package.stress_period_data` -- Returns DataFrame with all period data combined -- Converts arrays → DataFrame format - -## Simplified Integration Path - -Instead of building a complex converter layer, we just need: - -### Step 1: Fix Array Metadata (Easy Win) -Modify TypedTransformer to put control metadata in xarray attrs instead of wrapper dict. - -**Current:** -```python -{'control': {'type': 'constant', 'value': 1.0}, 'data': xr.DataArray(1.0)} -``` - -**Target:** -```python -xr.DataArray(1.0, attrs={'control_type': 'constant', 'control_value': 1.0}) -``` - -### Step 2: Add Simple Period Data Converter -Create one simple function to convert TypedTransformer output → DataFrame: - -```python -def parsed_to_dataframe(parsed: dict, dfn: Dfn) -> pd.DataFrame: - """ - Convert TypedTransformer period blocks to DataFrame. - - Input: {'period 1': {'stress_period_data': [[2,3,4,-35000.0], ...]}} - Output: DataFrame with columns [kper, layer, row, col, q] - """ - # 1. Find period blocks - # 2. Get field names and order from DFN - # 3. Split cellid from values - # 4. Build DataFrame records - pass -``` - -### Step 3: Integration in Reader -```python -# In codec/reader/__init__.py or similar - -def load_component(path: Path, component_type: type) -> Component: - """Load component from MF6 input file.""" - - # 1. Parse and transform - parser = get_typed_parser(component_type.package_abbr) - transformer = TypedTransformer(dfn=component_type.dfn) - with open(path) as f: - tree = parser.parse(f.read()) - parsed = transformer.transform(tree) - - # 2. Extract non-period fields - non_period = {k: v for k, v in parsed.items() if not k.startswith('period ')} - - # 3. Create component with non-period fields - # (arrays already have correct format after Step 1) - component = component_type(**non_period) - - # 4. If has period data, convert and use setter - if any(k.startswith('period ') for k in parsed): - df = parsed_to_dataframe(parsed, component_type.dfn) - component.stress_period_data = df # ← Magic happens here! - - return component -``` - -## Benefits of This Approach - -1. **Reuses existing code**: The setter already handles all the complex logic -2. **Simple converter**: Just one function to convert lists → DataFrame -3. **Tested**: The setter is already tested and working -4. **Maintainable**: Minimal new code -5. **Flexible**: Works for all packages (WEL, CHD, DRN, etc.) - -## Implementation Steps - -### Phase 1: Array Metadata (This Session) -- [ ] Modify `TypedTransformer.try_create_dataarray()` to attach control to attrs -- [ ] Update `array()`, `single_array()`, `layered_array()` methods -- [ ] Test with real files -- [ ] Update existing transformer tests - -### Phase 2: Period Data Converter (Next Session) -- [ ] Implement `parsed_to_dataframe()` function -- [ ] Handle field name/order lookup from DFN -- [ ] Handle both structured and unstructured grids -- [ ] Test with WEL, CHD packages - -### Phase 3: Reader Integration (Following Session) -- [ ] Create `load_component()` function -- [ ] Handle parent model attachment for dims -- [ ] Test end-to-end parsing -- [ ] Roundtrip tests (read → write → read) - -### Phase 4: Full Integration (Final Session) -- [ ] Integrate with Model.load() -- [ ] Test on all MF6 example models -- [ ] Handle edge cases (empty periods, external arrays, etc.) -- [ ] Documentation and cleanup - -## Proof of Concept - -See `test/test_stress_period_data_converter.py` for working proof: -- ✅ `parsed_to_dataframe()` function works -- ✅ DataFrame format is correct -- ✅ Setter accepts the DataFrame (just needs proper parent setup) -- ✅ Getter returns matching format - -## Next Action - -Let's start with Phase 1: Fix array metadata in TypedTransformer! diff --git a/test/TRANSFORMER_OUTPUT_ANALYSIS.md b/test/TRANSFORMER_OUTPUT_ANALYSIS.md deleted file mode 100644 index 98cd20d..0000000 --- a/test/TRANSFORMER_OUTPUT_ANALYSIS.md +++ /dev/null @@ -1,195 +0,0 @@ -# TypedTransformer Output Format Analysis - -## Summary of Current Issues - -Based on parsing real MF6 files, the TypedTransformer outputs data in a format that doesn't directly match what `structure_array()` expects. Here are the key mismatches: - -## 1. Array Fields - Control Metadata Wrapper - -### Current Format -```python -{ - 'griddata': { - 'strt': { - 'control': {'type': 'constant', 'value': -10.7}, - 'data': - } - } -} -``` - -### What structure_array() Expects -- Raw xarray.DataArray or numpy.ndarray -- Dict with integer keys `{0: value, 1: value}` for period/layer data -- Scalars, lists, or DataFrames - -### Required Change -Output just the DataArray with control metadata in `.attrs`: -```python -{ - 'griddata': { - 'strt': - } -} -``` - -**Files to modify:** -- `transformer.py:274-283` - `try_create_dataarray()` method -- `transformer.py:130-158` - `array()`, `single_array()`, `layered_array()` methods - -## 2. Layered Arrays - -### Current Format -```python -{ - 'botm': { - 'control': [ - {'type': 'constant', 'value': -12.2}, - {'type': 'constant', 'value': -21.3}, - {'type': 'constant', 'value': -30.5} - ], - 'data': , - 'attrs': {...}, - 'dims': {'layer': 3} - } -} -``` - -### Required Change -Return DataArray with each layer's control metadata in attrs: -```python -{ - 'botm': -} -``` - -**Note:** For layered arrays, control is a list (one per layer). Store as `attrs['controls']` (plural). - -## 3. Period Blocks - String Keys - -### Current Format -```python -{ - 'period 1': {'saverecord': [...]}, - 'period 2': {'stress_period_data': [[2, 3, 4, -35000.0], ...]} -} -``` - -### Required Change -Use integer keys (0-indexed for Python, even though MF6 uses 1-indexed): -```python -{ - 'period': { - 0: {'saverecord': [...]}, - 1: {'stress_period_data': ...} - } -} -``` - -**OR** flatten to top-level with integer keys: -```python -{ - 0: {'saverecord': [...]}, - 1: {'stress_period_data': ...} -} -``` - -**Files to modify:** -- `transformer.py:105-125` - `start()` method -- `transformer.py:285-300` - `__default__()` method handling for `_block` rules - -**Question:** Should period data be nested under a 'period' key or flattened? Need to check how structure_array handles this. - -## 4. Stress Period Data - List of Lists - -### Current Format -```python -{ - 'stress_period_data': [[2, 3, 4, -35000.0], [2, 8, 4, -35000.0]] -} -``` -- Each inner list has: `[layer, row, col, q_value]` (for structured grids) -- Or: `[node, q_value]` (for unstructured grids) - -### What structure_array() Expects -Looking at `converter/structure.py:567-578`, it handles: -- List of tuples: `[((k, i, j), value), ((k, i, j), value), ...]` -- Nested dict: `{cellid: value, cellid: value}` -- List of lists with cellid: `[[cellid, value], [cellid, value]]` - -The current format `[[k, i, j, q], ...]` should actually work if we convert to: -```python -{ - 'stress_period_data': [[(2, 3, 4), -35000.0], [(2, 8, 4), -35000.0]] -} -``` - -**Alternative:** Convert to dict format: -```python -{ - 'stress_period_data': { - (2, 3, 4): -35000.0, - (2, 8, 4): -35000.0 - } -} -``` - -**Files to modify:** -- `transformer.py:236-266` - `stress_period_data()` and `record()` methods - -## 5. Record Fields - Looks OK - -### Current Format -```python -{ - 'budget_filerecord': {'budgetfile': 'ex-gwf-bcf2ss.cbc'}, - 'saverecord': [ - {'rtype': 'HEAD', 'ocsetting': 'all'}, - {'rtype': 'BUDGET', 'ocsetting': 'all'} - ] -} -``` - -This format looks compatible with structure_array(). Records are dicts, lists of records are lists of dicts. ✓ - -## Implementation Plan - -### Phase 1: Array Metadata (High Priority) -1. Modify `try_create_dataarray()` to attach control metadata to DataArray attrs -2. Modify `array()`, `single_array()`, `layered_array()` to return just the DataArray -3. Update tests in `test_mf6_reader.py` to check for attrs instead of control dict - -### Phase 2: Period Block Keys (High Priority) -1. Modify `start()` to convert "period N" string keys to integer keys -2. Decide on nesting structure (nested under 'period' key vs. flattened) -3. Update grammar/transformer to handle period indexing correctly - -### Phase 3: Stress Period Data Format (Medium Priority) -1. Modify `stress_period_data()` and `record()` to convert flat lists to tuples -2. Convert `[k, i, j, q]` to `[(k, i, j), q]` or dict format -3. Handle both structured and unstructured cellid formats - -### Phase 4: Integration Testing (High Priority) -1. Test that modified transformer output works with structure_array() -2. Create end-to-end tests: parse → transform → structure → roundtrip -3. Verify array metadata is preserved through write operations - -## Questions to Resolve - -1. **Period block structure:** Nested under 'period' key or flattened to top level? -2. **Stress period data format:** List of tuples or dict? Which is more efficient? -3. **Zero vs one indexing:** Should transformer output 0-indexed (Pythonic) or 1-indexed (MF6 native)? -4. **External arrays:** Do they work correctly as Path objects? (Need to test) - -## Next Steps - -1. Review this analysis with team -2. Make architectural decisions on open questions -3. Implement Phase 1 (array metadata) -4. Test with structure_array() -5. Iterate through remaining phases From 0b3d33b24a47a9985969878b74cdfc50994aa762 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 24 Nov 2025 08:46:45 -0500 Subject: [PATCH 6/8] pass cls thru to structure call --- flopy4/mf6/__init__.py | 6 +++--- flopy4/mf6/converter/__init__.py | 26 ++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/flopy4/mf6/__init__.py b/flopy4/mf6/__init__.py index 5e835eb..33ae1e2 100644 --- a/flopy4/mf6/__init__.py +++ b/flopy4/mf6/__init__.py @@ -28,19 +28,19 @@ class WriteError(Exception): def _load_mf6(cls, path: Path) -> Component: """Load MF6 format file into a component instance.""" with open(path, "r") as fp: - return structure(load_mf6(fp), path) + return structure(load_mf6(fp), path, component_type=cls) def _load_json(cls, path: Path) -> Component: """Load JSON format file into a component instance.""" with open(path, "r") as fp: - return structure(load_json(fp), path) + return structure(load_json(fp), path, component_type=cls) def _load_toml(cls, path: Path) -> Component: """Load TOML format file into a component instance.""" with open(path, "rb") as fp: - return structure(load_toml(fp), path) + return structure(load_toml(fp), path, component_type=cls) def _write_mf6(component: Component, context=None, **kwargs) -> None: diff --git a/flopy4/mf6/converter/__init__.py b/flopy4/mf6/converter/__init__.py index c6f2bf6..1780958 100644 --- a/flopy4/mf6/converter/__init__.py +++ b/flopy4/mf6/converter/__init__.py @@ -40,8 +40,30 @@ def _make_converter() -> Converter: COMPONENT_CONVERTER = _make_converter() -def structure(data: dict[str, Any], path: Path) -> Component: - component = COMPONENT_CONVERTER.structure(data, Component) +def structure( + data: dict[str, Any], path: Path, component_type: type[Component] | None = None +) -> Component: + """ + Structure parsed data into a Component instance. + + Parameters + ---------- + data : dict + Parsed component data + path : Path + Path to the component file (for setting workspace/filename) + component_type : type[Component], optional + Concrete component class to instantiate. If not provided, uses Component base class. + + Returns + ------- + Component + Structured component instance + """ + # Use provided type or fall back to Component base class + target_type = component_type if component_type is not None else Component + + component = COMPONENT_CONVERTER.structure(data, target_type) if isinstance(component, Context): component.workspace = path.parent component.filename = path.name From e029b6c0ed70f338186971b8fc63ae3d2a55af63 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 24 Nov 2025 08:47:41 -0500 Subject: [PATCH 7/8] binding design doc --- binding-resolution-design.md | 356 +++++++++++++++++++++++++++++++++++ 1 file changed, 356 insertions(+) create mode 100644 binding-resolution-design.md diff --git a/binding-resolution-design.md b/binding-resolution-design.md new file mode 100644 index 0000000..e2cc89d --- /dev/null +++ b/binding-resolution-design.md @@ -0,0 +1,356 @@ +# Binding Resolution Design for Component Loading + +## Problem Statement + +MF6 simulation and model namefiles use **bindings** to reference child components in tabular list blocks: + +``` +BEGIN MODELS + gwf6 model.nam modelname +END MODELS + +BEGIN SOLUTIONGROUP 1 + ims6 model.ims modelname +END SOLUTIONGROUP +``` + +**On write**: The `Binding` class converts component instances to binding tuples: +```python +Binding.from_component(model) → ('gwf6', 'model.nam', 'modelname') +``` + +**On load**: We need the inverse operation - convert binding tuples back to component instances: +```python +# Transformer outputs: +{'models': [['gwf6', 'model.nam', 'modelname']]} + +# Component expects: +{'models': {'modelname': Model(...)}} +``` + +The challenge: **Binding resolution requires recursive component loading**. + +--- + +## Approach 1: Inline Resolution in Structure Hook + +### Description +Register custom cattrs structure hooks for Simulation/Model that resolve bindings during structuring: + +```python +def _structure_simulation_hook(data: dict, cls: type) -> Simulation: + """Custom structure hook for Simulation that resolves bindings.""" + + # Resolve model bindings + models_bindings = data.get('models', []) + models = {} + workspace = ... # Need to pass workspace somehow + + for binding_tuple in models_bindings: + model_type, fname, name = binding_tuple + model_cls = _resolve_component_class(model_type) + model = model_cls.load(workspace / fname) + models[name] = model + + # Similar for solutions, exchanges... + + return Simulation(models=models, solutions=solutions, ...) + +# Register the hook +COMPONENT_CONVERTER.register_structure_hook(Simulation, _structure_simulation_hook) +``` + +### Pros +- ✅ Clean separation - structure hooks handle all structuring logic +- ✅ Fits cattrs design pattern +- ✅ Type-specific logic encapsulated in hooks + +### Cons +- ❌ Structure hooks don't have access to `workspace` (only get `data` and `cls`) +- ❌ Requires passing workspace through the data dict or via closure +- ❌ Mixing structural transformation with I/O (recursive loads) +- ❌ Harder to test in isolation +- ❌ Need separate hooks for each component type with bindings + +--- + +## Approach 2: Post-Processing After Structure + +### Description +Keep structuring simple, then resolve bindings as a post-processing step: + +```python +def resolve_bindings(component: Component, workspace: Path) -> Component: + """ + Recursively resolve binding tuples to component instances. + + Processes a component after structuring to: + 1. Detect binding lists (list of tuples with type/fname/terms) + 2. Recursively load referenced components + 3. Replace binding lists with dicts of component instances + """ + + if isinstance(component, Simulation): + # Resolve models + if isinstance(component.models, list): + resolved_models = {} + for binding_tuple in component.models: + model = Binding.to_component(binding_tuple, workspace) + resolved_models[model.name] = model + component.models = resolved_models + + # Resolve solutions + if isinstance(component.solutions, dict): + for group_name, bindings in list(component.solutions.items()): + if isinstance(bindings, list): + resolved = {} + for binding_tuple in bindings: + solution = Binding.to_component(binding_tuple, workspace) + resolved[solution.name] = solution + component.solutions.update(resolved) + + # Similar for exchanges... + + elif isinstance(component, Model): + # Resolve package bindings if model namefiles have them + pass + + return component + +# Usage in loader: +def _load_mf6(cls, path: Path) -> Component: + with open(path, "r") as fp: + component = structure(load_mf6(fp), path, component_type=cls) + component = resolve_bindings(component, path.parent) + return component +``` + +### Pros +- ✅ Clear separation of concerns: structure = data transformation, binding = I/O +- ✅ Has access to workspace naturally +- ✅ Easy to test independently +- ✅ Reusable across different component types +- ✅ Doesn't complicate cattrs setup +- ✅ Can be optional/conditional (e.g., skip for testing) + +### Cons +- ❌ Requires components to temporarily hold invalid state (lists instead of dicts) +- ❌ Two-pass processing (structure, then resolve) +- ❌ Need to handle partially-resolved states gracefully + +--- + +## Approach 3: Binding Helper Method on Binding Class + +### Description +Add a `to_component()` classmethod to `Binding` that handles the conversion: + +```python +# flopy4/mf6/binding.py + +@define +class Binding: + type: str + fname: str + terms: tuple[str, ...] | None = None + + @classmethod + def from_component(cls, component: Component) -> "Binding": + """Create binding from component (for write).""" + # ... existing code ... + + @classmethod + def to_component(cls, binding_tuple: tuple, workspace: Path) -> Component: + """ + Resolve binding tuple to component instance (for load). + + Parameters + ---------- + binding_tuple : tuple + Binding in form (type, fname) or (type, fname, *terms) + workspace : Path + Workspace directory for resolving file paths + + Returns + ------- + Component + Loaded component instance + """ + binding_type, fname = binding_tuple[0:2] + terms = binding_tuple[2:] if len(binding_tuple) > 2 else () + + # Resolve component class from type + component_cls = _resolve_component_class(binding_type) + + # Recursively load the component + component_path = workspace / fname + component = component_cls.load(component_path) + + # Apply terms (e.g., set name from binding) + if terms: + component.name = terms[0] + + return component + + +def _resolve_component_class(binding_type: str) -> type[Component]: + """Map binding type string to component class.""" + type_map = { + 'gwf6': 'flopy4.mf6.gwf.Model', + 'gwt6': 'flopy4.mf6.gwt.Model', + 'gwe6': 'flopy4.mf6.gwe.Model', + 'ims6': 'flopy4.mf6.ims.Ims', + 'gwf-gwf': 'flopy4.mf6.exchange.GwfGwf', + # ... etc + } + + if binding_type.lower() not in type_map: + raise ValueError(f"Unknown binding type: {binding_type}") + + # Dynamic import + module_path, class_name = type_map[binding_type.lower()].rsplit('.', 1) + module = __import__(module_path, fromlist=[class_name]) + return getattr(module, class_name) +``` + +### Pros +- ✅ Symmetric API: `from_component()` (write) ↔ `to_component()` (load) +- ✅ Encapsulates type resolution logic in one place +- ✅ Binding class owns both directions of the transformation +- ✅ Composable with Approach 2 for the actual resolution loop + +### Cons +- ❌ Couples Binding class to component loading (circular dependency risk) +- ❌ Still needs a driver (Approach 1 or 2) to call it +- ❌ Type map maintenance burden + +--- + +## Recommended Approach: Hybrid (2 + 3) + +**Use Approach 2 (post-processing) as the driver, with Approach 3 (Binding.to_component) for the conversion logic.** + +### Implementation Strategy + +1. **Add `Binding.to_component()` method** to encapsulate type resolution and loading: + ```python + # Symmetric to Binding.from_component() + Binding.to_component(binding_tuple, workspace) → Component + ``` + +2. **Add `resolve_bindings()` post-processor** to handle the iteration: + ```python + def resolve_bindings(component: Component, workspace: Path) -> Component: + """Post-process component to resolve binding tuples.""" + if isinstance(component, Simulation): + # Use Binding.to_component() for each binding + component.models = _resolve_binding_dict(component.models, workspace) + component.solutions = _resolve_binding_dict(component.solutions, workspace) + component.exchanges = _resolve_binding_dict(component.exchanges, workspace) + return component + ``` + +3. **Call from loaders** after structuring: + ```python + def _load_mf6(cls, path: Path) -> Component: + component = structure(load_mf6(fp), path, component_type=cls) + component = resolve_bindings(component, path.parent) + return component + ``` + +### Why This is Best + +1. **Separation of Concerns** + - `Binding.to_component()`: Type resolution + loading (cohesive, reusable) + - `resolve_bindings()`: Iteration + dict building (structural transformation) + - Loaders: Orchestration (parse → structure → resolve → return) + +2. **Symmetric Design** + - Write: `Component → Binding.from_component() → tuple` + - Load: `tuple → Binding.to_component() → Component` + +3. **Testability** + - Test `Binding.to_component()` with simple bindings + - Test `resolve_bindings()` with mock components + - Test loaders end-to-end with real files + +4. **Flexibility** + - Can skip binding resolution for unit tests + - Can resolve bindings for programmatically created components + - Can extend to support lazy loading later + +5. **Compatibility with Classmethod API** + - `Binding.to_component()` naturally calls `Component.load()` classmethod + - Recursive loading works seamlessly + - Workspace propagates correctly + +--- + +## Implementation Checklist + +- [ ] Add `Binding.to_component(binding_tuple, workspace)` classmethod +- [ ] Add `_resolve_component_class(binding_type)` helper function +- [ ] Add `resolve_bindings(component, workspace)` post-processor +- [ ] Integrate `resolve_bindings()` into `_load_mf6()`, `_load_json()`, `_load_toml()` +- [ ] Update transformer to preserve binding tuples in output +- [ ] Write tests for `Binding.to_component()` +- [ ] Write tests for `resolve_bindings()` +- [ ] Write integration tests for Simulation.load() with bindings + +--- + +## Future Enhancements + +### Lazy Loading +Instead of loading all child components immediately, could return proxy objects: + +```python +class BindingProxy: + def __init__(self, binding_tuple, workspace): + self.binding_tuple = binding_tuple + self.workspace = workspace + self._component = None + + @property + def component(self): + if self._component is None: + self._component = Binding.to_component(self.binding_tuple, self.workspace) + return self._component +``` + +### Caching +Avoid loading the same component multiple times: + +```python +def resolve_bindings(component, workspace, cache=None): + cache = cache or {} + # Check cache before calling Binding.to_component() + # Store loaded components in cache by path +``` + +### Validation +Verify binding references exist before attempting to load: + +```python +def validate_bindings(component, workspace): + """Check that all binding files exist before loading.""" + # Collect all binding tuples + # Verify files exist + # Raise helpful error if missing +``` + +--- + +## Related Issues + +- Simulation structuring bug (name parameter receives entire dict) + - **Fixed by**: Passing `component_type=cls` to `structure()` ✅ + - Implemented in this session + +- Transformer output format for simulations + - **Needs**: Simulation parser/transformer implementation + - **Blocks**: Full simulation loading until implemented + +- Component dimension resolution for standalone package loading + - **Future**: May need `dims` parameter for `Package.load(path, dims={...})` + - Not needed for Simulation/Model loading (get dims from children) From 7ec1ff0d178f58cee1e4bb6e770f8047eba1e22f Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 24 Nov 2025 09:28:02 -0500 Subject: [PATCH 8/8] binding impl, still need to use typed transformer --- binding-resolution-implementation-summary.md | 111 +++++++++++++ flopy4/mf6/__init__.py | 16 +- flopy4/mf6/binding.py | 84 ++++++++++ flopy4/mf6/component.py | 5 +- flopy4/mf6/context.py | 12 +- flopy4/mf6/converter/__init__.py | 165 ++++++++++++++++++- typed-transformer-migration.md | 116 +++++++++++++ 7 files changed, 487 insertions(+), 22 deletions(-) create mode 100644 binding-resolution-implementation-summary.md create mode 100644 typed-transformer-migration.md diff --git a/binding-resolution-implementation-summary.md b/binding-resolution-implementation-summary.md new file mode 100644 index 0000000..9801b2c --- /dev/null +++ b/binding-resolution-implementation-summary.md @@ -0,0 +1,111 @@ +# Binding Resolution Implementation - Session Summary + +## What We Accomplished + +Successfully implemented **pre-construction binding resolution** for loading MF6 simulations with hierarchical component structures. + +### Components Implemented + +1. **`Binding.to_component()` classmethod** (`flopy4/mf6/binding.py`) + - Symmetric inverse of `from_component()` + - Resolves binding tuples `('gwf6', 'file.nam', 'name')` → loaded Component instance + - Recursively loads child components via `Component.load()` + +2. **`_resolve_component_class()` helper** (`flopy4/mf6/binding.py`) + - Maps binding type strings to component classes + - Supports models (gwf6, gwt6, gwe6), solutions (ims6), exchanges, tdis6 + - Dynamic import from type map + +3. **`_resolve_bindings_in_dict()` transformer** (`flopy4/mf6/converter/__init__.py`) + - Pre-construction data transformation + - Detects binding tuple lists (first element ends with '6') + - Transforms `[['gwf6', 'file.nam', 'name']]` → `{'name': Model(...)}` + - Integrated into `structure()` function before component construction + +4. **Block name mapping** (`flopy4/mf6/converter/__init__.py`) + - Maps transformer output block names to xattree field names + - `'timing'` → `'tdis'`, `'solutiongroup 1'` → `'solutions'` + - Uses xattree metadata as source of truth + - Handles numbered blocks automatically + +5. **Removed recursive loading loops** + - Updated `Context.load()` (`flopy4/mf6/context.py`) + - Updated `Component.load()` (`flopy4/mf6/component.py`) + - Child loading now handled by binding resolution during structuring + +## How It Works + +### Call Chain + +``` +Simulation.load(path) + → _load_mf6(Simulation, path) + → structure(data, path, Simulation) + → _map_block_names_to_attributes(data, Simulation) + → _resolve_bindings_in_dict(data, workspace, Simulation) + → Binding.to_component(tdis_binding, workspace) + → Tdis.load(workspace / 'ex-gwf-csub-p01.tdis') ← recursive! + → Binding.to_component(model_binding, workspace) + → Model.load(workspace / 'ex-gwf-csub-p01.nam') ← recursive! + → Binding.to_component(ims_binding, workspace) + → Ims.load(workspace / 'ex-gwf-csub-p01.ims') ← recursive! + → Simulation(**resolved_data) ← children already loaded! +``` + +### Key Design Decisions + +1. **Pre-construction resolution** (not post-processing) + - Transforms data before calling constructor + - Avoids temporary invalid state + - Field converters receive proper types + +2. **Recursive loading via `Binding.to_component()`** + - Child components load their own children + - Workspace propagates correctly + - Natural call stack for debugging + +3. **Bypass cattrs for xattree types** + - Direct construction: `Simulation(**data)` + - cattrs doesn't handle DataTree properly + - Simpler and more predictable + +4. **Binding detection heuristic** + - Check if first element of list ends with '6' + - Avoids false positives (e.g., TDIS OPTIONS data) + - Robust for all MF6 component types + +## Current Status + +✅ **Binding resolution is complete and working!** + +The test successfully: +- Loads simulation namefile +- Maps block names (`timing`, `solutiongroup 1`) to fields +- Resolves binding tuples to component instances +- Recursively loads TDIS, Model, and IMS components + +❌ **Current blocker: Transformer issue** + +Child components (like TDIS) fail to load because: +- `BasicTransformer` outputs raw block data +- Need `TypedTransformer` for typed field extraction +- See `typed-transformer-migration.md` for solution + +## Files Modified + +1. `flopy4/mf6/binding.py` - Added `to_component()` and `_resolve_component_class()` +2. `flopy4/mf6/converter/__init__.py` - Added binding resolution and block mapping +3. `flopy4/mf6/context.py` - Removed recursive loading loop +4. `flopy4/mf6/component.py` - Removed recursive loading loop + +## Next Steps + +1. Migrate to `TypedTransformer` (see `typed-transformer-migration.md`) +2. Test end-to-end simulation loading +3. Handle edge cases (exchanges, multi-model simulations) +4. Add caching to avoid reloading same components + +## Related Design Documents + +- `binding-resolution-design.md` - Original design analysis (Hybrid Approach 2+3) +- `typed-transformer-migration.md` - Solution for current blocker diff --git a/flopy4/mf6/__init__.py b/flopy4/mf6/__init__.py index 33ae1e2..661cc00 100644 --- a/flopy4/mf6/__init__.py +++ b/flopy4/mf6/__init__.py @@ -14,6 +14,7 @@ from flopy4.mf6.ims import Ims from flopy4.mf6.simulation import Simulation from flopy4.mf6.tdis import Tdis +from flopy4.mf6.write_context import WriteContext from flopy4.uio import DEFAULT_REGISTRY __all__ = ["gwf", "simulation", "solution", "utils", "Ims", "Tdis", "Simulation"] @@ -28,31 +29,26 @@ class WriteError(Exception): def _load_mf6(cls, path: Path) -> Component: """Load MF6 format file into a component instance.""" with open(path, "r") as fp: - return structure(load_mf6(fp), path, component_type=cls) + return structure(load_mf6(fp), path, cls) def _load_json(cls, path: Path) -> Component: """Load JSON format file into a component instance.""" with open(path, "r") as fp: - return structure(load_json(fp), path, component_type=cls) + return structure(load_json(fp), path, cls) def _load_toml(cls, path: Path) -> Component: """Load TOML format file into a component instance.""" with open(path, "rb") as fp: - return structure(load_toml(fp), path, component_type=cls) + return structure(load_toml(fp), path, cls) -def _write_mf6(component: Component, context=None, **kwargs) -> None: - from flopy4.mf6.write_context import WriteContext - - # Use provided context or default - ctx = context if context is not None else WriteContext.default() - +def _write_mf6(component: Component, context=None) -> None: with open(component.path, "w") as fp: data = unstructure(component) try: - dump_mf6(data, fp, context=ctx) + dump_mf6(data, fp, context=context if context is not None else WriteContext.default()) except Exception as e: raise WriteError( f"Failed to write MF6 format file for component '{component.name}' " # type: ignore diff --git a/flopy4/mf6/binding.py b/flopy4/mf6/binding.py index e84009d..61d73d4 100644 --- a/flopy4/mf6/binding.py +++ b/flopy4/mf6/binding.py @@ -1,3 +1,5 @@ +from pathlib import Path + from attrs import define from flopy4.mf6.component import Component @@ -7,6 +9,52 @@ from flopy4.mf6.solution import Solution +def _resolve_component_class(binding_type: str) -> type[Component]: + """ + Map binding type string to component class. + + Parameters + ---------- + binding_type : str + Binding type string (e.g., 'gwf6', 'ims6', 'gwf-gwf6') + + Returns + ------- + type[Component] + Component class + """ + # Normalize to lowercase + binding_type = binding_type.lower() + + # Map binding types to module paths and class names + type_map = { + # Models + "gwf6": ("flopy4.mf6.gwf", "Model"), + "gwt6": ("flopy4.mf6.gwt", "Model"), + "gwe6": ("flopy4.mf6.gwe", "Model"), + "prt6": ("flopy4.mf6.prt", "Model"), + # Solutions + "ims6": ("flopy4.mf6.ims", "Ims"), + "sln6": ("flopy4.mf6.solution", "Solution"), + # Exchanges + "gwf-gwf6": ("flopy4.mf6.exchange", "GwfGwf"), + "gwf-gwt6": ("flopy4.mf6.exchange", "GwfGwt"), + "gwf-gwe6": ("flopy4.mf6.exchange", "GwfGwe"), + "gwt-gwt6": ("flopy4.mf6.exchange", "GwtGwt"), + "gwe-gwe6": ("flopy4.mf6.exchange", "GweGwe"), + "gwf-prt6": ("flopy4.mf6.exchange", "GwfPrt"), + # Time discretization + "tdis6": ("flopy4.mf6.tdis", "Tdis"), + } + + if binding_type not in type_map: + raise ValueError(f"Unknown binding type: {binding_type}") + + module_path, class_name = type_map[binding_type] + module = __import__(module_path, fromlist=[class_name]) + return getattr(module, class_name) + + @define class Binding: """ @@ -51,3 +99,39 @@ def _get_binding_terms(component: Component) -> tuple[str, ...] | None: fname=component.filename or component.default_filename(), terms=_get_binding_terms(component), ) + + @classmethod + def to_component(cls, binding_tuple: tuple | list, workspace: Path) -> Component: + """ + Resolve binding tuple to component instance (inverse of from_component). + + Parameters + ---------- + binding_tuple : tuple | list + Binding in form (type, fname) or (type, fname, *terms) + workspace : Path + Workspace directory for resolving file paths + + Returns + ------- + Component + Loaded component instance + """ + # Extract binding parts + binding_type = binding_tuple[0] + fname = binding_tuple[1] + terms = binding_tuple[2:] if len(binding_tuple) > 2 else () + + # Resolve component class from type string + component_cls = _resolve_component_class(binding_type) + + # Recursively load the component + component_path = workspace / fname + component = component_cls.load(component_path) + + # Apply terms (e.g., set name from binding if provided) + # For models/packages, first term is the name + if terms and hasattr(component, "name"): + component.name = terms[0] # type: ignore[attr-defined] + + return component # type: ignore[return-value] diff --git a/flopy4/mf6/component.py b/flopy4/mf6/component.py index 8cf0598..577e30b 100644 --- a/flopy4/mf6/component.py +++ b/flopy4/mf6/component.py @@ -136,9 +136,7 @@ def get_dfn(cls) -> Dfn: @classmethod def load(cls, path: str | PathLike, format: str = MF6) -> None: """Load the component and any children.""" - self = cls._load(path, format=format) # Get the instance - for child in self.children.values(): # type: ignore - child.__class__.load(child.path, format=format) + return cls._load(path, format=format) def write(self, format: str = MF6, context: Optional[WriteContext] = None) -> None: """ @@ -153,6 +151,7 @@ def write(self, format: str = MF6, context: Optional[WriteContext] = None) -> No uses the current context from the context manager stack, or default settings. """ + # TODO: setting filename is a temp hack to get the parent's # name as this component's filename stem, if it has one. an # actual solution is to auto-set the filename when children diff --git a/flopy4/mf6/context.py b/flopy4/mf6/context.py index 8df3fa5..f0f1754 100644 --- a/flopy4/mf6/context.py +++ b/flopy4/mf6/context.py @@ -53,16 +53,12 @@ def load(cls, path, format=MF6): """ Load the context component and children. - Children are loaded relative to the parent's workspace directory, - so their paths are resolved within that workspace. + Children are loaded recursively during structuring via binding resolution, + so no additional loading is needed here. """ - # Load the instance first + # Load the instance (binding resolution in structure() handles children) instance = cls._load(path, format=format) - - # Load children within the workspace context - with cd(instance.workspace): - for child in instance.children.values(): # type: ignore - child.__class__.load(child.path, format=format) + return instance def write(self, format=MF6, context=None): with cd(self.workspace): diff --git a/flopy4/mf6/converter/__init__.py b/flopy4/mf6/converter/__init__.py index 1780958..da4c19c 100644 --- a/flopy4/mf6/converter/__init__.py +++ b/flopy4/mf6/converter/__init__.py @@ -5,7 +5,9 @@ import xattree from cattr import Converter from cattrs.gen import make_hetero_tuple_unstructure_fn +from xattree import get_xatspec +from flopy4.mf6.binding import Binding from flopy4.mf6.component import Component from flopy4.mf6.context import Context from flopy4.mf6.converter.structure import structure_array, structure_keyword @@ -40,6 +42,140 @@ def _make_converter() -> Converter: COMPONENT_CONVERTER = _make_converter() +def _map_block_names_to_attributes(data: dict, cls: type) -> dict: + """ + Map block names in parsed data to attribute names using xattree metadata. + + Handles: + - Exact block name matches (e.g., 'timing' -> 'tdis') + - Numbered blocks (e.g., 'solutiongroup 1', 'solutiongroup 2' -> 'solutions') + - Direct attribute names (passed through unchanged) + + Parameters + ---------- + data : dict + Parsed data with block names as keys + cls : type + Component class with xattree metadata + + Returns + ------- + dict + Data with attribute names as keys + """ + # Get xattree spec to find block name -> attribute name mappings + spec = get_xatspec(cls) + + # Build mapping from xattree field metadata + block_to_attr = {} + for field_name, field_spec in spec.flat.items(): + if ( + hasattr(field_spec, "metadata") + and field_spec.metadata is not None + and "block" in field_spec.metadata + ): + block_name = field_spec.metadata["block"] + block_to_attr[block_name] = field_name + + mapped = {} + + for key, value in data.items(): + # Try exact block name match + if key in block_to_attr: + attr_name = block_to_attr[key] + mapped[attr_name] = value + else: + # Try numbered block (e.g., "solutiongroup 1" -> "solutiongroup") + parts = key.split(" ", 1) + if len(parts) == 2 and parts[1].replace(".", "", 1).isdigit(): + base_name = parts[0] + if base_name in block_to_attr: + attr_name = block_to_attr[base_name] + # Aggregate numbered blocks into a list + if attr_name not in mapped: + mapped[attr_name] = [] + if isinstance(value, list): + mapped[attr_name].extend(value) + else: + mapped[attr_name].append(value) + else: + # Base name not found - use key as-is + mapped[key] = value + else: + # Not a numbered block - use key as-is + mapped[key] = value + + return mapped + + +def _resolve_bindings_in_dict(data: dict, workspace: Path, target_type: type) -> dict: + """ + Resolve binding tuples in data dict to component instances. + + Detects lists of binding tuples (e.g., [['gwf6', 'file.nam', 'name']]) + and transforms them to component instances or dicts of instances. + + Parameters + ---------- + data : dict + Data dict with potential binding tuples + workspace : Path + Workspace directory for loading child components + target_type : type + Target component type (e.g., Simulation) + + Returns + ------- + dict + Data with bindings resolved to component instances + """ + resolved = dict(data) # Copy + + # Get xattree spec to understand expected field types + spec = get_xatspec(target_type) + + for field_name, field_spec in spec.flat.items(): + if field_name not in resolved: + continue + + value = resolved[field_name] + + # Skip if not a list (no binding tuples to resolve) + if not isinstance(value, list): + continue + + # Skip empty lists + if not value: + continue + + # Check if this looks like a list of binding tuples + # Binding tuples have: + # - First element is a list/tuple with 2+ elements + # - First element of that is a string ending with '6' (like 'gwf6', 'tdis6') + first_item = value[0] + if ( + isinstance(first_item, (list, tuple)) + and len(first_item) >= 2 + and isinstance(first_item[0], str) + and first_item[0].lower().endswith("6") + ): + # This is a list of binding tuples + # Resolve each binding to a component instance + resolved_components = {} + for binding_tuple in value: + component = Binding.to_component(binding_tuple, workspace) + # Use component name as key (most bindings include name in terms) + if hasattr(component, "name") and component.name: + resolved_components[component.name] = component + else: + # Fallback: use filename as key + resolved_components[component.filename] = component + + resolved[field_name] = resolved_components + + return resolved + + def structure( data: dict[str, Any], path: Path, component_type: type[Component] | None = None ) -> Component: @@ -63,7 +199,34 @@ def structure( # Use provided type or fall back to Component base class target_type = component_type if component_type is not None else Component - component = COMPONENT_CONVERTER.structure(data, target_type) + # Map block names to attribute names if this is an xattree type + if xattree.has(target_type): + data = _map_block_names_to_attributes(data, target_type) + + # Get valid field names for this type + spec = get_xatspec(target_type) + valid_fields = set(spec.flat.keys()) + + # Filter to only valid fields, excluding workspace (set after) + data_for_structuring = { + k: v for k, v in data.items() if k in valid_fields and k != "workspace" + } + + # Resolve binding tuples to component instances (pre-construction) + data_for_structuring = _resolve_bindings_in_dict( + data_for_structuring, path.parent, target_type + ) + else: + # Exclude workspace from structuring - will be set afterwards + data_for_structuring = {k: v for k, v in data.items() if k != "workspace"} + + # Structure the component + if xattree.has(target_type): + component = target_type(**data_for_structuring) + else: + component = COMPONENT_CONVERTER.structure(data_for_structuring, target_type) + + # Set workspace and filename after construction if isinstance(component, Context): component.workspace = path.parent component.filename = path.name diff --git a/typed-transformer-migration.md b/typed-transformer-migration.md new file mode 100644 index 0000000..16d8f45 --- /dev/null +++ b/typed-transformer-migration.md @@ -0,0 +1,116 @@ +# TypedTransformer Migration + +## Problem + +Currently, `load()` uses `BasicTransformer` which outputs raw block data: + +```python +{ + "OPTIONS": [["NPER", 2], ["TIME_UNITS", "seconds"]], + "DIMENSIONS": [["NPER", 1]], + "PERIODDATA": [[1.0, 1, 1.0]] +} +``` + +But components expect typed, structured data: + +```python +{ + "nper": 2, + "time_units": "seconds", + "perlen": [...], + ... +} +``` + +This causes loading failures because: +1. Block names don't match field names (`OPTIONS` vs `time_units`) +2. Scalar values aren't extracted from record lists +3. Field types aren't applied (everything is a list) + +## Solution: Switch to TypedTransformer + +`TypedTransformer` already exists and handles: +- Extracting scalar values from OPTIONS/DIMENSIONS blocks +- Mapping block names to field names using DFN metadata +- Applying field types (scalars, arrays, records) +- Converting record lists to structured data + +### What's Needed + +1. **Update `flopy4/mf6/codec/reader/__init__.py`**: + + ```python + # Current (uses BasicTransformer) + def load(fp: IO[str]) -> Any: + return BASIC_TRANSFORMER.transform(BASIC_PARSER.parse(data)) + + # Proposed (dispatch to TypedTransformer) + def load(fp: IO[str], component_type: type[Component] = None) -> Any: + tree = BASIC_PARSER.parse(fp.read()) + + if component_type: + dfn = component_type.to_dfn() + transformer = TypedTransformer(dfn=dfn) + else: + transformer = BASIC_TRANSFORMER + + return transformer.transform(tree) + ``` + +2. **Update `flopy4/mf6/__init__.py` loaders**: + + Pass `component_type` through to `load()`: + + ```python + def _load_mf6(cls, path: Path) -> Component: + with open(path, "r") as fp: + # Pass cls to load() so it can use TypedTransformer + data = load_mf6(fp, component_type=cls) + return structure(data, path, cls) + ``` + +3. **Benefits**: + - Eliminates the need for block name mapping in structuring + - Scalars are extracted automatically (no `[['NPER', 2]]` issues) + - Field types applied during transformation + - Single source of truth (DFN) for both parsing and structuring + +### Alternative: Quick Fix + +If changing the loader signature is too invasive, could detect component type from file content: + +```python +def load(fp: IO[str]) -> Any: + content = fp.read() + tree = BASIC_PARSER.parse(content) + + # Detect component type from grammar start rule + # (e.g., sim-tdis grammar → use Tdis.to_dfn()) + component_type = _detect_component_type(tree) + + if component_type: + transformer = TypedTransformer(dfn=component_type.to_dfn()) + else: + transformer = BASIC_TRANSFORMER + + return transformer.transform(tree) +``` + +## Current State + +- `TypedTransformer` exists and is fully implemented +- All components have `to_dfn()` classmethod +- Just needs wiring in the loader + +## Testing + +After migration, the current test should pass: +```python +sim = Simulation.load(sim_path) +# TDIS, Model, IMS load correctly with typed fields +``` + +## Related + +This migration unblocks full end-to-end loading which is currently blocked at TDIS structuring.