Skip to content

Commit 1bb0850

Browse files
authored
chore: Ensure support for GeoArrow 0.2 CRS (#797)
Closes #791
1 parent 95b4724 commit 1bb0850

File tree

6 files changed

+174
-11
lines changed

6 files changed

+174
-11
lines changed

lonboard/_geoarrow/crs.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,56 @@
11
from __future__ import annotations
22

33
import json
4-
from typing import TYPE_CHECKING
4+
from typing import TYPE_CHECKING, Any
5+
6+
from pyproj import CRS
57

68
if TYPE_CHECKING:
79
from arro3.core import Field
810

911

1012
# Note: According to the spec, if the metadata key exists, its value should never be
1113
# `null` or an empty dict, but we still check for those to be safe
12-
def get_field_crs(field: Field) -> CRS | None:
    """Return the CRS stored in a field's GeoArrow extension metadata.

    Args:
        field: Arrow field whose string metadata may contain an
            ``ARROW:extension:metadata`` entry holding a JSON document.

    Returns:
        The parsed CRS, or None when the metadata entry is missing or empty,
        when it decodes to an empty/``null`` JSON value, or when it carries
        no ``crs`` value.
    """
    extension_metadata_value = field.metadata_str.get("ARROW:extension:metadata")
    if not extension_metadata_value:
        return None

    extension_metadata = json.loads(extension_metadata_value)
    # A JSON `null` or `{}` payload cannot describe a CRS. Bail out here so
    # that `parse_metadata` is never asked to call `.get` on `None`.
    if not extension_metadata:
        return None

    return parse_metadata(extension_metadata)
21+
22+
23+
def parse_metadata(extension_metadata: dict[str, Any]) -> CRS | None:
    """Parse a CRS out of decoded GeoArrow extension metadata.

    Args:
        extension_metadata: The JSON-decoded GeoArrow extension metadata. The
            ``crs`` key holds the CRS value and the optional ``crs_type`` key
            says how that value is encoded.

    Returns:
        The CRS built from the ``crs`` value. For ``crs_type`` values of
        ``"projjson"``, ``"wkt2:2019"`` and ``"authority_code"`` the matching
        pyproj constructor is used; for any other (or missing) ``crs_type``
        the value is handed to ``CRS.from_user_input``. Returns None when
        there is no ``crs`` value and no recognized ``crs_type``.

    Raises:
        ValueError: If a recognized ``crs_type`` is given without a ``crs``
            value, or an authority code is not of the form
            ``authority:code``.
        TypeError: If ``crs_type`` is ``"authority_code"`` but the ``crs``
            value is not a string.
    """
    crs_val = extension_metadata.get("crs")
    crs_type = extension_metadata.get("crs_type")

    # Explicit raises instead of `assert`: assertions are stripped when Python
    # runs with -O, which would let invalid metadata through unchecked.
    if crs_type == "projjson":
        if crs_val is None:
            raise ValueError("CRS value must be provided for projjson type")
        return CRS.from_json_dict(crs_val)

    if crs_type == "wkt2:2019":
        if crs_val is None:
            raise ValueError("CRS value must be provided for WKT2:2019 type")
        return CRS.from_wkt(crs_val)

    if crs_type == "authority_code":
        if crs_val is None:
            raise ValueError("CRS value must be provided for authority code type")
        # Check the type before using `in`, so a non-string value is reported
        # as such rather than as a formatting problem.
        if not isinstance(crs_val, str):
            raise TypeError("CRS value must be a string")
        if ":" not in crs_val:
            raise ValueError("Authority code must be in the format 'authority:code'")
        auth_name, code = crs_val.split(":", 1)
        return CRS.from_authority(auth_name, code)

    # Missing or unrecognized crs_type: let pyproj infer the encoding.
    if crs_val is None:
        return None
    return CRS.from_user_input(crs_val)
42+
43+
44+
def serialize_crs(crs: CRS | None) -> dict[str, Any] | None:
    """Build the GeoArrow metadata dict that describes ``crs``.

    The result is meant to be JSON-encoded and stored under the
    ARROW:extension:metadata key. The CRS is always written as PROJJSON.
    Returns None when no CRS is given.
    """
    if crs is not None:
        projjson = crs.to_json_dict()
        return {"crs": projjson, "crs_type": "projjson"}

    return None

lonboard/_geoarrow/extension_types.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@
77
import numpy as np
88
from arro3.core import Array, DataType, Field, fixed_size_list_array, list_array
99

10+
from lonboard._geoarrow.crs import serialize_crs
11+
1012
if TYPE_CHECKING:
1113
from collections.abc import Sequence
1214

1315
from numpy.typing import NDArray
16+
from pyproj import CRS
1417

1518

1619
class CoordinateDimension(str, Enum):
@@ -200,7 +203,7 @@ def construct_geometry_array( # noqa: PLR0915
200203
include_z: bool | None = None, # noqa: FBT001
201204
*,
202205
field_name: str = "geometry",
203-
crs: dict | None = None,
206+
crs: CRS | None = None,
204207
) -> tuple[Field, Array]:
205208
import shapely
206209
from shapely import GeometryType
@@ -220,7 +223,7 @@ def construct_geometry_array( # noqa: PLR0915
220223

221224
extension_metadata: dict[str, str] = {}
222225
if crs is not None:
223-
extension_metadata["ARROW:extension:metadata"] = json.dumps({"crs": crs})
226+
extension_metadata["ARROW:extension:metadata"] = json.dumps(serialize_crs(crs))
224227

225228
if geom_type == GeometryType.POINT:
226229
arrow_coords = fixed_size_list_array(coords.ravel("C"), len(dims)).cast(

lonboard/_geoarrow/geopandas_interop.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def geopandas_to_geoarrow(
3232
pyarrow_table = pa.Table.from_pandas(df_attr, preserve_index=preserve_index)
3333
field, geom_arr = construct_geometry_array(
3434
np.array(gdf.geometry),
35-
crs=gdf.crs.to_json_dict() if gdf.crs is not None else None,
35+
crs=gdf.crs,
3636
)
3737
return Table.from_arrow(pyarrow_table).append_column(
3838
field,

lonboard/_geoarrow/ops/reproject.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,11 @@ def reproject_column(
9696
9797
"""
9898
extension_type_name = field.metadata[b"ARROW:extension:name"]
99-
crs_str = get_field_crs(field)
100-
if crs_str is None:
99+
existing_crs = get_field_crs(field)
100+
if existing_crs is None:
101101
no_crs_warning()
102102
return field, column
103103

104-
existing_crs = CRS(crs_str)
105-
106104
if existing_crs == to_crs:
107105
return field, column
108106

tests/geoarrow/__init__.py

Whitespace-only changes.

tests/geoarrow/test_crs.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import json
2+
3+
from arro3.core import Field
4+
from pyproj import CRS
5+
6+
from lonboard._geoarrow.crs import get_field_crs
7+
from lonboard._geoarrow.extension_types import CoordinateDimension, coord_storage_type
8+
9+
10+
def _point_field(extension_metadata=None) -> Field:
    """Build a ``geoarrow.point`` field for the CRS parsing tests.

    When ``extension_metadata`` is not None it is JSON-encoded and stored
    under ``ARROW:extension:metadata``; otherwise that key is omitted.
    """
    geoarrow_meta = {"ARROW:extension:name": "geoarrow.point"}
    if extension_metadata is not None:
        geoarrow_meta["ARROW:extension:metadata"] = json.dumps(extension_metadata)

    return Field(
        name="geometry",
        type=coord_storage_type(interleaved=True, dims=CoordinateDimension.XY),
        nullable=True,
        metadata=geoarrow_meta,
    )


# NOTE: these functions carry the `test_` prefix so that pytest's default
# discovery rules actually collect and run them.
def test_parse_geoarrow_projjson_crs():
    """A PROJJSON dict with crs_type "projjson" round-trips to the same CRS."""
    expected = CRS.from_epsg(4326)
    meta = {
        "crs": expected.to_json_dict(),
        "crs_type": "projjson",
    }
    assert get_field_crs(_point_field(meta)) == expected


def test_parse_geoarrow_wkt_crs():
    """A WKT2:2019 string with crs_type "wkt2:2019" parses to the same CRS."""
    expected = CRS.from_epsg(4326)
    meta = {
        "crs": expected.to_wkt(version="WKT2:2019"),
        "crs_type": "wkt2:2019",
    }
    assert get_field_crs(_point_field(meta)) == expected


def test_parse_geoarrow_authority_crs():
    """An "authority:code" string with crs_type "authority_code" is parsed."""
    expected = CRS.from_epsg(4326)
    meta = {
        "crs": ":".join(expected.to_authority()),
        "crs_type": "authority_code",
    }
    assert get_field_crs(_point_field(meta)) == expected


def test_parse_geoarrow_srid_crs():
    """A bare EPSG code string with crs_type "srid" is parsed."""
    expected = CRS.from_epsg(4326)
    meta = {
        "crs": str(expected.to_epsg()),
        "crs_type": "srid",
    }
    assert get_field_crs(_point_field(meta)) == expected


def test_parse_geoarrow_unknown_crs_type():
    """A CRS value without any crs_type is still parsed."""
    expected = CRS.from_epsg(4326)
    meta = {
        "crs": expected.to_wkt(),
    }
    assert get_field_crs(_point_field(meta)) == expected


def test_parse_geoarrow_no_crs():
    """Empty extension metadata and absent extension metadata both yield None."""
    # Metadata key present, but the JSON document is an empty object.
    assert get_field_crs(_point_field({})) is None

    # Metadata key absent altogether.
    assert get_field_crs(_point_field(None)) is None

0 commit comments

Comments
 (0)