22
33import json
44import re
5- from typing import TYPE_CHECKING , Optional , Union
5+ from typing import TYPE_CHECKING , List , Optional , Union
66
77import numpy as np
8- import pyarrow as pa
9- import pyarrow .compute as pc
8+ from arro3 .compute import struct_field
9+ from arro3 .core import (
10+ Array ,
11+ ChunkedArray ,
12+ Field ,
13+ Table ,
14+ fixed_size_list_array ,
15+ list_array ,
16+ )
1017
1118from lonboard ._constants import EXTENSION_NAME
1219
@@ -29,7 +36,7 @@ def from_duckdb(
2936 * ,
3037 con : Optional [duckdb .DuckDBPyConnection ] = None ,
3138 crs : Optional [Union [str , pyproj .CRS ]] = None ,
32- ) -> pa . Table :
39+ ) -> Table :
3340 geom_col_idxs = [
3441 i for i , t in enumerate (rel .types ) if str (t ) in DUCKDB_SPATIAL_TYPES
3542 ]
@@ -89,9 +96,9 @@ def _from_geometry(
8996 con : Optional [duckdb .DuckDBPyConnection ] = None ,
9097 geom_col_idx : int ,
9198 crs : Optional [Union [str , pyproj .CRS ]] = None ,
92- ) -> pa . Table :
99+ ) -> Table :
93100 other_col_names = [name for i , name in enumerate (rel .columns ) if i != geom_col_idx ]
94- non_geo_table = rel .select (* other_col_names ).arrow ()
101+ non_geo_table = Table . from_arrow ( rel .select (* other_col_names ).arrow () )
95102 geom_col_name = rel .columns [geom_col_idx ]
96103
97104 # A poor-man's string interpolation check
@@ -102,9 +109,11 @@ def _from_geometry(
102109 ), f"Expected geometry column name to match regex: { re_match } "
103110
104111 if con is not None :
105- geom_table = con .sql (f"""
112+ geom_table = Table .from_arrow (
113+ con .sql (f"""
106114 SELECT ST_AsWKB( { geom_col_name } ) as { geom_col_name } FROM rel;
107115 """ ).arrow ()
116+ )
108117 else :
109118 import duckdb
110119
@@ -119,7 +128,9 @@ def _from_geometry(
119128 SELECT ST_AsWKB( { geom_col_name } ) as { geom_col_name } FROM rel;
120129 """
121130 try :
122- geom_table = duckdb .execute (sql ).arrow ()
131+ geom_table = Table .from_arrow (
132+ duckdb .execute (sql , connection = duckdb .default_connection ).arrow ()
133+ )
123134 except duckdb .CatalogException as err :
124135 msg = (
125136 "Could not coerce type GEOMETRY to WKB.\n "
@@ -140,8 +151,8 @@ def _from_geoarrow(
140151 extension_type : EXTENSION_NAME ,
141152 geom_col_idx : int ,
142153 crs : Optional [Union [str , pyproj .CRS ]] = None ,
143- ) -> pa . Table :
144- table = rel .arrow ()
154+ ) -> Table :
155+ table = Table . from_arrow ( rel .arrow () )
145156 metadata = _make_geoarrow_field_metadata (extension_type , crs )
146157 geom_field = table .schema .field (geom_col_idx ).with_metadata (metadata )
147158 return table .set_column (geom_col_idx , geom_field , table .column (geom_col_idx ))
@@ -152,21 +163,24 @@ def _from_box2d(
152163 * ,
153164 geom_col_idx : int ,
154165 crs : Optional [Union [str , pyproj .CRS ]] = None ,
155- ) -> pa . Table :
156- table = rel .arrow ()
166+ ) -> Table :
167+ table = Table . from_arrow ( rel .arrow () )
157168 geom_col = table .column (geom_col_idx )
158169
159- polygon_array = _convert_box2d_to_geoarrow_polygon_array (geom_col )
170+ polygon_chunks : List [Array ] = []
171+ for geom_chunk in geom_col .chunks :
172+ polygon_array = _convert_box2d_to_geoarrow_polygon_array (geom_chunk )
173+ polygon_chunks .append (polygon_array )
160174
161175 metadata = _make_geoarrow_field_metadata (EXTENSION_NAME .POLYGON , crs )
162176 prev_field = table .schema .field (geom_col_idx )
163- geom_field = pa . field (prev_field .name , polygon_array .type , metadata = metadata )
164- return table .set_column (geom_col_idx , geom_field , polygon_array )
177+ geom_field = Field (prev_field .name , polygon_chunks [ 0 ] .type , metadata = metadata )
178+ return table .set_column (geom_col_idx , geom_field , ChunkedArray ( polygon_chunks ) )
165179
166180
167181def _convert_box2d_to_geoarrow_polygon_array (
168- geom_col : pa . StructArray ,
169- ) -> pa . ListArray :
182+ geom_col : Array ,
183+ ) -> Array :
170184 """
171185 This is a manual conversion of the duckdb box_2d type to a GeoArrow Polygon array.
172186
@@ -176,10 +190,10 @@ def _convert_box2d_to_geoarrow_polygon_array(
176190 # Extract the bounding box columns from the Arrow struct
177191 # NOTE: this assumes that the box ordering is minx, miny, maxx, maxy
178192 # Not sure whether the positional ordering or the named fields are more stable
179- min_x = pc . struct_field (geom_col , 0 )
180- min_y = pc . struct_field (geom_col , 1 )
181- max_x = pc . struct_field (geom_col , 2 )
182- max_y = pc . struct_field (geom_col , 3 )
193+ min_x = struct_field (geom_col , 0 )
194+ min_y = struct_field (geom_col , 1 )
195+ max_x = struct_field (geom_col , 2 )
196+ max_y = struct_field (geom_col , 3 )
183197
184198 # Provision memory for the output coordinates. For closed polygons, each input box
185199 # becomes 5 coordinates.
@@ -208,9 +222,10 @@ def _convert_box2d_to_geoarrow_polygon_array(
208222 geom_offsets = np .arange (0 , len (ring_offsets ), dtype = np .int32 )
209223
210224 # Construct the final PolygonArray
211- coords = pa .FixedSizeListArray .from_arrays (coords .ravel ("C" ), 2 )
212- ring_array = pa .ListArray .from_arrays (ring_offsets , coords )
213- polygon_array = pa .ListArray .from_arrays (geom_offsets , ring_array )
225+ flat_coords : Array = Array .from_numpy (coords .ravel ("C" ))
226+ coords = fixed_size_list_array (flat_coords , 2 )
227+ ring_array = list_array (Array .from_numpy (ring_offsets ), coords )
228+ polygon_array = list_array (Array .from_numpy (geom_offsets ), ring_array )
214229 return polygon_array
215230
216231
0 commit comments