Skip to content

Commit 4c4a2f5

Browse files
Refactor: add list of Arrow schema implementations to the data frame.
1 parent a9c11fc commit 4c4a2f5

File tree

4 files changed

+35
-12
lines changed

4 files changed

+35
-12
lines changed

src/oracledb/arrow_impl.pxd

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,12 @@ cdef class ArrowArrayImpl:
152152
cdef int get_uint(self, ArrowType arrow_type, int64_t index, bint* is_null,
153153
uint64_t* value) except -1
154154
cdef object get_vector(self, int64_t index, bint* is_null)
155-
cdef int populate_from_array(self, ArrowSchema* schema,
155+
cdef int populate_from_array(self, ArrowSchemaImpl schema_impl,
156156
ArrowArray* array) except -1
157157
cdef int populate_from_schema(self, ArrowSchemaImpl schema_impl) except -1
158158

159159

160160
cdef class DataFrameImpl:
161161
cdef:
162+
list schema_impls
162163
list arrays

src/oracledb/impl/arrow/array.pyx

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -484,27 +484,33 @@ cdef class ArrowArrayImpl:
484484
object implementing the PyCapsule Arrow array interface.
485485
"""
486486
cdef:
487+
ArrowSchemaImpl schema_impl
487488
ArrowArrayImpl array_impl
488489
ArrowSchema *arrow_schema
489490
ArrowArray *arrow_array
491+
492+
# convert schema
490493
schema_capsule, array_capsule = obj.__arrow_c_array__()
491494
arrow_schema = <ArrowSchema*> cpython.PyCapsule_GetPointer(
492495
schema_capsule, "arrow_schema"
493496
)
497+
schema_impl = ArrowSchemaImpl.__new__(ArrowSchemaImpl)
498+
schema_impl.populate_from_schema(arrow_schema)
499+
500+
# convert array
494501
arrow_array = <ArrowArray*> cpython.PyCapsule_GetPointer(
495502
array_capsule, "arrow_array"
496503
)
497504
array_impl = ArrowArrayImpl.__new__(ArrowArrayImpl)
498-
array_impl.schema_impl = ArrowSchemaImpl.__new__(ArrowSchemaImpl)
499-
array_impl.populate_from_array(arrow_schema, arrow_array)
505+
array_impl.populate_from_array(schema_impl, arrow_array)
500506
return array_impl
501507

502-
cdef int populate_from_array(self, ArrowSchema* schema,
508+
cdef int populate_from_array(self, ArrowSchemaImpl schema_impl,
503509
ArrowArray* array) except -1:
504510
"""
505511
Populate the array from another array.
506512
"""
507-
self.schema_impl.populate_from_schema(schema)
513+
self.schema_impl = schema_impl
508514
ArrowArrayMove(array, self.arrow_array)
509515

510516
cdef int populate_from_schema(self, ArrowSchemaImpl schema_impl) except -1:

src/oracledb/impl/arrow/dataframe.pyx

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,34 @@ cdef class DataFrameImpl:
3838
"""
3939
cdef:
4040
ArrowArrayStream *arrow_stream
41+
ArrowSchemaImpl schema_impl
42+
ArrowArrayImpl array_impl
4143
ArrowSchema arrow_schema
4244
ArrowArray arrow_array
4345
DataFrameImpl df_impl
44-
ArrowArrayImpl array_impl
45-
ArrowSchemaImpl schema_impl
4646
ssize_t i
47+
48+
# initialization
4749
df_impl = DataFrameImpl.__new__(DataFrameImpl)
50+
df_impl.schema_impls = []
4851
df_impl.arrays = []
4952
capsule = obj.__arrow_c_stream__()
5053
arrow_stream = <ArrowArrayStream*> cpython.PyCapsule_GetPointer(
5154
capsule, "arrow_array_stream"
5255
)
56+
57+
# populate list of schemas
5358
_check_nanoarrow(arrow_stream.get_schema(arrow_stream, &arrow_schema))
59+
for i in range(arrow_schema.n_children):
60+
schema_impl = ArrowSchemaImpl.__new__(ArrowSchemaImpl)
61+
schema_impl.populate_from_schema(arrow_schema.children[i])
62+
df_impl.schema_impls.append(schema_impl)
63+
64+
# populate list of arrays
5465
_check_nanoarrow(arrow_stream.get_next(arrow_stream, &arrow_array))
5566
for i in range(arrow_schema.n_children):
5667
array_impl = ArrowArrayImpl.__new__(ArrowArrayImpl)
57-
array_impl.schema_impl = ArrowSchemaImpl.__new__(ArrowSchemaImpl)
58-
array_impl.populate_from_array(arrow_schema.children[i],
68+
array_impl.populate_from_array(df_impl.schema_impls[i],
5969
arrow_array.children[i])
6070
df_impl.arrays.append(array_impl)
6171
_check_nanoarrow(arrow_stream.get_next(arrow_stream, &arrow_array))
@@ -77,6 +87,7 @@ cdef class DataFrameImpl:
7787
encapsulates the arrays found in the data frame.
7888
"""
7989
cdef:
90+
ArrowSchemaImpl schema_impl
8091
ArrowArrayImpl array_impl
8192
ArrowArrayStream *stream
8293
int64_t i, num_arrays
@@ -100,14 +111,15 @@ cdef class DataFrameImpl:
100111
ArrowArrayInitFromType(&array, NANOARROW_TYPE_STRUCT)
101112
)
102113
_check_nanoarrow(ArrowArrayAllocateChildren(&array, num_arrays))
103-
for i, array_impl in enumerate(self.arrays):
114+
for i, schema_impl in enumerate(self.schema_impls):
115+
array_impl = self.arrays[i]
104116
array.length = array_impl.arrow_array.length
105117
copy_arrow_array(
106118
array_impl, array_impl.arrow_array, array.children[i]
107119
)
108120
_check_nanoarrow(
109121
ArrowSchemaDeepCopy(
110-
array_impl.schema_impl.arrow_schema, schema.children[i]
122+
schema_impl.arrow_schema, schema.children[i]
111123
)
112124
)
113125

src/oracledb/impl/base/cursor.pyx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -552,12 +552,16 @@ cdef class BaseCursorImpl:
552552
Flush all buffers and return an Oracle Data frame.
553553
"""
554554
cdef:
555+
ArrowArrayImpl array_impl
555556
DataFrameImpl df_impl
556557
BaseVarImpl var_impl
557558
df_impl = DataFrameImpl.__new__(DataFrameImpl)
559+
df_impl.schema_impls = []
558560
df_impl.arrays = []
559561
for var_impl in self.fetch_var_impls:
560-
df_impl.arrays.append(var_impl._finish_building_arrow_array())
562+
array_impl = var_impl._finish_building_arrow_array()
563+
df_impl.schema_impls.append(array_impl.schema_impl)
564+
df_impl.arrays.append(array_impl)
561565
return PY_TYPE_DATAFRAME._from_impl(df_impl)
562566

563567
def close(self, bint in_del=False):

0 commit comments

Comments
 (0)