@@ -31,17 +31,22 @@ cimport cpython
3131
3232from libc.stdint cimport uintptr_t
3333from libc.string cimport memcpy, strlen, strchr
34- from cpython.pycapsule cimport PyCapsule_New
3534
3635from .. import errors
3736
3837cdef extern from " nanoarrow/nanoarrow.c" :
3938
4039 ctypedef int ArrowErrorCode
4140
41+ ctypedef void (* ArrowBufferDeallocatorCallback)
42+
43+ cdef struct ArrowBufferAllocator:
44+ void * private_data
45+
4246 cdef struct ArrowBuffer:
4347 uint8_t * data
4448 int64_t size_bytes
49+ ArrowBufferAllocator allocator
4550
4651 cdef union ArrowBufferViewData:
4752 const void * data
@@ -65,6 +70,8 @@ cdef extern from "nanoarrow/nanoarrow.c":
6570
6671 cdef ArrowErrorCode NANOARROW_OK
6772
73+ ArrowErrorCode ArrowArrayAllocateChildren(ArrowArray * array,
74+ int64_t n_children)
6875 ArrowErrorCode ArrowArrayAppendBytes(ArrowArray* array,
6976 ArrowBufferView value)
7077 ArrowErrorCode ArrowArrayAppendDecimal(ArrowArray* array,
@@ -88,11 +95,15 @@ cdef extern from "nanoarrow/nanoarrow.c":
8895 const ArrowArray* array,
8996 ArrowError* error)
9097 int8_t ArrowBitGet(const uint8_t* bits, int64_t i)
98+ ArrowBufferAllocator ArrowBufferDeallocator(ArrowBufferDeallocatorCallback,
99+ void * private_data)
91100 void ArrowDecimalInit(ArrowDecimal* decimal, int32_t bitwidth,
92101 int32_t precision, int32_t scale)
93102 void ArrowDecimalSetBytes(ArrowDecimal * decimal, const uint8_t* value)
94103 ArrowErrorCode ArrowDecimalSetDigits(ArrowDecimal* decimal,
95104 ArrowStringView value)
105+ ArrowErrorCode ArrowSchemaDeepCopy(const ArrowSchema * schema,
106+ ArrowSchema * schema_out)
96107 void ArrowSchemaInit(ArrowSchema* schema)
97108 ArrowErrorCode ArrowSchemaInitFromType(ArrowSchema* schema, ArrowType type )
98109 void ArrowSchemaRelease(ArrowSchema * schema)
@@ -117,22 +128,13 @@ cdef int _check_nanoarrow(int code) except -1:
117128 errors._raise_err(errors.ERR_ARROW_C_API_ERROR, code = code)
118129
119130
120- cdef void array_deleter(ArrowArray * array) noexcept:
121- """
122- Called when an external library calls the release for an Arrow array. This
123- method simply marks the release as completed but doesn't actually do it, so
124- that the handling of duplicate rows can still make use of the array, even
125- if the external library no longer requires it!
126- """
127- array.release = NULL
128-
129-
cdef void pycapsule_array_deleter(object array_capsule) noexcept:
    """
    Destructor attached to the "arrow_array" PyCapsule. Invokes the release
    callback of the ArrowArray stored in the capsule when that callback is
    still set, and then frees the memory holding the ArrowArray structure.
    """
    cdef ArrowArray *exported_array
    exported_array = <ArrowArray*> cpython.PyCapsule_GetPointer(
        array_capsule, "arrow_array"
    )

    # a NULL release callback means there is nothing left to release; only
    # the structure itself remains to be freed
    if exported_array.release != NULL:
        ArrowArrayRelease(exported_array)
    cpython.PyMem_Free(exported_array)
136138
137139
138140cdef void pycapsule_schema_deleter(object schema_capsule) noexcept:
@@ -141,6 +143,65 @@ cdef void pycapsule_schema_deleter(object schema_capsule) noexcept:
141143 )
142144 if schema.release != NULL :
143145 ArrowSchemaRelease(schema)
146+ cpython.PyMem_Free(schema)
147+
148+
cdef void arrow_buffer_dealloc_callback(ArrowBufferAllocator *allocator,
                                        uint8_t *ptr,
                                        int64_t size) noexcept with gil:
    """
    ArrowBufferDeallocatorCallback for an ArrowBuffer borrowed from
    OracleArrowArray.

    The allocator's private_data holds a pointer to the OracleArrowArray
    that owns the buffer memory. The memory itself (ptr, size) is not freed
    here; the only action taken is dropping one reference to the owner.

    The function is declared "noexcept" because it is handed to nanoarrow as
    a plain C callback and so has no caller able to handle a Python
    exception. It is declared "with gil" because Py_DECREF requires the GIL
    and the buffer may be released from code that does not hold it.
    """
    cpython.Py_DECREF(<OracleArrowArray> allocator.private_data)
156+
157+
cdef int copy_arrow_array(OracleArrowArray oracle_arrow_array,
                          ArrowArray *src, ArrowArray *dest) except -1:
    """
    Shallow copy source ArrowArray to destination ArrowArray. The source
    ArrowArray belongs to the wrapper OracleArrowArray. The shallow copy idea
    is borrowed from nanoarrow:
    https://github.com/apache/arrow-nanoarrow/blob/main/python

    Parameters:
        oracle_arrow_array: wrapper object owning the memory referenced by
            src; one reference to it is taken for every buffer borrowed
        src: the array to copy from (the wrapper's array or one of its
            children)
        dest: the array to initialize and populate

    Returns 0 on success and -1 with a Python exception set on failure. If
    a failure occurs after dest has been initialized, dest is released
    before the exception propagates so that the caller only needs to free
    the memory of the structure itself.
    """
    cdef:
        ArrowBuffer *dest_buffer
        ssize_t i
    _check_nanoarrow(
        ArrowArrayInitFromType(dest, NANOARROW_TYPE_UNINITIALIZED)
    )

    try:

        # Copy metadata
        dest.length = src.length
        dest.offset = src.offset
        dest.null_count = src.null_count

        # Borrow an ArrowBuffer belonging to OracleArrowArray. The
        # ArrowBuffer can belong to an immediate ArrowArray or a child (in
        # case of nested types). Either way, we Py_INCREF the
        # oracle_arrow_array, so that it is not prematurely garbage
        # collected. The corresponding Py_DECREF happens in the
        # ArrowBufferDeallocator callback.
        for i in range(src.n_buffers):
            if src.buffers[i] != NULL:
                dest_buffer = ArrowArrayBuffer(dest, i)
                dest_buffer.data = <uint8_t*> src.buffers[i]
                dest_buffer.size_bytes = 0
                dest_buffer.allocator = ArrowBufferDeallocator(
                    <ArrowBufferDeallocatorCallback>
                    arrow_buffer_dealloc_callback,
                    <void*> oracle_arrow_array
                )
                cpython.Py_INCREF(oracle_arrow_array)
                dest.buffers[i] = src.buffers[i]
        dest.n_buffers = src.n_buffers

        # shallow copy of children (recursive call)
        if src.n_children > 0:
            _check_nanoarrow(
                ArrowArrayAllocateChildren(dest, src.n_children)
            )
            for i in range(src.n_children):
                copy_arrow_array(
                    oracle_arrow_array, src.children[i], dest.children[i]
                )

    except:

        # Do not leave a half-built array behind: releasing dest is expected
        # to run the deallocator callback for each buffer borrowed so far
        # (dropping the references taken above) and to release any children
        # already copied -- NOTE(review): relies on nanoarrow's release
        # implementation skipping children that were never initialized.
        if dest.release != NULL:
            ArrowArrayRelease(dest)
        raise
144205
145206
146207cdef class OracleArrowArray:
@@ -187,8 +248,6 @@ cdef class OracleArrowArray:
187248
188249 def __dealloc__ (self ):
189250 if self .arrow_array != NULL :
190- if self .arrow_array.release == NULL :
191- self .arrow_array.release = self .actual_array_release
192251 if self .arrow_array.release != NULL :
193252 ArrowArrayRelease(self .arrow_array)
194253 cpython.PyMem_Free(self .arrow_array)
@@ -409,6 +468,26 @@ cdef class OracleArrowArray:
409468 def offset(self ) -> int:
410469 return self.arrow_array.offset
411470
def __arrow_c_schema__(self):
    """
    Export an ArrowSchema PyCapsule

    A deep copy of this array's schema is placed in newly allocated memory
    and wrapped in a capsule named "arrow_schema". The capsule destructor
    (pycapsule_schema_deleter) takes over responsibility for the copy.

    Raises MemoryError if the memory for the exported schema cannot be
    allocated; an error is also raised (via _check_nanoarrow) if the deep
    copy fails.
    """
    cdef ArrowSchema *exported_schema = \
            <ArrowSchema*> cpython.PyMem_Malloc(sizeof(ArrowSchema))
    if exported_schema == NULL:
        raise MemoryError()

    # freshly allocated memory is uninitialized; make sure the cleanup code
    # below never sees a garbage release callback
    exported_schema.release = NULL
    try:
        _check_nanoarrow(
            ArrowSchemaDeepCopy(self.arrow_schema, exported_schema)
        )
        return cpython.PyCapsule_New(
            exported_schema, 'arrow_schema', &pycapsule_schema_deleter
        )
    except:
        # no capsule owns the schema at this point, so release whatever the
        # deep copy produced (if anything) before freeing the structure
        if exported_schema.release != NULL:
            ArrowSchemaRelease(exported_schema)
        cpython.PyMem_Free(exported_schema)
        raise
490+
412491 def __arrow_c_array__ (self , requested_schema = None ):
413492 """
414493 Returns
@@ -419,13 +498,14 @@ cdef class OracleArrowArray:
419498 """
420499 if requested_schema is not None :
421500 raise NotImplementedError (" requested_schema" )
422-
423- array_capsule = PyCapsule_New(
424- self .arrow_array, ' arrow_array' , & pycapsule_array_deleter
425- )
426- self .actual_array_release = self .arrow_array.release
427- self .arrow_array.release = array_deleter
428- schema_capsule = PyCapsule_New(
429- self .arrow_schema, " arrow_schema" , & pycapsule_schema_deleter
430- )
431- return schema_capsule, array_capsule
501+ cdef ArrowArray * exported_array = \
502+ < ArrowArray * > cpython.PyMem_Malloc(sizeof(ArrowArray))
503+ try :
504+ copy_arrow_array(self , self .arrow_array, exported_array)
505+ array_capsule = cpython.PyCapsule_New(
506+ exported_array, ' arrow_array' , & pycapsule_array_deleter
507+ )
508+ except :
509+ cpython.PyMem_Free(exported_array)
510+ raise
511+ return self .__arrow_c_schema__(), array_capsule
0 commit comments