Skip to content

Commit 83fd834

Browse files
committed
end of day commit
missed a spot little refactor pycall and conda to extras
1 parent 005c946 commit 83fd834

File tree

6 files changed

+176
-69
lines changed

6 files changed

+176
-69
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ Manifest.toml
55
*.jl.mem
66

77
test/_scrap.jl
8-
.DS_STORE
8+
.vscode

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
3333
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
3434
StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4"
3535
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
36+
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
37+
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
3638

3739
[targets]
38-
test = ["Test", "Random", "JSON3", "StructTypes", "CategoricalArrays"]
40+
test = ["Test", "Random", "JSON3", "StructTypes", "CategoricalArrays", "PyCall", "Conda"]

src/Arrow.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ include("arraytypes/arraytypes.jl")
9292
include("eltypes.jl")
9393
include("table.jl")
9494
include("write.jl")
95-
include("cinterface.jl")
95+
include("CDataInterface/CDataInterface.jl")
9696

9797
const LZ4_FRAME_COMPRESSOR = LZ4FrameCompressor[]
9898
const ZSTD_COMPRESSOR = ZstdCompressor[]
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
module CDataInterface

export ArrowSchema, ArrowArray, getschema, getarray

# Expected to provide `CArrowSchema` and `CArrowArray`, which everything below
# relies on.
include("c_definitions.jl")

"""
    ArrowSchema

Julia-side snapshot of the properties read from a `CArrowSchema`, together with
the `Ref{CArrowSchema}` the snapshot was taken from (`carrowschema`).
"""
mutable struct ArrowSchema
    format::String
    name::String
    metadata::Dict{String, String}
    flags::Int64
    n_children::Int64
    children::Vector{ArrowSchema}
    dictionary::Union{Nothing, ArrowSchema}
    carrowschema::Ref{CArrowSchema}
end

"""
    ArrowSchema(s::Ref{CArrowSchema})

Build an `ArrowSchema` from the struct stored in `s`, converting children and the
optional dictionary recursively. `s` itself is kept in the `carrowschema` field.
"""
function ArrowSchema(s::Ref{CArrowSchema})
    c = s[]
    dict = c.dictionary
    return ArrowSchema(
        c.format,
        c.name,
        c.metadata,
        c.flags,
        c.n_children,
        map(ArrowSchema, c.children),
        dict === nothing ? nothing : ArrowSchema(dict),
        s,
    )
end

"""
    ArrowSchema(s::CArrowSchema)

Build an `ArrowSchema` from a `CArrowSchema` value (used for children and
dictionaries). The `carrowschema` field is a fresh `Ref{CArrowSchema}()` that is
never assigned here.
"""
function ArrowSchema(s::CArrowSchema)
    dict = s.dictionary
    return ArrowSchema(
        s.format,
        s.name,
        s.metadata,
        s.flags,
        s.n_children,
        map(ArrowSchema, s.children),
        dict === nothing ? nothing : ArrowSchema(dict),
        # NOTE(review): this Ref is left unassigned, so its contents should not be
        # read; confirm nothing dereferences `carrowschema` on child schemas.
        Ref{CArrowSchema}(),
    )
end

"""
    getschema(f) -> ArrowSchema

Allocate a `Ref{CArrowSchema}`, call `f(ptr)` with a `Ptr{CArrowSchema}` to it,
and wrap the result in an `ArrowSchema`.

`f` is presumably a producer that fills in the struct behind `ptr` -- confirm
against callers. A finalizer is attached to the returned object; it calls the
struct's `release` function pointer when that pointer is not `C_NULL`.
"""
function getschema(f)
    schema_ref = Ref{CArrowSchema}()
    # The raw pointer is only valid while `schema_ref` is rooted, so keep it
    # explicitly preserved for the duration of the callback.
    GC.@preserve schema_ref begin
        ptr = Base.unsafe_convert(Ptr{CArrowSchema}, schema_ref)
        f(ptr)
    end
    sch = ArrowSchema(schema_ref)
    finalizer(sch) do x
        # `getfield` bypasses the `getproperty` overload to reach the raw pointer.
        r = getfield(x.carrowschema[], :release)
        if r != C_NULL
            ccall(r, Cvoid, (Ptr{CArrowSchema},), x.carrowschema)
        end
    end
    return sch
end

"""
    ArrowArray

Julia-side snapshot of the properties read from a `CArrowArray`, together with
the `Ref{CArrowArray}` the snapshot was taken from (`c_arrow_array`).
"""
mutable struct ArrowArray
    length::Int64
    null_count::Int64
    offset::Int64
    n_buffers::Int64
    n_children::Int64
    buffers::Vector{Ptr{UInt8}}
    children::Vector{ArrowArray}
    dictionary::Union{Nothing, ArrowArray}
    c_arrow_array::Ref{CArrowArray}
end

"""
    ArrowArray(a::Ref{CArrowArray})

Build an `ArrowArray` from the struct stored in `a`, converting children and the
optional dictionary recursively. `a` itself is kept in the `c_arrow_array` field.
"""
function ArrowArray(a::Ref{CArrowArray})
    c = a[]
    dict = c.dictionary
    return ArrowArray(
        c.length,
        c.null_count,
        c.offset,
        c.n_buffers,
        c.n_children,
        c.buffers,
        map(ArrowArray, c.children),
        dict === nothing ? nothing : ArrowArray(dict),
        a,
    )
end

"""
    ArrowArray(a::CArrowArray)

Build an `ArrowArray` from a `CArrowArray` value (used for children and
dictionaries). The `c_arrow_array` field is a fresh `Ref{CArrowArray}()` that is
never assigned here.
"""
function ArrowArray(a::CArrowArray)
    dict = a.dictionary
    return ArrowArray(
        a.length,
        a.null_count,
        a.offset,
        a.n_buffers,
        a.n_children,
        a.buffers,
        map(ArrowArray, a.children),
        dict === nothing ? nothing : ArrowArray(dict),
        # NOTE(review): this Ref is left unassigned, so its contents should not be
        # read; confirm nothing dereferences `c_arrow_array` on child arrays.
        Ref{CArrowArray}(),
    )
end

"""
    getarray(f) -> ArrowArray

Allocate a `Ref{CArrowArray}`, call `f(ptr)` with a `Ptr{CArrowArray}` to it, and
wrap the result in an `ArrowArray`.

`f` is presumably a producer that fills in the struct behind `ptr` -- confirm
against callers. A finalizer is attached to the returned object; it calls the
struct's `release` function pointer when that pointer is not `C_NULL`.
"""
function getarray(f)
    arr_ref = Ref{CArrowArray}()
    # The raw pointer is only valid while `arr_ref` is rooted, so keep it
    # explicitly preserved for the duration of the callback.
    GC.@preserve arr_ref begin
        ptr = Base.unsafe_convert(Ptr{CArrowArray}, arr_ref)
        f(ptr)
    end
    arr = ArrowArray(arr_ref)
    finalizer(arr) do x
        # `getfield` bypasses the `getproperty` overload to reach the raw pointer.
        r = getfield(x.c_arrow_array[], :release)
        if r != C_NULL
            ccall(r, Cvoid, (Ptr{CArrowArray},), x.c_arrow_array)
        end
    end
    return arr
end

end # module

src/cinterface.jl renamed to src/CDataInterface/c_definitions.jl

Lines changed: 3 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
module CData
2-
3-
export ArrowSchema, ArrowArray, getschema, getarray
4-
51
const ARROW_FLAG_DICTIONARY_ORDERED = 1
62
const ARROW_FLAG_NULLABLE = 2
73
const ARROW_FLAG_MAP_KEYS_SORTED = 4
@@ -22,7 +18,7 @@ CArrowSchema() = CArrowSchema(C_NULL, C_NULL, C_NULL, 0, 0, C_NULL, C_NULL, _CNU
2218

2319
Base.propertynames(::CArrowSchema) = (:format, :name, :metadata, :flags, :n_children, :children, :dictionary)
2420

25-
function readmetadata(ptr::Ptr{UInt8})
21+
function read_c_arrow_schema_metadata(ptr::Ptr{UInt8})
2622
pos = 1
2723
meta = Dict{String, String}()
2824
if ptr != C_NULL
@@ -49,7 +45,7 @@ function Base.getproperty(x::CArrowSchema, nm::Symbol)
4945
elseif nm === :name
5046
return unsafe_string(getfield(x, :name))
5147
elseif nm === :metadata
52-
return readmetadata(getfield(x, :metadata))
48+
return read_c_arrow_schema_metadata(getfield(x, :metadata))
5349
elseif nm === :flags
5450
return getfield(x, :flags)
5551
elseif nm === :n_children
@@ -64,34 +60,6 @@ function Base.getproperty(x::CArrowSchema, nm::Symbol)
6460
error("unknown property requested: $nm")
6561
end
6662

67-
mutable struct ArrowSchema
68-
format::String
69-
name::String
70-
metadata::Dict{String, String}
71-
flags::Int64
72-
n_children::Int64
73-
children::Vector{ArrowSchema}
74-
dictionary::Union{Nothing, ArrowSchema}
75-
carrowschema::Ref{CArrowSchema}
76-
end
77-
78-
ArrowSchema(s::Ref{CArrowSchema}) = ArrowSchema(s[].format, s[].name, s[].metadata, s[].flags, s[].n_children, map(ArrowSchema, s[].children), s[].dictionary === nothing ? nothing : ArrowSchema(s[].dictionary), s)
79-
ArrowSchema(s::CArrowSchema) = ArrowSchema(s.format, s.name, s.metadata, s.flags, s.n_children, map(ArrowSchema, s.children), s.dictionary === nothing ? nothing : ArrowSchema(s.dictionary), Ref{CArrowSchema}())
80-
81-
function getschema(f)
82-
schref = Ref{CArrowSchema}()
83-
ptr = Base.unsafe_convert(Ptr{CArrowSchema}, schref)
84-
f(ptr)
85-
sch = ArrowSchema(schref)
86-
finalizer(sch) do x
87-
r = getfield(x.carrowschema[], :release)
88-
if r != C_NULL
89-
ccall(r, Cvoid, (Ptr{CArrowSchema},), x.carrowschema)
90-
end
91-
end
92-
return sch
93-
end
94-
9563
struct CArrowArray
9664
length::Int64
9765
null_count::Int64
@@ -131,35 +99,4 @@ function Base.getproperty(x::CArrowArray, nm::Symbol)
13199
return d == C_NULL ? nothing : unsafe_load(d)
132100
end
133101
error("unknown property requested: $nm")
134-
end
135-
136-
mutable struct ArrowArray
137-
length::Int64
138-
null_count::Int64
139-
offset::Int64
140-
n_buffers::Int64
141-
n_children::Int64
142-
buffers::Vector{Ptr{UInt8}}
143-
children::Vector{ArrowArray}
144-
dictionary::Union{Nothing, ArrowArray}
145-
carrowarray::Ref{CArrowArray}
146-
end
147-
148-
ArrowArray(a::Ref{CArrowArray}) = ArrowArray(a[].length, a[].null_count, a[].offset, a[].n_buffers, a[].n_children, a[].buffers, map(ArrowArray, a[].children), a[].dictionary === nothing ? nothing : ArrowArray(a[].dictionary), a)
149-
ArrowArray(a::CArrowArray) = ArrowArray(a.length, a.null_count, a.offset, a.n_buffers, a.n_children, a.buffers, map(ArrowArray, a.children), a.dictionary === nothing ? nothing : ArrowArray(a.dictionary), Ref{CArrowArray}())
150-
151-
function getarray(f)
152-
arrref = Ref{CArrowArray}()
153-
ptr = Base.unsafe_convert(Ptr{CArrowArray}, arrref)
154-
f(ptr)
155-
arr = ArrowArray(arrref)
156-
finalizer(arr) do x
157-
r = getfield(x.carrowarray[], :release)
158-
if r != C_NULL
159-
ccall(r, Cvoid, (Ptr{CArrowArray},), x.carrowarray)
160-
end
161-
end
162-
return arr
163-
end
164-
165-
end # module
102+
end
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# https://arrow.apache.org/docs/format/CDataInterface.html#data-type-description-format-strings
2+
3+
"""
    get_type_from_format_string(format_string::String) -> Type

Return the Julia type that corresponds to an Arrow C Data Interface format
string.

Primitive, binary and UTF-8 formats map to Base types. Nested formats (`+l`,
`+L`, `+w:N`, `+s`, `+m`, `+ud:...`, `+us:...`) map to the matching `Arrow`
container type; the parameters after the colon are validated where cheap but are
not yet reflected in the returned type.

# Throws
- `ArgumentError` if `format_string` is empty, malformed, not recognised, or
  belongs to a family that is not implemented yet (decimal `d:`, fixed-width
  binary `w:`, temporal `t...`).
"""
function get_type_from_format_string(format_string::String)::Type
    isempty(format_string) &&
        throw(ArgumentError("Arrow format string must not be empty"))

    # Formats that are fully described by the whole string.
    if format_string == "n"
        return Nothing
    elseif format_string == "b"
        return Bool
    elseif format_string == "c"
        return Int8
    elseif format_string == "C"
        return UInt8
    elseif format_string == "s"
        return Int16
    elseif format_string == "S"
        return UInt16
    elseif format_string == "i"
        return Int32
    elseif format_string == "I"
        return UInt32
    elseif format_string == "l"
        return Int64
    elseif format_string == "L"
        return UInt64
    elseif format_string == "e"
        return Float16
    elseif format_string == "f"
        return Float32
    elseif format_string == "g"
        return Float64
    elseif format_string == "z" || format_string == "Z"
        return Vector{UInt8}
    elseif format_string == "u" || format_string == "U"
        return String
    end

    # Parameterised families, selected by their leading character.
    tag = first(format_string)
    if tag == 'd'
        # "d:precision,scale" or "d:precision,scale,bitwidth"
        parts = split(format_string[3:end], ",")
        length(parts) in (2, 3) || throw(ArgumentError(
            "malformed decimal format string: \"$format_string\""))
        precision = parse(Int, parts[1])
        scale = parse(Int, parts[2])
        # The spec defaults to 128 bits when no bit width is given.
        bitwidth = length(parts) == 3 ? parse(Int, parts[3]) : 128
        # TODO return a decimal type built from these parameters
        throw(ArgumentError(
            "decimal format strings are not supported yet (precision=$precision, " *
            "scale=$scale, bitwidth=$bitwidth)"))
    elseif tag == 'w'
        # TODO figure out fixed width binary
        throw(ArgumentError(
            "fixed-width binary format strings are not supported yet: " *
            "\"$format_string\""))
    elseif tag == '+'
        if startswith(format_string, "+l") || startswith(format_string, "+L")
            return Arrow.List
        elseif startswith(format_string, "+w")
            # "+w:N" -- parsing rejects a malformed size.
            # TODO use the parsed size somehow
            parse(Int, format_string[4:end])
            return Arrow.FixedSizeList
        elseif startswith(format_string, "+s")
            return Arrow.Struct
        elseif startswith(format_string, "+m")
            return Arrow.Map
        elseif startswith(format_string, "+ud")
            # TODO use the comma-separated type ids after "+ud:" somehow
            return Arrow.DenseUnion
        elseif startswith(format_string, "+us")
            # TODO use the comma-separated type ids after "+us:" somehow
            return Arrow.SparseUnion
        end
    elseif tag == 't'
        # TODO return something here
        throw(ArgumentError(
            "temporal format strings are not supported yet: \"$format_string\""))
    end

    throw(ArgumentError("unrecognized Arrow format string: \"$format_string\""))
end
65+
66+

0 commit comments

Comments
 (0)