Skip to content

Commit feb5fff

Browse files
committed
end of day commit
missed a spot; little refactor; PyCall and Conda to extras; little refactoring
1 parent 005c946 commit feb5fff

File tree

7 files changed

+176
-69
lines changed

7 files changed

+176
-69
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ Manifest.toml
55
*.jl.mem
66

77
test/_scrap.jl
8-
.DS_STORE
8+
.vscode

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
3333
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
3434
StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4"
3535
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
36+
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
37+
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
3638

3739
[targets]
38-
test = ["Test", "Random", "JSON3", "StructTypes", "CategoricalArrays"]
40+
test = ["Test", "Random", "JSON3", "StructTypes", "CategoricalArrays", "PyCall", "Conda"]

src/Arrow.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ include("arraytypes/arraytypes.jl")
9292
include("eltypes.jl")
9393
include("table.jl")
9494
include("write.jl")
95-
include("cinterface.jl")
95+
include("CDataInterface/CDataInterface.jl")
9696

9797
const LZ4_FRAME_COMPRESSOR = LZ4FrameCompressor[]
9898
const ZSTD_COMPRESSOR = ZstdCompressor[]
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""
    CDataInterface

Julia-side wrappers for the Arrow C Data Interface structs. The raw C layouts
(`CArrowSchema`, `CArrowArray`) come from `c_definitions.jl`; the Julia mirrors
(`ArrowSchema`, `ArrowArray`) come from `jl_definitions.jl`.
"""
module CDataInterface

export ArrowSchema, ArrowArray, getschema, getarray

include("c_definitions.jl")
include("jl_definitions.jl")

"""
    getschema(f) -> ArrowSchema

Create a `Ref{CArrowSchema}`, call `f(ptr)` with a `Ptr{CArrowSchema}` pointing at
it, and wrap the result in an `ArrowSchema`.

A finalizer is attached to the returned object: when it is garbage collected, the
struct's `release` callback is invoked (if it is not `C_NULL`) with a pointer to the
struct.

NOTE(review): the `Ref` is created uninitialized, so `f` is expected to fill in every
field, including `release` — confirm against callers.
"""
function getschema(f)
    schref = Ref{CArrowSchema}()
    # The raw pointer is only valid while `schref` is rooted, so keep it alive
    # explicitly for the whole duration of the callback.
    GC.@preserve schref begin
        ptr = Base.unsafe_convert(Ptr{CArrowSchema}, schref)
        f(ptr)
    end
    sch = ArrowSchema(schref)
    finalizer(sch) do x
        # `getfield` reads the raw function pointer, bypassing the custom `getproperty`.
        r = getfield(x.carrowschema[], :release)
        if r != C_NULL
            ccall(r, Cvoid, (Ptr{CArrowSchema},), x.carrowschema)
        end
    end
    return sch
end

"""
    getarray(f) -> ArrowArray

Create a `Ref{CArrowArray}`, call `f(ptr)` with a `Ptr{CArrowArray}` pointing at it,
and wrap the result in an `ArrowArray`.

A finalizer is attached to the returned object: when it is garbage collected, the
struct's `release` callback is invoked (if it is not `C_NULL`) with a pointer to the
struct.

NOTE(review): the `Ref` is created uninitialized, so `f` is expected to fill in every
field, including `release` — confirm against callers.
"""
function getarray(f)
    arr_ref = Ref{CArrowArray}()
    # Same rooting requirement as in `getschema`: the pointer must not outlive `arr_ref`.
    GC.@preserve arr_ref begin
        ptr = Base.unsafe_convert(Ptr{CArrowArray}, arr_ref)
        f(ptr)
    end
    arr = ArrowArray(arr_ref)
    finalizer(arr) do x
        # `getfield` reads the raw function pointer, bypassing the custom `getproperty`.
        r = getfield(x.c_arrow_array[], :release)
        if r != C_NULL
            ccall(r, Cvoid, (Ptr{CArrowArray},), x.c_arrow_array)
        end
    end
    return arr
end

end # module

src/cinterface.jl renamed to src/CDataInterface/c_definitions.jl

Lines changed: 3 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
module CData
2-
3-
export ArrowSchema, ArrowArray, getschema, getarray
4-
51
const ARROW_FLAG_DICTIONARY_ORDERED = 1
62
const ARROW_FLAG_NULLABLE = 2
73
const ARROW_FLAG_MAP_KEYS_SORTED = 4
@@ -22,7 +18,7 @@ CArrowSchema() = CArrowSchema(C_NULL, C_NULL, C_NULL, 0, 0, C_NULL, C_NULL, _CNU
2218

2319
Base.propertynames(::CArrowSchema) = (:format, :name, :metadata, :flags, :n_children, :children, :dictionary)
2420

25-
function readmetadata(ptr::Ptr{UInt8})
21+
function read_c_arrow_schema_metadata(ptr::Ptr{UInt8})
2622
pos = 1
2723
meta = Dict{String, String}()
2824
if ptr != C_NULL
@@ -49,7 +45,7 @@ function Base.getproperty(x::CArrowSchema, nm::Symbol)
4945
elseif nm === :name
5046
return unsafe_string(getfield(x, :name))
5147
elseif nm === :metadata
52-
return readmetadata(getfield(x, :metadata))
48+
return read_c_arrow_schema_metadata(getfield(x, :metadata))
5349
elseif nm === :flags
5450
return getfield(x, :flags)
5551
elseif nm === :n_children
@@ -64,34 +60,6 @@ function Base.getproperty(x::CArrowSchema, nm::Symbol)
6460
error("unknown property requested: $nm")
6561
end
6662

67-
mutable struct ArrowSchema
68-
format::String
69-
name::String
70-
metadata::Dict{String, String}
71-
flags::Int64
72-
n_children::Int64
73-
children::Vector{ArrowSchema}
74-
dictionary::Union{Nothing, ArrowSchema}
75-
carrowschema::Ref{CArrowSchema}
76-
end
77-
78-
ArrowSchema(s::Ref{CArrowSchema}) = ArrowSchema(s[].format, s[].name, s[].metadata, s[].flags, s[].n_children, map(ArrowSchema, s[].children), s[].dictionary === nothing ? nothing : ArrowSchema(s[].dictionary), s)
79-
ArrowSchema(s::CArrowSchema) = ArrowSchema(s.format, s.name, s.metadata, s.flags, s.n_children, map(ArrowSchema, s.children), s.dictionary === nothing ? nothing : ArrowSchema(s.dictionary), Ref{CArrowSchema}())
80-
81-
function getschema(f)
82-
schref = Ref{CArrowSchema}()
83-
ptr = Base.unsafe_convert(Ptr{CArrowSchema}, schref)
84-
f(ptr)
85-
sch = ArrowSchema(schref)
86-
finalizer(sch) do x
87-
r = getfield(x.carrowschema[], :release)
88-
if r != C_NULL
89-
ccall(r, Cvoid, (Ptr{CArrowSchema},), x.carrowschema)
90-
end
91-
end
92-
return sch
93-
end
94-
9563
struct CArrowArray
9664
length::Int64
9765
null_count::Int64
@@ -131,35 +99,4 @@ function Base.getproperty(x::CArrowArray, nm::Symbol)
13199
return d == C_NULL ? nothing : unsafe_load(d)
132100
end
133101
error("unknown property requested: $nm")
134-
end
135-
136-
mutable struct ArrowArray
137-
length::Int64
138-
null_count::Int64
139-
offset::Int64
140-
n_buffers::Int64
141-
n_children::Int64
142-
buffers::Vector{Ptr{UInt8}}
143-
children::Vector{ArrowArray}
144-
dictionary::Union{Nothing, ArrowArray}
145-
carrowarray::Ref{CArrowArray}
146-
end
147-
148-
ArrowArray(a::Ref{CArrowArray}) = ArrowArray(a[].length, a[].null_count, a[].offset, a[].n_buffers, a[].n_children, a[].buffers, map(ArrowArray, a[].children), a[].dictionary === nothing ? nothing : ArrowArray(a[].dictionary), a)
149-
ArrowArray(a::CArrowArray) = ArrowArray(a.length, a.null_count, a.offset, a.n_buffers, a.n_children, a.buffers, map(ArrowArray, a.children), a.dictionary === nothing ? nothing : ArrowArray(a.dictionary), Ref{CArrowArray}())
150-
151-
function getarray(f)
152-
arrref = Ref{CArrowArray}()
153-
ptr = Base.unsafe_convert(Ptr{CArrowArray}, arrref)
154-
f(ptr)
155-
arr = ArrowArray(arrref)
156-
finalizer(arr) do x
157-
r = getfield(x.carrowarray[], :release)
158-
if r != C_NULL
159-
ccall(r, Cvoid, (Ptr{CArrowArray},), x.carrowarray)
160-
end
161-
end
162-
return arr
163-
end
164-
165-
end # module
102+
end
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# https://arrow.apache.org/docs/format/CDataInterface.html#data-type-description-format-strings
2+
3+
"""
    get_type_from_format_string(format_string::String) -> Type

Return the Julia type that corresponds to an Arrow C Data Interface format string.

Primitive, binary and UTF-8 formats map to Base types. Nested formats (those starting
with `+`) map to the matching `Arrow` container type; any parameters that follow the
prefix (list size, union type ids) are currently ignored.

The declared return type is `Type` rather than `DataType` so that the `Arrow`
container types can be returned even if they are parametric (presumably they are —
confirm against their definitions).

# Throws
- `ArgumentError` if the format is recognized but not supported yet (decimal `d:`,
  fixed-width binary `w:`, temporal `t…`) or is not recognized at all.
"""
function get_type_from_format_string(format_string::String)::Type
    if format_string == "n"
        return Nothing
    elseif format_string == "b"
        return Bool
    elseif format_string == "c"
        return Int8
    elseif format_string == "C"
        return UInt8
    elseif format_string == "s"
        return Int16
    elseif format_string == "S"
        return UInt16
    elseif format_string == "i"
        return Int32
    elseif format_string == "I"
        return UInt32
    elseif format_string == "l"
        return Int64
    elseif format_string == "L"
        return UInt64
    elseif format_string == "e"
        return Float16
    elseif format_string == "f"
        return Float32
    elseif format_string == "g"
        return Float64
    elseif format_string == "z" || format_string == "Z"
        return Vector{UInt8}
    elseif format_string == "u" || format_string == "U"
        return String
    elseif startswith(format_string, "+l") || startswith(format_string, "+L")
        return Arrow.List
    elseif startswith(format_string, "+w")
        # TODO: the fixed list size that follows "+w:" is not used yet.
        return Arrow.FixedSizeList
    elseif startswith(format_string, "+s")
        return Arrow.Struct
    elseif startswith(format_string, "+m")
        return Arrow.Map
    elseif startswith(format_string, "+ud")
        # TODO: the comma-separated type ids that follow "+ud:" are not used yet.
        return Arrow.DenseUnion
    elseif startswith(format_string, "+us")
        # TODO: the comma-separated type ids that follow "+us:" are not used yet.
        return Arrow.SparseUnion
    elseif startswith(format_string, "d")
        # TODO: decimal, "d:precision,scale[,bitwidth]" — no Julia type is mapped yet.
        throw(ArgumentError("decimal format strings are not supported yet: \"$format_string\""))
    elseif startswith(format_string, "w")
        # TODO: fixed-width binary, "w:nbytes" — no Julia type is mapped yet.
        throw(ArgumentError("fixed-width binary format strings are not supported yet: \"$format_string\""))
    elseif startswith(format_string, "t")
        # TODO: temporal types (dates, times, timestamps, durations, intervals).
        throw(ArgumentError("temporal format strings are not supported yet: \"$format_string\""))
    end
    throw(ArgumentError("unrecognized Arrow format string: \"$format_string\""))
end
65+
66+
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""
    ArrowSchema

Julia-side mirror of a `CArrowSchema`. The scalar fields hold the values read from
the C struct's properties, `children` and `dictionary` hold recursively converted
`ArrowSchema` values, and `carrowschema` holds the `Ref` the object was built from.
"""
mutable struct ArrowSchema
    format::String
    name::String
    metadata::Dict{String, String}
    flags::Int64
    n_children::Int64
    children::Vector{ArrowSchema}
    dictionary::Union{Nothing, ArrowSchema}
    carrowschema::Ref{CArrowSchema}
end

"""
    ArrowSchema(s::Ref{CArrowSchema})

Build an `ArrowSchema` from the struct referenced by `s`, keeping `s` itself in the
`carrowschema` field.
"""
function ArrowSchema(s::Ref{CArrowSchema})
    format = s[].format
    name = s[].name
    metadata = s[].metadata
    flags = s[].flags
    n_children = s[].n_children
    children = map(ArrowSchema, s[].children)
    dictionary = if s[].dictionary === nothing
        nothing
    else
        ArrowSchema(s[].dictionary)
    end
    return ArrowSchema(
        format, name, metadata, flags, n_children, children, dictionary, s
    )
end

"""
    ArrowSchema(s::CArrowSchema)

Build an `ArrowSchema` from a `CArrowSchema` value. The `carrowschema` field is set
to a fresh `Ref{CArrowSchema}()` rather than to a reference to `s`.
"""
function ArrowSchema(s::CArrowSchema)
    format = s.format
    name = s.name
    metadata = s.metadata
    flags = s.flags
    n_children = s.n_children
    children = map(ArrowSchema, s.children)
    dictionary = if s.dictionary === nothing
        nothing
    else
        ArrowSchema(s.dictionary)
    end
    return ArrowSchema(
        format, name, metadata, flags, n_children, children, dictionary,
        Ref{CArrowSchema}(),
    )
end
32+
33+
"""
    ArrowArray

Julia-side mirror of a `CArrowArray`. The scalar fields and `buffers` hold the values
read from the C struct's properties, `children` and `dictionary` hold recursively
converted `ArrowArray` values, and `c_arrow_array` holds the `Ref` the object was
built from.
"""
mutable struct ArrowArray
    length::Int64
    null_count::Int64
    offset::Int64
    n_buffers::Int64
    n_children::Int64
    buffers::Vector{Ptr{UInt8}}
    children::Vector{ArrowArray}
    dictionary::Union{Nothing, ArrowArray}
    c_arrow_array::Ref{CArrowArray}
end

"""
    ArrowArray(a::Ref{CArrowArray})

Build an `ArrowArray` from the struct referenced by `a`, keeping `a` itself in the
`c_arrow_array` field.
"""
function ArrowArray(a::Ref{CArrowArray})
    len = a[].length
    null_count = a[].null_count
    offset = a[].offset
    n_buffers = a[].n_buffers
    n_children = a[].n_children
    buffers = a[].buffers
    children = map(ArrowArray, a[].children)
    dictionary = if a[].dictionary === nothing
        nothing
    else
        ArrowArray(a[].dictionary)
    end
    return ArrowArray(
        len, null_count, offset, n_buffers, n_children, buffers, children,
        dictionary, a,
    )
end

"""
    ArrowArray(a::CArrowArray)

Build an `ArrowArray` from a `CArrowArray` value. The `c_arrow_array` field is set to
a fresh `Ref{CArrowArray}()` rather than to a reference to `a`.
"""
function ArrowArray(a::CArrowArray)
    len = a.length
    null_count = a.null_count
    offset = a.offset
    n_buffers = a.n_buffers
    n_children = a.n_children
    buffers = a.buffers
    children = map(ArrowArray, a.children)
    dictionary = if a.dictionary === nothing
        nothing
    else
        ArrowArray(a.dictionary)
    end
    return ArrowArray(
        len, null_count, offset, n_buffers, n_children, buffers, children,
        dictionary, Ref{CArrowArray}(),
    )
end

0 commit comments

Comments
 (0)