Skip to content

Commit 9fb8b7b

Browse files
cleanup
1 parent c1ee984 commit 9fb8b7b

File tree

1 file changed

+92
-109
lines changed

1 file changed

+92
-109
lines changed

src/bcif.jl

Lines changed: 92 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,17 @@ function Base.read(input::IO,
2727
struc = MolecularStructure(structure_name)
2828

2929
for (i, datablock) in enumerate(file["dataBlocks"])
30-
bcif_dict = BCIFDict(datablock_to_dict(datablock)["_atom_site"])
30+
# could decode the whole file at once, or just decode the _atom_site category
31+
# for efficiency; the `decode_all` flag below selects which. Can be changed to get access to the
32+
# rest of the file
33+
34+
decode_all = true
35+
if decode_all
36+
bcif_dict = BCIFDict(datablock_to_dict(datablock)["_atom_site"])
37+
else
38+
bcif_dict = BCIFDict(columns_to_dict(get_category(categories, "_atom_site")))
39+
end
40+
3141
struc[i] = Model(i, struc)
3242
for i in 1:length(bcif_dict["id"])
3343
unsafe_addatomtomodel!(struc[1], AtomRecord(bcif_dict, i))
@@ -103,7 +113,88 @@ function get_category(cats::Vector{Any}, name::String)
103113
return cats[idx]
104114
end
105115

116+
117+
# Utility functions for encoding/decoding
118+
"""
    encode_stepwise(data, encodings)

Run `data` through every element of `encodings` in order, calling
`encode(encoding, data)` at each step and feeding the result into the next one.
Returns the final value; `data` itself is returned when `encodings` is empty.
"""
function encode_stepwise(data, encodings)
    return foldl((encoded, step) -> encode(step, encoded), encodings; init=data)
end
124+
125+
"""
    decode_stepwise(data, encodings)

Undo a chain of encodings: walk `encodings` from last to first, calling
`decode(encoding, data)` at each step and feeding the result into the next one.
Returns the final value; `data` itself is returned when `encodings` is empty.
"""
function decode_stepwise(data, encodings)
    # A right fold visits the last encoding first, which is the reverse of the
    # order in which the encodings are listed.
    return foldr((step, decoded) -> decode(step, decoded), encodings; init=data)
end
131+
132+
"""
    deserialize_numeric_encoding(content)

Turn a parsed encoding description into the matching `Encoding` object.

- A `Vector` is processed element by element and a vector of the results is
  returned.
- A value that already is an `Encoding` is returned unchanged.
- Anything else is indexed like a dictionary: its `"kind"` entry selects the
  constructor and the remaining entries supply the constructor arguments.

Recognised kinds are `"ByteArray"`, `"FixedPoint"`, `"IntervalQuantization"`,
`"RunLength"`, `"Delta"` and `"IntegerPacking"`. Any other kind (including
`"StringArray"`, which is not handled by this function) raises an error.

`content` is never modified: `UInt8` parameter values are widened to `Int32`
in a private copy of the parameters.
"""
function deserialize_numeric_encoding(content::Any)
    if content isa Vector
        return [deserialize_numeric_encoding(item) for item in content]
    end
    content isa Encoding && return content

    kind = content["kind"]

    # Single bytes are widened to Int32. This is done on a copy so that the
    # caller's dictionary is neither rewritten as a side effect nor mutated
    # while it is being iterated.
    params = Dict{Any,Any}(
        key => (value isa UInt8 ? Int32(value) : value) for (key, value) in content
    )

    if kind == "ByteArray"
        return ByteArrayEncoding(INT_TO_TYPE[get(params, "type", nothing)])
    elseif kind == "FixedPoint"
        # NOTE(review): the fallback here is the enum value FLOAT32, whereas
        # IntervalQuantization below falls back to the integer 32. Confirm which
        # key type INT_TO_TYPE expects and make the two defaults agree.
        return FixedPointEncoding(
            params["factor"];
            srcType=INT_TO_TYPE[get(params, "srcType", FLOAT32)],
        )
    elseif kind == "IntervalQuantization"
        return IntervalQuantizationEncoding(
            params["min"], params["max"], params["numSteps"];
            srcType=INT_TO_TYPE[get(params, "srcType", 32)],
        )
    elseif kind == "RunLength"
        return RunLengthEncoding(
            srcSize=get(params, "srcSize", nothing),
            srcType=INT_TO_TYPE[get(params, "srcType", nothing)],
        )
    elseif kind == "Delta"
        return DeltaEncoding(
            srcType=INT_TO_TYPE[get(params, "srcType", nothing)],
            origin=get(params, "origin", 0),
        )
    elseif kind == "IntegerPacking"
        return IntegerPackingEncoding(
            params["byteCount"],
            srcSize=get(params, "srcSize", nothing),
            isUnsigned=get(params, "isUnsigned", false),
        )
    else
        error("Unknown encoding kind: $kind")
    end
end
168+
169+
170+
"""
    decode_column(column::Dict)

Decode one column. Reads `column["data"]`, builds an encoding object for every
entry of its `"encoding"` list, and returns the result of passing the raw
`"data"` payload through `decode_stepwise` with those encodings.
"""
function decode_column(column::Dict)
    payload = column["data"]

    # A "StringArray" entry carries its own dataEncoding and offsetEncoding,
    # which have to be deserialized as well; every other kind goes straight to
    # deserialize_numeric_encoding.
    function to_encoding(spec)
        spec["kind"] == "StringArray" || return deserialize_numeric_encoding(spec)
        return StringArrayEncoding(
            stringData=spec["stringData"],
            dataEncoding=deserialize_numeric_encoding(spec["dataEncoding"]),
            offsetEncoding=deserialize_numeric_encoding(spec["offsetEncoding"]),
            offsets=spec["offsets"],
        )
    end

    encodings = Any[to_encoding(spec) for spec in payload["encoding"]]

    return decode_stepwise(payload["data"], encodings)
end
191+
192+
193+
# Below are the encoding and decoding types for BCIF format
194+
106195
# Data types defined for the BCIF encoding are indicated by integer values
196+
# these are not well discussed in the official spec; had to ask about it explicitly
197+
# https://github.com/molstar/BinaryCIF/issues/4
107198
@enum TypeCode begin
108199
INT8 = 1
109200
INT16 = 2
@@ -529,111 +620,3 @@ function decode(enc::StringArrayEncoding, data)
529620

530621
return substrings[indices]
531622
end
532-
533-
# Utility functions for encoding/decoding
534-
"""
    encode_stepwise(data, encodings)

Apply the elements of `encodings` to `data` one after another, in the order
given, via `encode(encoding, data)`. The output of each step is the input of
the next; the last output is returned (or `data` when `encodings` is empty).
"""
function encode_stepwise(data, encodings)
    return foldl(encodings; init=data) do current, enc
        encode(enc, current)
    end
end
540-
541-
"""
    decode_stepwise(data, encodings)

Apply `decode(encoding, data)` for every element of `encodings`, starting with
the last element and finishing with the first. The output of each step is the
input of the next; the last output is returned (or `data` when `encodings` is
empty).
"""
function decode_stepwise(data, encodings)
    # foldr consumes the collection from its end, i.e. in reversed order.
    return foldr(encodings; init=data) do enc, current
        decode(enc, current)
    end
end
547-
548-
"""
    deserialize_encoding(content)

Turn a parsed encoding description into the matching `Encoding` object.

- A `Vector` is processed element by element and a vector of the results is
  returned.
- A value that already is an `Encoding` is returned unchanged.
- Anything else is indexed like a dictionary: its `"kind"` entry selects the
  constructor and the remaining entries supply the constructor arguments.
  Nested `"dataEncoding"` / `"offsetEncoding"` entries, when present, are
  deserialized recursively before the constructor is called.

Raises an error when `"kind"` is not one of the recognised encoding names.

`content` is never modified: `UInt8` parameter values are widened to `Int32`
in a private copy of the parameters.
"""
function deserialize_encoding(content::Any)
    if content isa Vector
        return [deserialize_encoding(item) for item in content]
    end
    content isa Encoding && return content

    kind = content["kind"]

    # Single bytes are widened to Int32. This is done on a copy so that the
    # caller's dictionary is neither rewritten as a side effect nor mutated
    # while it is being iterated.
    params = Dict{Any,Any}(
        key => (value isa UInt8 ? Int32(value) : value) for (key, value) in content
    )

    # Nested encodings. The key checked here must be the same camelCase key the
    # StringArray constructor reads below ("dataEncoding", not "data_encoding"),
    # otherwise the nested data encoding is passed on without being deserialized.
    for nested in ("dataEncoding", "offsetEncoding")
        if haskey(params, nested)
            params[nested] = deserialize_encoding(params[nested])
        end
    end

    if kind == "ByteArray"
        return ByteArrayEncoding(INT_TO_TYPE[get(params, "type", nothing)])
    elseif kind == "FixedPoint"
        # NOTE(review): the fallback here is the enum value FLOAT32, whereas
        # IntervalQuantization below falls back to the integer 32. Confirm which
        # key type INT_TO_TYPE expects and make the two defaults agree.
        return FixedPointEncoding(
            params["factor"];
            srcType=INT_TO_TYPE[get(params, "srcType", FLOAT32)],
        )
    elseif kind == "StringArray"
        return StringArrayEncoding(
            stringData=get(params, "stringData", nothing),
            dataEncoding=get(params, "dataEncoding", nothing),
            offsetEncoding=get(params, "offsetEncoding", nothing),
            offsets=get(params, "offsets", nothing),
        )
    elseif kind == "IntervalQuantization"
        return IntervalQuantizationEncoding(
            params["min"], params["max"], params["numSteps"];
            srcType=INT_TO_TYPE[get(params, "srcType", 32)],
        )
    elseif kind == "RunLength"
        return RunLengthEncoding(
            srcSize=get(params, "srcSize", nothing),
            srcType=INT_TO_TYPE[get(params, "srcType", nothing)],
        )
    elseif kind == "Delta"
        return DeltaEncoding(
            srcType=INT_TO_TYPE[get(params, "srcType", nothing)],
            origin=get(params, "origin", 0),
        )
    elseif kind == "IntegerPacking"
        return IntegerPackingEncoding(
            params["byteCount"],
            srcSize=get(params, "srcSize", nothing),
            isUnsigned=get(params, "isUnsigned", false),
        )
    else
        error("Unknown encoding kind: $kind")
    end
end
599-
600-
601-
"""
    decode_column(column::Dict)

Decode one column. Reads `column["data"]`, builds a flat list of encoding
objects from its `"encoding"` list, and returns the result of passing the raw
`"data"` payload through `decode_stepwise` with that list.
"""
function decode_column(column::Dict)
    column_data = column["data"]
    flat_encodings = []

    # Vector results are spliced into the list; single encodings are appended.
    function add!(item)
        return item isa Vector ? append!(flat_encodings, item) : push!(flat_encodings, item)
    end

    for enc in column_data["encoding"]
        if !haskey(enc, "dataEncoding")
            # Plain entry: deserialize it as it stands.
            add!(deserialize_encoding(enc))
        elseif !haskey(enc, "offsetEncoding")
            # Only a nested dataEncoding is present: use that in place of the entry.
            add!(deserialize_encoding(enc["dataEncoding"]))
        else
            # Both nested encodings are present: build a string-array encoding.
            add!(StringArrayEncoding(
                stringData=enc["stringData"],
                dataEncoding=deserialize_encoding(enc["dataEncoding"]),
                offsetEncoding=deserialize_encoding(enc["offsetEncoding"]),
                offsets=enc["offsets"],
            ))
        end
    end

    return decode_stepwise(column_data["data"], flat_encodings)
end

0 commit comments

Comments
 (0)