@@ -27,7 +27,17 @@ function Base.read(input::IO,
2727 struc = MolecularStructure (structure_name)
2828
2929 for (i, datablock) in enumerate (file[" dataBlocks" ])
30- bcif_dict = BCIFDict (datablock_to_dict (datablock)[" _atom_site" ])
30+ # Either decode the whole data block and pick _atom_site from it (decode_all = true,
31+ # the current setting), or decode only the _atom_site category, which is more efficient.
32+ # Keep decode_all = true if access to the rest of the file is needed.
33+
34+ decode_all = true
35+ if decode_all
36+ bcif_dict = BCIFDict (datablock_to_dict (datablock)[" _atom_site" ])
37+ else
38+ bcif_dict = BCIFDict (columns_to_dict (get_category (categories, " _atom_site" )))
39+ end
40+
3141 struc[i] = Model (i, struc)
3242 for i in 1 : length (bcif_dict[" id" ])
3343 unsafe_addatomtomodel! (struc[1 ], AtomRecord (bcif_dict, i))
@@ -103,7 +113,88 @@ function get_category(cats::Vector{Any}, name::String)
103113 return cats[idx]
104114end
105115
116+
117+ # Utility functions for encoding/decoding
"""
    encode_stepwise(data, encodings)

Run `data` through every entry of `encodings`, in order, by calling
`encode(encoding, data)` and feeding each result into the next step.

Return the output of the last step, or `data` itself when `encodings` is empty.
"""
function encode_stepwise(data, encodings)
    # Fold from the left so the first listed encoding is applied first.
    apply_step(acc, step) = encode(step, acc)
    return foldl(apply_step, encodings; init=data)
end
124+
"""
    decode_stepwise(data, encodings)

Undo a chain of encodings: walk `encodings` from last to first and call
`decode(encoding, data)` for each one, feeding every result into the next step.

Return the output of the final step, or `data` itself when `encodings` is empty.
"""
function decode_stepwise(data, encodings)
    # The encoding applied last has to be undone first, hence the reversal.
    undo_step(acc, step) = decode(step, acc)
    return foldl(undo_step, reverse(encodings); init=data)
end
131+
"""
    deserialize_numeric_encoding(content)

Turn the serialized description of a numeric encoding into an encoding object.

- A `Vector` is processed element by element and a vector of the results is returned.
- A value that already is an `Encoding` is returned unchanged.
- Anything else is treated as a key/value collection whose `"kind"` entry selects the
  constructor: `"ByteArray"`, `"FixedPoint"`, `"IntervalQuantization"`, `"RunLength"`,
  `"Delta"` or `"IntegerPacking"`.

`UInt8` parameter values are widened to `Int32` before they are handed to a
constructor. The widening happens on a private copy, so `content` is not modified.

Throws an `ErrorException` when `"kind"` names none of the supported encodings.
"""
function deserialize_numeric_encoding(content::Any)
    if content isa Vector
        return [deserialize_numeric_encoding(item) for item in content]
    end
    if content isa Encoding
        return content
    end

    kind = content["kind"]

    # Widen byte-sized values into a fresh Dict{Any,Any}. Writing them back into
    # `content` would mutate the caller's data while it is being iterated, and the
    # widening would be silently undone by a dict whose value type is narrow.
    params = Dict{Any,Any}(
        key => (value isa UInt8 ? Int32(value) : value) for (key, value) in content
    )

    # Resolve the parameter `name` (or `default` when absent) through INT_TO_TYPE.
    type_of(name, default) = INT_TO_TYPE[get(params, name, default)]

    if kind == "ByteArray"
        return ByteArrayEncoding(type_of("type", nothing))
    elseif kind == "FixedPoint"
        # NOTE(review): the fallback here is the name FLOAT32 while IntervalQuantization
        # falls back to the literal 32 — confirm both are valid INT_TO_TYPE keys.
        return FixedPointEncoding(params["factor"]; srcType=type_of("srcType", FLOAT32))
    elseif kind == "IntervalQuantization"
        return IntervalQuantizationEncoding(
            params["min"], params["max"], params["numSteps"];
            srcType=type_of("srcType", 32),
        )
    elseif kind == "RunLength"
        return RunLengthEncoding(;
            srcSize=get(params, "srcSize", nothing),
            srcType=type_of("srcType", nothing),
        )
    elseif kind == "Delta"
        return DeltaEncoding(;
            srcType=type_of("srcType", nothing),
            origin=get(params, "origin", 0),
        )
    elseif kind == "IntegerPacking"
        return IntegerPackingEncoding(
            params["byteCount"];
            srcSize=get(params, "srcSize", nothing),
            isUnsigned=get(params, "isUnsigned", false),
        )
    else
        error("Unknown encoding kind: $kind")
    end
end
168+
169+
"""
    decode_column(column::Dict)

Decode the payload of one column.

`column["data"]` is expected to hold the encoded payload under `"data"` and the list
of encodings that were applied to it under `"encoding"`. Every entry of that list is
turned into an encoding object, and the payload is then passed, together with those
objects, to `decode_stepwise`, whose result is returned.
"""
function decode_column(column::Dict)
    payload = column["data"]

    # A "StringArray" entry is expected to be a single encoding that carries its own
    # dataEncoding and offsetEncoding, both of which have to be deserialized as well.
    # Every other entry is handled as a numeric encoding.
    function build_encoding(spec)
        if spec["kind"] == "StringArray"
            return StringArrayEncoding(;
                stringData=spec["stringData"],
                dataEncoding=deserialize_numeric_encoding(spec["dataEncoding"]),
                offsetEncoding=deserialize_numeric_encoding(spec["offsetEncoding"]),
                offsets=spec["offsets"],
            )
        end
        return deserialize_numeric_encoding(spec)
    end

    encodings = Any[build_encoding(spec) for spec in payload["encoding"]]
    return decode_stepwise(payload["data"], encodings)
end
191+
192+
193+ # Below are the encoding and decoding types for BCIF format
194+
106195# Data types used by the BCIF encodings are indicated by integer values.
196+ # These are not well documented in the official spec; I had to ask about them explicitly:
197+ # https://github.com/molstar/BinaryCIF/issues/4
107198@enum TypeCode begin
108199 INT8 = 1
109200 INT16 = 2
@@ -529,111 +620,3 @@ function decode(enc::StringArrayEncoding, data)
529620
530621 return substrings[indices]
531622end
532-
533- # Utility functions for encoding/decoding
"""
    encode_stepwise(data, encodings)

Apply the entries of `encodings` to `data` one after another, first to last, using
`encode(encoding, data)`; each step receives the result of the previous one.

Return the final result, which is `data` itself when `encodings` is empty.
"""
function encode_stepwise(data, encodings)
    return foldl(encodings; init=data) do current, encoding
        encode(encoding, current)
    end
end
540-
"""
    decode_stepwise(data, encodings)

Apply `decode(encoding, data)` for every entry of `encodings`, last to first; each
step receives the result of the previous one.

Return the final result, which is `data` itself when `encodings` is empty.
"""
function decode_stepwise(data, encodings)
    # Decoding runs in the opposite order of encoding.
    return foldl(reverse(encodings); init=data) do current, encoding
        decode(encoding, current)
    end
end
547-
"""
    deserialize_encoding(content)

Turn the serialized description of an encoding into an encoding object.

- A `Vector` is processed element by element and a vector of the results is returned.
- A value that already is an `Encoding` is returned unchanged.
- Anything else is treated as a key/value collection whose `"kind"` entry selects the
  constructor: `"ByteArray"`, `"FixedPoint"`, `"StringArray"`, `"IntervalQuantization"`,
  `"RunLength"`, `"Delta"` or `"IntegerPacking"`.

`UInt8` parameter values are widened to `Int32`, and nested `"dataEncoding"` /
`"offsetEncoding"` entries are deserialized recursively before the constructor runs.
Both steps work on a private copy, so `content` is not modified.

Throws an `ErrorException` when `"kind"` names none of the supported encodings.
"""
function deserialize_encoding(content::Any)
    if content isa Vector
        return [deserialize_encoding(item) for item in content]
    end
    if content isa Encoding
        return content
    end

    kind = content["kind"]

    # Widen byte-sized values into a fresh Dict{Any,Any} instead of writing them back
    # into `content` while it is being iterated.
    params = Dict{Any,Any}(
        key => (value isa UInt8 ? Int32(value) : value) for (key, value) in content
    )

    # Nested encodings must be looked up under the same keys the StringArray
    # constructor reads below. Looking for "data_encoding" never matched, which left
    # the nested data encoding in its raw, serialized form.
    for nested in ("dataEncoding", "offsetEncoding")
        if haskey(params, nested)
            params[nested] = deserialize_encoding(params[nested])
        end
    end

    # Resolve the parameter `name` (or `default` when absent) through INT_TO_TYPE.
    type_of(name, default) = INT_TO_TYPE[get(params, name, default)]

    if kind == "ByteArray"
        return ByteArrayEncoding(type_of("type", nothing))
    elseif kind == "FixedPoint"
        # NOTE(review): the fallback here is the name FLOAT32 while IntervalQuantization
        # falls back to the literal 32 — confirm both are valid INT_TO_TYPE keys.
        return FixedPointEncoding(params["factor"]; srcType=type_of("srcType", FLOAT32))
    elseif kind == "StringArray"
        return StringArrayEncoding(;
            stringData=get(params, "stringData", nothing),
            dataEncoding=get(params, "dataEncoding", nothing),
            offsetEncoding=get(params, "offsetEncoding", nothing),
            offsets=get(params, "offsets", nothing),
        )
    elseif kind == "IntervalQuantization"
        return IntervalQuantizationEncoding(
            params["min"], params["max"], params["numSteps"];
            srcType=type_of("srcType", 32),
        )
    elseif kind == "RunLength"
        return RunLengthEncoding(;
            srcSize=get(params, "srcSize", nothing),
            srcType=type_of("srcType", nothing),
        )
    elseif kind == "Delta"
        return DeltaEncoding(;
            srcType=type_of("srcType", nothing),
            origin=get(params, "origin", 0),
        )
    elseif kind == "IntegerPacking"
        return IntegerPackingEncoding(
            params["byteCount"];
            srcSize=get(params, "srcSize", nothing),
            isUnsigned=get(params, "isUnsigned", false),
        )
    else
        error("Unknown encoding kind: $kind")
    end
end
599-
600-
"""
    decode_column(column::Dict)

Decode the payload of one column.

`column["data"]` is expected to hold the encoded payload under `"data"` and the list
of applied encodings under `"encoding"`. Each entry is converted as follows:

- with both `"dataEncoding"` and `"offsetEncoding"`: a `StringArrayEncoding` whose
  nested encodings are deserialized;
- with only `"dataEncoding"`: the deserialized nested data encoding;
- otherwise: the deserialized entry itself.

A conversion that yields a `Vector` contributes its elements individually. The payload
and the resulting flat list are passed to `decode_stepwise`, whose result is returned.
"""
function decode_column(column::Dict)
    payload = column["data"]
    steps = []

    for spec in payload["encoding"]
        step = if !haskey(spec, "dataEncoding")
            deserialize_encoding(spec)
        elseif haskey(spec, "offsetEncoding")
            StringArrayEncoding(;
                stringData=spec["stringData"],
                dataEncoding=deserialize_encoding(spec["dataEncoding"]),
                offsetEncoding=deserialize_encoding(spec["offsetEncoding"]),
                offsets=spec["offsets"],
            )
        else
            deserialize_encoding(spec["dataEncoding"])
        end

        # Splice vectors in element by element so `steps` stays a flat list.
        if step isa Vector
            append!(steps, step)
        else
            push!(steps, step)
        end
    end

    return decode_stepwise(payload["data"], steps)
end
0 commit comments