104104
105105# Split a mmCIF line into tokens
106106# See https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax for syntax
107- function splitline (s:: AbstractString )
108- tokens = String[]
107+ function splitline! (tokens, s:: AbstractString )
109108 in_token = false
110109 # Quote character of the currently open quote, or ' ' if no quote open
111110 quote_open_char = ' '
@@ -114,7 +113,7 @@ function splitline(s::AbstractString)
114113 if c in whitespacechars
115114 if in_token && quote_open_char == ' '
116115 in_token = false
117- push! (tokens, s[start_i: (i - 1 )])
116+ push! (tokens, @view ( s[start_i: (i - 1 )]) )
118117 end
119118 elseif c in quotechars
120119 if quote_open_char == ' '
@@ -127,7 +126,7 @@ function splitline(s::AbstractString)
127126 elseif c == quote_open_char && (i == length (s) || s[i + 1 ] in whitespacechars)
128127 quote_open_char = ' '
129128 in_token = false
130- push! (tokens, s[start_i: (i - 1 )])
129+ push! (tokens, @view ( s[start_i: (i - 1 )]) )
131130 end
132131 elseif c == ' #' && ! in_token
133132 return tokens
@@ -137,13 +136,14 @@ function splitline(s::AbstractString)
137136 end
138137 end
139138 if in_token
140- push! (tokens, s[start_i: end ])
139+ push! (tokens, @view ( s[start_i: end ]) )
141140 end
142141 if quote_open_char != ' '
143142 throw (ArgumentError (" Line ended with quote open: $s " ))
144143 end
145144 return tokens
146145end
146+ splitline (s:: AbstractString ) = splitline! (String[], s) # mostly for testing
147147
148148# Get tokens from a mmCIF file
149149function tokenizecif (f:: IO )
@@ -162,7 +162,7 @@ function tokenizecif(f::IO)
162162 end
163163 push! (tokens, join (token_buffer, " \n " ))
164164 else
165- append ! (tokens, splitline ( line) )
165+ splitline ! (tokens, line)
166166 end
167167 end
168168 return tokens
@@ -204,7 +204,7 @@ function tokenizecifstructure(f::IO)
204204 in_keys = true
205205 else
206206 in_keys = false
207- append ! (tokens, splitline ( line) )
207+ splitline ! (tokens, line)
208208 end
209209 end
210210 return tokens
@@ -236,7 +236,7 @@ function MMCIFDict(f::IO; gzip::Bool=false)
236236end
237237
238238# Add tokens to a mmCIF dictionary
239- function populatedict! (mmcif_dict:: MMCIFDict , tokens:: AbstractVector{<:String } )
239+ function populatedict! (mmcif_dict:: MMCIFDict , tokens:: AbstractVector{<:AbstractString } )
240240 key = " "
241241 keys = String[]
242242 loop_flag = false
@@ -264,16 +264,8 @@ function populatedict!(mmcif_dict::MMCIFDict, tokens::AbstractVector{<:String})
264264 continue
265265 end
266266 else
267- try
268- push! (mmcif_dict[keys[i % n + 1 ]], token)
269- catch ex
270- # A zero division error means we have not found any keys
271- if isa (ex, DivideError)
272- throw (ArgumentError (" Loop keys not found, token: \" $token \" " ))
273- else
274- rethrow ()
275- end
276- end
267+ iszero (n) && throw (ArgumentError (" Loop keys not found, token: \" $token \" " ))
268+ push! (mmcif_dict[keys[i % n + 1 ]], token)
277269 i += 1
278270 continue
279271 end
@@ -384,25 +376,34 @@ function MolecularStructure(mmcif_dict::MMCIFDict;
384376end
385377
386378# Constructor from mmCIF ATOM/HETATM line
387- AtomRecord (d:: MMCIFDict , i:: Integer ) = AtomRecord (
388- d[" _atom_site.group_PDB" ][i] == " HETATM" ,
389- parse (Int, d[" _atom_site.id" ][i]),
390- d[" _atom_site.auth_atom_id" ][i],
391- d[" _atom_site.label_alt_id" ][i] in missingvals ? ' ' : d[" _atom_site.label_alt_id" ][i][1 ],
392- d[" _atom_site.auth_comp_id" ][i],
393- d[" _atom_site.auth_asym_id" ][i],
394- parse (Int, d[" _atom_site.auth_seq_id" ][i]),
395- d[" _atom_site.pdbx_PDB_ins_code" ][i] in missingvals ? ' ' : d[" _atom_site.pdbx_PDB_ins_code" ][i][1 ],
396- [
397- parse (Float64, d[" _atom_site.Cartn_x" ][i]),
398- parse (Float64, d[" _atom_site.Cartn_y" ][i]),
399- parse (Float64, d[" _atom_site.Cartn_z" ][i])
400- ],
401- d[" _atom_site.occupancy" ][i] in missingvals ? 1.0 : parse (Float64, d[" _atom_site.occupancy" ][i]),
402- d[" _atom_site.B_iso_or_equiv" ][i] in missingvals ? 0.0 : parse (Float64, d[" _atom_site.B_iso_or_equiv" ][i]),
403- d[" _atom_site.type_symbol" ][i] in missingvals ? " " : d[" _atom_site.type_symbol" ][i],
404- d[" _atom_site.pdbx_formal_charge" ][i] in missingvals ? " " : d[" _atom_site.pdbx_formal_charge" ][i],
405- )
379+ function AtomRecord (d:: MMCIFDict , i:: Integer )
380+ alt_id = d[" _atom_site.label_alt_id" ][i]
381+ ins_code = d[" _atom_site.pdbx_PDB_ins_code" ][i]
382+ occupancy = d[" _atom_site.occupancy" ][i]
383+ temp_factor = d[" _atom_site.B_iso_or_equiv" ][i]
384+ typesym = d[" _atom_site.type_symbol" ][i]
385+ charge = d[" _atom_site.pdbx_formal_charge" ][i]
386+
387+ return AtomRecord (
388+ d[" _atom_site.group_PDB" ][i] == " HETATM" ,
389+ parse (Int, d[" _atom_site.id" ][i]),
390+ d[" _atom_site.auth_atom_id" ][i],
391+ alt_id in missingvals ? ' ' : alt_id[1 ],
392+ d[" _atom_site.auth_comp_id" ][i],
393+ d[" _atom_site.auth_asym_id" ][i],
394+ parse (Int, d[" _atom_site.auth_seq_id" ][i]),
395+ ins_code in missingvals ? ' ' : ins_code[1 ],
396+ SVector {3,Float64} ((
397+ parse (Float64, d[" _atom_site.Cartn_x" ][i]),
398+ parse (Float64, d[" _atom_site.Cartn_y" ][i]),
399+ parse (Float64, d[" _atom_site.Cartn_z" ][i]),
400+ )),
401+ occupancy in missingvals ? 1.0 : parse (Float64, occupancy),
402+ temp_factor in missingvals ? 0.0 : parse (Float64, temp_factor),
403+ typesym in missingvals ? " " : typesym,
404+ charge in missingvals ? " " : charge,
405+ )
406+ end
406407
407408# Format a mmCIF data value by enclosing with quotes or semicolon lines where
408409# appropriate. See
0 commit comments