|
| 1 | +# Running this file generates src/bonding.jl, which contains the atomtypes and residuedata dictionaries. |
| 2 | +# Thanks to OpenMM for the ff14SB force field XML file. |
| 3 | + |
| 4 | +using Downloads |
| 5 | + |
# Fetch the ff14SB force-field XML unless a copy is already present.
# The parsing step later in this script opens the file by this same relative
# name, so the existence check must look at the very path the download writes
# to (checking a different directory would either re-download on every run or
# skip the download and leave the reader without a file).
let xmlfile = "protein.ff14SB.xml",
    url = "https://raw.githubusercontent.com/openmm/openmm/refs/heads/master/wrappers/python/openmm/app/data/amber14/protein.ff14SB.xml"

    if !isfile(xmlfile)
        Downloads.download(url, xmlfile)
    end
end
| 9 | + |
"""
    parsexmblock(f, io::IO, key)

Read lines from `io`, strip surrounding whitespace from each one, and call
`f(line)` for every line until a line equal to `key` is encountered. The
terminating line is consumed from `io` but is not passed to `f`.

`f` may itself read from `io` (for example by calling `parsexmblock` again to
consume a nested block); reading resumes wherever `f` left the stream.

Returns `nothing`. Throws an error if `io` is exhausted before `key` is seen,
since that means the block was never closed and the data would be truncated.
"""
function parsexmblock(f, io::IO, key)
    while !eof(io)
        line = strip(readline(io))
        line == key && return nothing
        f(line)
    end
    # Falling out of the loop means the terminator never appeared.
    error("Reached end of input before finding $key")
end
| 17 | + |
"""
    parsestring(str)

Return the contents of the double-quoted string `str` as a `String`, with the
surrounding quote characters removed.

Throws an `ArgumentError` if `str` does not both start and end with a distinct
`"` character (so a lone `"` is rejected rather than read as an empty string).
"""
function parsestring(str)
    if length(str) < 2 || !startswith(str, '"') || !endswith(str, '"')
        throw(ArgumentError("expected a double-quoted string, got: $str"))
    end
    # `chop` counts characters rather than bytes, so a multi-byte character
    # next to the closing quote does not produce an invalid string index.
    return String(chop(str; head=1, tail=1))
end
| 23 | + |
"""
    parsexmlline(f, line, tag, keyname)

Parse a single self-closing XML element of the form `<tag a="1" b="2"/>`.

The attribute named `keyname` supplies the key (with its quotes removed). Every
other attribute `k="v"` is converted by calling `f(k, v)`, where `v` is the raw
attribute value *including* its surrounding double quotes.

Returns `key => nt`, where `nt` is a `NamedTuple` whose fields are the non-key
attributes in the order they appear in `line`.

Throws an `ArgumentError` if `line` is not a self-closing `<tag .../>` element,
if its attribute list contains anything other than `name="value"` pairs, or if
the `keyname` attribute is absent.
"""
function parsexmlline(f, line, tag, keyname)
    if !startswith(line, "<$tag ") || !endswith(line, "/>")
        throw(ArgumentError("expected a self-closing <$tag .../> element, got: $line"))
    end
    # Drop the leading "<tag" and the trailing "/>" by character count.
    attrs = chop(line; head=length(tag) + 1, tail=2)
    # One attribute: a name, '=', and a double-quoted value. Matching quoted
    # values as a unit tolerates repeated whitespace and values that contain
    # spaces or '=' characters.
    attrpattern = r"([^\s=\"]+)\s*=\s*(\"[^\"]*\")"
    # Anything left over once the attributes are removed is malformed input.
    if !isempty(strip(replace(attrs, attrpattern => "")))
        throw(ArgumentError("malformed attribute list in: $line"))
    end
    key = nothing
    vals = Pair{Symbol,Any}[]
    for m in eachmatch(attrpattern, attrs)
        k, v = m.captures
        if k == keyname
            key = String(chop(v; head=1, tail=1))  # strip the quotes
        else
            push!(vals, Symbol(k) => f(k, v))
        end
    end
    if key === nothing
        throw(ArgumentError("missing $keyname attribute in: $line"))
    end
    return key => (; vals...)
end
| 40 | + |
| 41 | + |
# Read the force-field XML and build two tables:
#   * atomtypes: keyed by the `class` attribute of each <Type .../> element,
#     holding its element, mass and name;
#   * residues: keyed by residue name, holding the residue's atoms (charge and
#     type per atom name), its internal bonds and its external-bond atoms.
# Top-level sections other than <AtomTypes> and <Residues> are skipped.
atomtypes, residues = open("protein.ff14SB.xml", "r") do io
    line = readline(io)
    @assert line == "<ForceField>"
    typetable = Dict{String, @NamedTuple{element::String, mass::Float32, name::String}}()
    restable = Dict{String, @NamedTuple{atoms::Dict{String, @NamedTuple{charge::Float32, type::String}}, bonds::Vector{Tuple{String,String}}, externalbonds::Vector{String}}}()
    parsexmblock(io, "</ForceField>") do section
        if section == "<AtomTypes>"
            parsexmblock(io, "</AtomTypes>") do entry
                record = parsexmlline(entry, "Type", "class") do attr, raw
                    attr == "element" && return parsestring(raw)
                    # The raw value still carries its quotes; slice them off.
                    attr == "mass" && return parse(Float32, raw[2:end-1])
                    attr == "name" && return parsestring(raw)
                    error("Unknown AtomType key $attr")
                end
                push!(typetable, record)
            end
        elseif section == "<Residues>"
            parsexmblock(io, "</Residues>") do entry
                # Only <Residue name="..."> openers are allowed at this level.
                startswith(entry, "<Residue name=") || error("Unknown Residues line $entry")
                # Skip the 14-character `<Residue name=` prefix and the final `>`.
                resname = parsestring(entry[15:end-1])
                atomtable = Dict{String, @NamedTuple{charge::Float32, type::String}}()
                bondlist = Vector{Tuple{String,String}}()
                extlist = Vector{String}()
                parsexmblock(io, "</Residue>") do child
                    if startswith(child, "<Atom")
                        atomrecord = parsexmlline(child, "Atom", "name") do attr, raw
                            # The raw value still carries its quotes; slice them off.
                            attr == "charge" && return parse(Float32, raw[2:end-1])
                            attr == "type" && return parsestring(raw)
                            error("Unknown Atom key $attr")
                        end
                        push!(atomtable, atomrecord)
                    elseif startswith(child, "<Bond")
                        # Trim `<Bond` and the closing `/>`, leaving the two attributes.
                        firstattr, secondattr = split(strip(child[6:end-2]), ' ')
                        atom1 = only(match(r"atomName1=\"(.*)\"", firstattr).captures)
                        atom2 = only(match(r"atomName2=\"(.*)\"", secondattr).captures)
                        push!(bondlist, (atom1, atom2))
                    elseif startswith(child, "<ExternalBond")
                        # Trim `<ExternalBond` and the closing `/>`.
                        attrtext = child[14:end-2]
                        push!(extlist, only(match(r"atomName=\"(.*)\"", attrtext).captures))
                    else
                        error("Unknown Residue line $child")
                    end
                end
                restable[resname] = (atoms = atomtable, bonds = bondlist, externalbonds = extlist)
            end
        end
    end
    return typetable, restable
end
| 100 | + |
# Write src/bonding.jl: a `const atomtypes` dictionary, an `RDADict` type alias,
# and a `const residuedata` dictionary, each with entries sorted by key.
open(joinpath(dirname(@__DIR__), "src", "bonding.jl"), "w") do out
    println(out, "const atomtypes = Dict{String, @NamedTuple{element::String, mass::Float32, name::String}}(")
    for entry in sort!(collect(atomtypes); by=first)
        println(out, " ", entry, ',')
    end
    println(out, ")\n")

    println(out, "const RDADict = Dict{String, @NamedTuple{charge::Float32, type::String}}")

    println(out, "const residuedata = Dict{String, @NamedTuple{atoms::RDADict, bonds::Vector{Tuple{String,String}}, externalbonds::Vector{String}}}(")
    # Printed form of the per-residue atom dictionary type; it is swapped for
    # the shorter `RDADict` alias in the generated file.
    verbosetype = "Dict{String, @NamedTuple{charge::Float32, type::String}}"
    for (resname, data) in sort!(collect(residues); by=first)
        # Pad so that names shorter than four characters line up.
        padding = " " * " "^(4 - length(resname))
        print(out, padding)
        show(out, resname)
        atomsrepr = replace(sprint(show, data.atoms), verbosetype => "RDADict")
        println(out, " => (atoms = ", atomsrepr, ',')
        println(out, " bonds = ", data.bonds, ",")
        println(out, " externalbonds = ", data.externalbonds, "),")
    end
    println(out, ")")
end
0 commit comments