From 42a94bae7d8ab9f99ac49ee0baa226eef504032e Mon Sep 17 00:00:00 2001 From: anton083 Date: Mon, 9 Sep 2024 14:48:58 +0200 Subject: [PATCH] Add missing oxygens when writing, remove unused Zygote extension --- Project.toml | 3 ++- ext/ZygoteExt.jl | 24 ------------------------ src/io/write.jl | 31 +++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 25 deletions(-) delete mode 100644 ext/ZygoteExt.jl diff --git a/Project.toml b/Project.toml index 750440b..5aafb45 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,14 @@ name = "ProteinChains" uuid = "b8e8f2a5-48d3-44f1-ba0d-c71cb7726ff8" authors = ["Anton Oresten and contributors"] -version = "0.1.0" +version = "0.1.1" [deps] AssigningSecondaryStructure = "8ed43e74-60fb-4e11-99b9-91deed37aef7" Backboner = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7" BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc" DynamicStructs = "e139c391-eeee-4818-b359-c8725224fb1f" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" PeriodicTable = "7b2266bf-644c-5ea3-82d8-af4bbd25a884" [compat] diff --git a/ext/ZygoteExt.jl b/ext/ZygoteExt.jl deleted file mode 100644 index 28ceef4..0000000 --- a/ext/ZygoteExt.jl +++ /dev/null @@ -1,24 +0,0 @@ -module ZygoteExt - -using ProteinChains - -import Zygote - -function idealize!(chain::ProteinChain; ideal::BackboneGeometry=BackboneGeometry()) - backbone = Backboner.Backbone(chain.backbone) - T = eltype(backbone.coords) - ideal_lengths = T[ideal.N_Ca_length, ideal.Ca_C_length, ideal.C_N_length] - ideal_angles = T[ideal.N_Ca_C_length, ideal.Ca_C_N_length, ideal.C_N_Ca_length] - new_backbone = Backboner.idealize(backbone, ideal_lengths, ideal_angles) - chain.backbone = new_backbone - return chain -end - -function idealize!(structure::ProteinStructure; kwargs...) - for chain in structure - idealize!(chain; kwargs...) - end - return structure -end - -end diff --git a/src/io/write.jl b/src/io/write.jl index 5334333..8f7d7ad 100644 --- a/src/io/write.jl +++ b/src/io/write.jl @@ -1,8 +1,29 @@ +using LinearAlgebra: cross, normalize + +using AssigningSecondaryStructure: get_oxygen_positions + +function estimate_last_oxygen_position(backbone::Array{T,3}) where T<:Real + N_pos, Ca_pos, C_pos = eachcol(backbone[:,:,end]) + angle = 2π/3 + bond_length = 1.2 + v = Ca_pos - C_pos + w = cross(v, Ca_pos - N_pos) + u = cross(w, v) + O_pos = C_pos + normalize(u)*bond_length*cos(angle - 0.5π) - normalize(v)*bond_length*sin(angle - 0.5π) + return O_pos +end + function BioStructures.Chain(proteinchain::ProteinChain, model::BioStructures.Model) numbering = hasproperty(proteinchain, :numbering) ? proteinchain.numbering : collect(1:countresidues(proteinchain)) residue_list = Vector{String}() residues = Dict{String, BioStructures.AbstractResidue}() chain = BioStructures.Chain(proteinchain.id, residue_list, residues, model) + + # some visualization tools require oxygen atoms to be present, so these get used if a residue is missing the oxygen atom + oxygen_name = encode_atom_name("O", "O") + oxygen_coords = get_oxygen_positions(proteinchain.backbone) + oxygen_coords[:,end] = estimate_last_oxygen_position(proteinchain.backbone) + atom_serial = 0 for residue_index in 1:countresidues(proteinchain) atom_list = Vector{String}() @@ -10,6 +31,7 @@ function BioStructures.Chain(proteinchain::ProteinChain, model::BioStructures.Mo resname = threeletter_resname(proteinchain.sequence[residue_index]) number = numbering[residue_index] residue = BioStructures.Residue(resname, number, ' ', false, atom_list, atoms, chain, '-') # TODO: secondary structure + for (i, (atom_name, element)) in enumerate(zip(BACKBONE_ATOM_NAMES, BACKBONE_ATOM_SYMBOLS)) atom_serial += 1 coords = proteinchain.backbone[:, i, residue_index] @@ -17,6 +39,15 @@ function BioStructures.Chain(proteinchain::ProteinChain, model::BioStructures.Mo push!(atom_list, atom.name) atoms[atom.name] = atom end + + if !any(atom -> atom.atom_name == oxygen_name, proteinchain.atoms[residue_index]) + atom_serial += 1 + coords = oxygen_coords[:, residue_index] + atom = BioStructures.Atom(atom_serial, "O", ' ', coords, 1.0, 0.0, "O", " ", residue) + push!(atom_list, atom.name) + atoms[atom.name] = atom + end + for atom in proteinchain.atoms[residue_index] atom_serial += 1 atom_name = decode_atom_name(atom.atom_name)