diff --git a/src/ProteinChains.jl b/src/ProteinChains.jl index ec7971e..000ff24 100644 --- a/src/ProteinChains.jl +++ b/src/ProteinChains.jl @@ -4,19 +4,13 @@ using Backboner using Compat: @compat -include("ideal.jl") -export BackboneGeometry -export IdealResidue, STANDARD_RESIDUE -export append_residue, prepend_residue - include("atom.jl") export Atom -@compat public (atom_name, atom_number, atom_coords, atom_symbol) +@compat public (atom_name, atom_number, atom_coords, atom_symbol, atom_mass) include("properties.jl") -export StandardProperty, IndexableProperty +export AbstractProperty, StandardProperty, IndexableProperty export setproperties, addproperties, removeproperties -@compat public AbstractProperty include("chain.jl") export ProteinChain diff --git a/src/chain.jl b/src/chain.jl index b11fa41..3f13444 100644 --- a/src/chain.jl +++ b/src/chain.jl @@ -58,16 +58,16 @@ end Base.length(chain::ProteinChain) = length(chain.atoms) -Base.getproperty(chain::ProteinChain, name::Symbol) = - name in fieldnames(ProteinChain) ? getfield(chain, name) : unpack(getfield(getfield(chain, :properties), name)) - -Base.propertynames(chain::ProteinChain, private::Bool=false) = (setdiff(fieldnames(ProteinChain), private ? () : (:properties,))..., propertynames(chain.properties)...) - function Base.getindex(chain::ProteinChain, i::Union{AbstractVector,Colon}) properties = map(p -> p[i], chain.properties) ProteinChain(chain.id, chain.atoms[i], chain.sequence[i], chain.numbering[i], properties) end +Base.getproperty(chain::ProteinChain, name::Symbol) = + name in fieldnames(ProteinChain) ? getfield(chain, name) : unpack(getfield(getfield(chain, :properties), name)) + +Base.propertynames(chain::ProteinChain, private::Bool=false) = (setdiff(fieldnames(ProteinChain), private ? () : (:properties,))..., propertynames(chain.properties)...) + setproperties(chain::ProteinChain, ps::NamedTuple) = ProteinChain(chain.id, chain.atoms, chain.sequence, chain.ins_codes, chain.numbering, setproperties(chain.properties, ps)) diff --git a/src/ideal.jl b/src/ideal.jl deleted file mode 100644 index 1052879..0000000 --- a/src/ideal.jl +++ /dev/null @@ -1,110 +0,0 @@ -""" - BackboneGeometry(; - N_Ca_length = 1.46, - Ca_C_length = 1.52, - C_N_length = 1.33, - - N_Ca_C_angle = 1.94, - Ca_C_N_angle = 2.03, - C_N_Ca_angle = 2.13, - ) - -Define the idealized bond lengths and bond angles of a protein backbone. -""" -Base.@kwdef struct BackboneGeometry - N_Ca_length = 1.46 - Ca_C_length = 1.52 - C_N_length = 1.33 - - N_Ca_C_angle = 1.94 - Ca_C_N_angle = 2.04 - C_N_Ca_angle = 2.13 -end - -const DEFAULT_BACKBONE_GEOMETRY = BackboneGeometry() - -""" - IdealResidue{T<:AbstractFloat} <: AbstractMatrix{T} - - IdealResidue{T}(backbone_geometry=DEFAULT_BACKBONE_GEOMETRY; template=nothing) where T - -A 3x3 matrix representing the idealized geometry of a protein residue, with columns representing -the N, Ca, and C atom positions of a residue positioned at the origin. -""" -struct IdealResidue{T<:AbstractFloat} <: AbstractMatrix{T} - backbone_geometry::BackboneGeometry - N_Ca_C_coords::Matrix{T} -end - -function IdealResidue{T}(backbone_geometry::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY; template=nothing) where T - N_Ca_C_coords = Matrix{T}(undef, 3, 3) - N_pos, Ca_pos, C_pos = eachcol(N_Ca_C_coords) - Θ = backbone_geometry.N_Ca_C_angle - π/2 - N_pos .= [0, 0, 0] - Ca_pos .= [backbone_geometry.N_Ca_length, 0, 0] - C_pos .= Ca_pos + backbone_geometry.Ca_C_length * [sin(Θ), cos(Θ), 0] - N_Ca_C_coords .-= Backboner.centroid(N_Ca_C_coords; dims=2) - if !isnothing(template) - wanted_orientation, current_offset, wanted_offset = Backboner.kabsch_algorithm(N_Ca_C_coords, template) - N_Ca_C_coords .= wanted_orientation * (N_Ca_C_coords .- current_offset) .+ wanted_offset - end - IdealResidue{T}(backbone_geometry, N_Ca_C_coords) -end - -Base.size(::IdealResidue) = (3,3) -Base.getindex(ideal_residue::IdealResidue, args...) = ideal_residue.N_Ca_C_coords[args...] - -""" - STANDARD_RESIDUE_TEMPLATE - -This is a template of a "standard residue", with a very specific and -distinct shape, size, and orientation. which needs to be consistent if we want to -represent protein structures as sets of residue rotations and translations. - -Thus, we can use this residue as a template for aligning other residues with very precise -geometry to it. - -```jldocotest -julia> IdealResidue{Float64}(BackboneGeometry(N_Ca_C_angle = 1.93); template=ProteinChains.STANDARD_RESIDUE_TEMPLATE) -3×3 IdealResidue{Float64}: - -1.06447 -0.199174 1.26364 - 0.646303 -0.529648 -0.116655 - 0.0 0.0 0.0 -``` -""" -const STANDARD_RESIDUE_TEMPLATE = [ - -1.066 -0.200 1.266; - 0.645 -0.527 -0.118; - 0.000 0.000 0.000; -] # N Ca C - -const STANDARD_RESIDUE = IdealResidue{Float64}(DEFAULT_BACKBONE_GEOMETRY; template=STANDARD_RESIDUE_TEMPLATE) - -""" - append_residue(Backbone::Backbone, torsion_angles::Vector{<:Real}; ideal::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY) - -Create a new backbone by appending 3 new torsion angles (ψ, ω, ϕ) at the end, using bond lengths and bond angles specified in `BackboneGeometry`. -""" -function append_residue(backbone::Backbone, torsion_angles::Vector{<:Real}; ideal::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY) - length(torsion_angles) == 3 || throw(ArgumentError("length of `torsion_angles` must be 3, as only 3 backbone atoms are introduced.")) - bond_lengths = [ideal.C_N_length, ideal.N_Ca_length, ideal.Ca_C_length] - bond_angles = [ideal.Ca_C_N_angle, ideal.C_N_Ca_angle, ideal.N_Ca_C_angle] - append_bonds(backbone, Float64.(bond_lengths), Float64.(bond_angles), Float64.(torsion_angles)) -end - -""" - append_residue(Backbone::Backbone, torsion_angles::Vector{<:Real}; ideal::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY) - -Create a new backbone by prepending 3 new torsion angles (ψ, ω, ϕ) at the beginning, using bond lengths and bond angles specified in the `BackboneGeometry`. - -!!! note - The torsion angle order is the same as it would be when appending. The order is *not* reversed. -""" -function prepend_residue(backbone::Backbone, torsion_angles::Vector{<:Real}; ideal::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY) - length(torsion_angles) == 3 || throw(ArgumentError("length of `torsion_angles` must be 3, as only 3 backbone atoms are introduced.")) - bond_lengths = [ideal.N_Ca_length, ideal.Ca_C_length, ideal.C_N_length] - bond_angles = [ideal.N_Ca_C_angle, ideal.Ca_C_N_angle, ideal.C_N_Ca_angle] - return prepend_bonds(backbone, Float64.(bond_lengths), Float64.(bond_angles), Float64.(torsion_angles)) -end - -function idealize! end diff --git a/src/io/io.jl b/src/io/io.jl index a5cdb8c..2fe10e7 100644 --- a/src/io/io.jl +++ b/src/io/io.jl @@ -14,7 +14,6 @@ const pdbextension_to_format = Dict(ext => format for (format, ext) in BioStruct get_format(path::AbstractString) = get(pdbextension_to_format, lowercase(last(splitext(path))[2:end]), PDBFormat) -include("renumber.jl") include("read.jl") include("write.jl") include("download.jl") diff --git a/src/io/renumber.jl b/src/io/renumber.jl deleted file mode 100644 index 9124b71..0000000 --- a/src/io/renumber.jl +++ /dev/null @@ -1,49 +0,0 @@ -const missingvals = Set([".", "?"]) - -function _renumber(structure::ProteinStructure, mmcif_dict::BioStructures.MMCIFDict) - label_seq_ids = mmcif_dict["_atom_site.label_seq_id"] - pdbx_PDB_ins_codes = mmcif_dict["_atom_site.pdbx_PDB_ins_code"] - auth_seq_ids = mmcif_dict["_atom_site.auth_seq_id"] - auth_asym_ids = mmcif_dict["_atom_site.auth_asym_id"] - - label_seq_id_dict = Dict{String,String}() - - for asym_id in unique(auth_asym_ids) - indices = findall(auth_asym_ids .== asym_id) - for i in indices - ins_code = pdbx_PDB_ins_codes[i] == "?" ? " " : pdbx_PDB_ins_codes[i] - key = asym_id*auth_seq_ids[i]*ins_code - !haskey(label_seq_id_dict, key) && (label_seq_id_dict[key] = label_seq_ids[i]) - end - end - - chainwise_renumbering = Vector{Int32}[] - for chain in structure - if length(chain) == 0 - push!(chainwise_renumbering, Int32[]) - continue - end - renumbering_str = map( - (resnum, ins_code) -> parse(Int32, get(label_seq_id_dict, chain.id*string(resnum)*ins_code, "-1")), - chain.numbering, chain.ins_codes) - push!(chainwise_renumbering, renumbering_str) - end - - return chainwise_renumbering -end - -""" - renumber(structure::ProteinStructure, mmcif_dict::BioStructures.MMCIFDict) - -Return residue numbers from "_atom_site.label_seq_ids". - -The `ProteinStructure` will automatically add a `renumbering` property if -an MMCIFDict is passed (default if the file is an MMCIF). -""" -function renumber(structure::ProteinStructure, mmcifdict) - chainwise_renumbering = _renumber(structure, mmcifdict) - for (i, (chain, renumbering)) in enumerate(zip(structure, chainwise_renumbering)) - structure[i] = addproperties(chain; renumbering) - end - return structure -end \ No newline at end of file diff --git a/src/structure.jl b/src/structure.jl index 7d4edbb..808da5b 100644 --- a/src/structure.jl +++ b/src/structure.jl @@ -63,11 +63,19 @@ function map_atoms!(f::Function, structure::ProteinStructure, args...) return structure end +Base.getproperty(structure::ProteinStructure, name::Symbol) = + name in fieldnames(ProteinStructure) ? getfield(structure, name) : unpack(getfield(getfield(structure, :properties), name)) + +Base.propertynames(structure::ProteinStructure, private::Bool=false) = (setdiff(fieldnames(ProteinStructure), private ? () : (:properties,))..., propertynames(structure.properties)...) + setproperties(structure::ProteinStructure, properties::NamedTuple) = ProteinStructure(structure.name, structure.atoms, structure.chains, setproperties(structure.properties, properties)) addproperties(structure::ProteinStructure, properties::NamedTuple) = setproperties(structure, addproperties(structure.properties, properties)) +addproperties(structure::ProteinStructure; properties...) = + setproperties(structure, addproperties(structure.properties, NamedTuple(properties))) + removeproperties(structure::ProteinStructure, names::Symbol...) = setproperties(structure, removeproperties(structure.properties, names...)) diff --git a/test/runtests.jl b/test/runtests.jl index 9f638e9..9cc860c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -90,58 +90,6 @@ using Test end end - # See https://proteopedia.org/wiki/index.php/Unusual_sequence_numbering - @testset "Unusual numbering" begin - - @testset "Arbitrary Numbering" begin - structure = ProteinChains.renumber(pdb"1BSZ", mmcifdict"1BSZ") - @test !allequal(chain -> chain.numbering, structure) - @test allequal(chain -> chain.renumbering, structure) - end - - @testset "N-Terminal Residues Missing Coordinates" begin - structure = ProteinChains.renumber(pdb"1D66", mmcifdict"1D66") - chain = structure["A"] - @test chain.numbering[begin] == 8 - @test chain.renumbering[begin] == 8 - end - - @testset "Starts With Zero Or Negative Numbers" begin - structure = ProteinChains.renumber(pdb"1BXW", mmcifdict"1BXW") - chain = structure["A"] - @test chain.numbering[begin] == 0 - @test chain.renumbering[begin] == 1 - - structure = ProteinChains.renumber(pdb"1D5T", mmcifdict"1D5T") - chain = structure["A"] - @test chain.numbering[begin] == -2 - @test chain.renumbering[begin] == 1 - end - - @testset "Insertion Codes" begin - structure = ProteinChains.renumber(pdb"1IGY", mmcifdict"1IGY") - chain = structure["B"] - @test count(==(82), chain.numbering) == 4 - @test allunique(chain.renumbering) - @test Set(collect(chain.ins_codes)) == Set([' ', 'A', 'B', 'C']) - end - - @testset "Skipping Sequence Numbers" begin - structure = ProteinChains.renumber(pdb"2ACE", mmcifdict"2ACE") - chain = structure["A"] - @test any(!isone, chain.numbering[begin+1:end] - chain.numbering[begin:end-1]) - @test chain.numbering == chain.renumbering - end - - @testset "Not Monotonic" begin - structure = ProteinChains.renumber(pdb"1NSA", mmcifdict"1NSA") - chain = structure["A"] - @test issorted(chain.numbering) # BioStructures sorts automatically with `fixlists!`, so residues are in wrong order - @test !issorted(chain.renumbering) # the renumbering is not monotonic. it would be if BioStructures didn't sort. - end - - end - end @testset "store" begin