# Copyright 2008 Nanorex, Inc. See LICENSE file for details. """ Protein.py -- Protein class implementation. The Protein class stores protein-related information and provides several methods used by Protein display style and Rosetta simulations. Currently, the protein objects can be created by reading a protein structure from PDB file or by using Peptide Builder. @author: Piotr @version: $Id$ @copyright: 2008 Nanorex, Inc. See LICENSE file for details. History: piotr 080707: Created a preliminary version of the Protein class. piotr 081908: Completed docstrings and documented better the code. piotr 082008: Better documentation of this file; code re-factoring. To do: - get_id_for_aa(): Add flag options for customizing the description(s) returned for aa_ids. """ import foundation.env as env from utilities.debug_prefs import debug_pref, Choice_boolean_False from utilities.prefs_constants import rosetta_backrub_enabled_prefs_key from protein.model.Residue import Residue from protein.model.Residue import SS_HELIX, SS_STRAND, SS_COIL, SS_TURN from protein.model.Residue import AA_3_TO_1, NUC_3_TO_1 # Utility methods used by PDB reading code, perhaps they should be moved # to files_pdb.py def is_water(resName): """ Check if a PDB residue is a water molecule. @param resName: PDB residue name (3-letter string) @type resName: string """ water_codes = ["H2O", "HHO", "OHH", "HOH", "OH2", "SOL", "WAT", "DOD", "DOH", "HOD", "D2O", "DDO", "ODD", "OD2", "HO1", "HO2", "HO3", "HO4"] if resName[:3] in water_codes: return True return False def is_amino_acid(resName): """ Check if a PDB residue is an amino acid. @param resName: PDB residue name (3-letter string) @type resName: string """ if AA_3_TO_1.has_key(resName[:3]): return True return False def is_nucleotide(resName): """ Check if a PDB residue is a nucleotide. @param resName: PDB residue name (3-letter string) @type resName: string """ if NUC_3_TO_1.has_key(resName[:3]): return True return False class Protein: """ This class implements a protein model in NanoEngineer-1. See module docstring for more info. """ # This class was implemented by Piotr in July 2008. def __init__(self): # Dictionary that maps residue names to residue objects. self.residues = {} # Ordered list of amino acids. self.residues_list = [] # Ordered list of alpha carbon atoms. It should contain at least one atom. # This could be probably pre-calculated every time # [I can't tell what that last sentence means -- every time that what occurs? # motivation? -- bruce 080828] self.ca_atom_list = [] # Single-character chain identifier. self.chainId = '' # PDB identifier of the protein, typically, a four character string # in the following format: "1abc" self.pdbId = "" # Index of "current" amino acid (used by Edit Protein command). self.current_aa_idx = 0 # Display list for expanded rotamers, used by ProteinChunk.draw_realtime # The purpose of this list is to speed up drawing of "expanded" rotamers, # and avoid re-generating protein models in reduced display style # if a modification is made to side chain atoms and not to backbone # atoms (e.g. if a side chain chi angle was edited). # The idea is to allow for simultaneous displaying of atomistic # (for expanded rotamers) and reduced (for backbone) representations. # This implementation is wrong, as the display list is never properly # deleted. This attribute could be moved to Chunk class, or implemented # in a different way, so the list is deleted in current OpenGL context. # Potentially, current implementation can cause memory leaks by not # deleting display lists. self.residues_dl = None def set_chain_id(self, chainId): """ Sets a single letter chain ID. @param chainId: chain ID of current protein @type chainId: character """ self.chainId = chainId def get_chain_id(self): """ Gets a single letter chain ID. @return: chain ID of current protein (character) """ return self.chainId def set_pdb_id(self, pdbId): """ Set a four-letter PDB identifier. @param pdbId: PDB ID of current protein @type pdbId: string (four characters) """ self.pdbId = pdbId def get_pdb_id(self): """ Return a four-letter PDB identifier. @return: PDB ID of current protein (four-character string) """ return self.pdbId def add_pdb_atom(self, atom, pdbname, resId, resName, setType = False): """ Adds a new atom to the protein. Returns a residue that the atom has been added to. @param atom: new atom to be added to the protein @type atom: Atom @param pdbname: PDB name (label) of the atom (up to four characters) @type pdbname: string @param resId: PDB residue ID (an integer, usually) @type resId: integer @param resName: PDB residue name @type name: string @param setType: if True, try to determine the atom type based on PDB residue name and atom name @type setType: boolean @return: residue the atom has been added to (Residue) """ if self.residues.has_key(resId): # Find an existing residue. aa = self.residues[resId] else: # This is a new residue. aa = Residue() self.residues[resId] = aa # Append the residue to amino acid list to have a properly ordered # list of amino acids. self.residues_list.append(aa) # Add the atom to the residue. aa.add_atom(atom, pdbname) # If this is an alpha carbon atom, add it to the list of alpha carbons. if pdbname == "CA": self.ca_atom_list.append(atom) return aa def is_c_alpha(self, atom): """ Check if the atom is a C-alpha atom. @param atom: atom to be tested @type atom: Atom @return: True if atom is an alpha carbon atom, False if it is not @rtype: boolean """ if atom in self.ca_atom_list: return True else: return False def count_c_alpha_atoms(self): """ Return a total number of alpha carbon atoms in the protein (usually equal to the protein sequence length). @return: number of C-alpha atoms (integer) """ return len(self.ca_atom_list) def get_c_alpha_atoms(self): """ Return a list of alpha carbon atoms. @note: the returned list is mutable and should not be modified outside of the Protein class. @return: list of alpha carbon atoms """ return self.ca_atom_list def is_c_beta(atom): """ Check if the atom is a C-beta atom. @param atom: atom to be tested @type atom: Atom @return: True if atom is a carbon-beta atom, False if it is not. @rtype: boolean """ if atom in self.cb_atom_list: return True else: return False def get_sequence_string(self): """ Create and return a protein sequence string. @return: string corresponding to the protein sequence in 1-letter code """ seq = "" for aa in self.get_amino_acids(): seq += aa.get_one_letter_code() return seq def set_rosetta_protein_secondary_structure(self, inProtein): """ Set the secondary structure of the protein outputted from rosetta to that of the inProtein. @param inProtein:input protein chunk @type inProtein: L{Chunk} """ #Urmi 20080728: created to set the secondary structure of the rosetta #outputted protein # # piotr 081908: Explanation. Rosetta fixed backbone simulation doesn't # change the backbone atom positions, so it is safe to copy the secondary # structure information from the input protein. # piotr 082008: This is no longer true for flexible backbone simulations, # i.e. when using the backrub mode. It is not going to cause major problems, # as the backbone changes are relatively minor, but in general we need # a secondary structure computation algorithm. aa_list_for_rosetta = self.get_amino_acids() i = 0 for aa in inProtein.protein.get_amino_acids(): ss = aa.get_secondary_structure() aa_list_for_rosetta[i].set_secondary_structure(ss) i = i + 1 def get_secondary_structure_string(self): """ Create and return a protein sequence string. 'H' corresponds to helical conformation, 'E' corresponds to extended secondary structure, '-' corresponds to coil (other types of secondary structure). @return: secondary structure string """ ss_str = "" for aa in self.get_amino_acids(): ss = aa.get_secondary_structure() if ss == SS_HELIX: ss_str += "H" elif ss == SS_STRAND: ss_str += "E" else: ss_str += "-" return ss_str # The two methods below (get_id_for_aa and get_amino_acid_id) # should probably be renamed, the names are confusing now -- piotr 080902 def get_id_for_aa(self, aa, index): """ Create and return an info text describing a specified residue. (See "get_amino_acid_id" method docstring for more information.) @param aa: queried amino acid @type aa: Residue @param index: index of the queried amino acid [0, sequence_length-1] @type index: int @return: string describing this residue """ # REVIEW: should this be private? Also, I guessed "@type aa". # [bruce 080904 comment] aa_id = self.get_pdb_id() + \ self.get_chain_id() + \ "[" + \ repr(index+1) + \ "] : " + \ aa.get_three_letter_code() + \ "[" + \ repr(int(aa.get_id())) + \ "]" return aa_id def get_amino_acid_id(self, index): """ Create and return a descriptive text related to a residue of a given sequence index. The text is composed of protein name, index, residue name, residue index. @note: this method will cause an exception if there is no residue of the specified index @param index: index of a queried amino acid [0, sequence_length-1] @type index: int @return: string describing this residue """ aa_list = self.get_amino_acids() if index in range(len(aa_list)): aa = aa_list[index] aa_id = self.get_id_for_aa(aa, index) return aa_id raise Exception("Residue index is out of range: %d" % index) def get_amino_acid_id_list(self): """ Create and return a list of residue descriptions. See get_amino_acid_id for details on the description format. @return: list of residue descriptions for all amino acids """ id_list = [] index = 0 for aa in self.get_amino_acids(): aa_id = self.get_id_for_aa(aa, index) id_list.append(aa_id) index += 1 return id_list def get_amino_acids(self): """ Return a list of residues in this protein. @note: the returned list is mutable and should not be modified outside of the Protein class. @return: list of residues """ return self.residues_list def count_amino_acids(self): """ Returns the number of amino acids. """ return len(self.residues_list) def assign_helix(self, resId): """ Assign a helical secondary structure to resId. @note: this method will raise an exception if a residue of a specified resId doesn't exist @param resId: residue ID for secondary structure assignment @type resId: int """ if self.residues.has_key(resId): aa = self.residues[resId] aa.set_secondary_structure(SS_HELIX) raise Exception("Specified residue doesn't exist") def assign_strand(self, resId): """ Assign a beta secondary structure to resId. The proper name of this secondary structure type is "beta strand", but names "beta", "strand", "extended" are used interchangeably (PDB files use "STRAND" name to mark the beta conformation fragments). @note: this method will raise an exception if a residue of a specified resId doesn't exist @param resId: residue ID for secondary structure assignment @type resId: int """ if self.residues.has_key(resId): aa = self.residues[resId] aa.set_secondary_structure(SS_STRAND) raise Exception("Specified residue doesn't exist") def assign_turn(self, resId): """ Assign a turn secondary structure to resId. @note: this method will raise an exception if a residue of a specified resId doesn't exist @param resId: residue ID for secondary structure assignment @type resId: int """ if self.residues.has_key(resId): aa = self.residues[resId] aa.set_secondary_structure(SS_TURN) raise Exception("Specified residue doesn't exist") def expand_rotamer(self, aa): """ Expand a side chain of a given residue. @note: aa should belong to the Protein object. @param aa: amino acid to expand @type aa: Residue @return: True if a given residue belongs to self, or False otherwise. """ if aa in self.residues_list: self.residues_dl = None aa.expand() return True return False def is_expanded(self, aa): """ Check if a given amino acid's rotamer is expanded. @param aa: amino acid to check @type aa: Residue @return: True if amino acid's side chain is expanded """ return aa.is_expanded() def collapse_all_rotamers(self): """ Collapse all side chains. """ self.residues_dl = None self.residues_hi_dl = None for aa in self.residues.values(): aa.collapse() def expand_all_rotamers(self): """ Expand all side chains. """ self.residues_dl = None self.residues_hi_dl = None for aa in self.residues.values(): aa.expand() def get_residue(self, atom): """ For a given atom, return a residue that the atom belongs to. @param atom: atom to look for @type atom: Atom @return: residue the atom belongs to, or None if not found """ for aa in self.residues.itervalues(): if aa.has_atom(atom): return aa return None def traverse_forward(self): """ Increase an index of the current amino acid. The index is an attribute of self (Protein class). @return: True if the forward step is possible, otherwise False. """ if self.current_aa_idx < len(self.residues) - 1: self.current_aa_idx += 1 return True return False def traverse_backward(self): """ Decrease an index of the current amino acid. The index is an attribute of self (Protein class). @return: True if the backward step is possible, otherwise False. """ if self.current_aa_idx > 0: self.current_aa_idx -= 1 return True return False # General comment regarding the "current residue" mechanism. The purpose # of the "current_aa_idx" attribute is to have a mechanism for selecting # and focusing on an individual amino acid. Currently, this mechanism # is only used by "Edit Protein" command and probably the "Edit Protein" # command should keep track of the current residue. It would be # better if this attribute was removed from the Protein class. # -- piotr 080902 def get_current_amino_acid(self): """ Get current amino acid. @return: current amino acid (Residue) """ if self.current_aa_idx in range(len(self.residues_list)): return self.residues_list[self.current_aa_idx] return None def get_amino_acid_at_index(self, index): """ Return the amino acid at the given index. @note: This method returns None if there is no amino acid at the specified index. This is intentional - callers should be aware of it. @param index: index of amino acid requested @type index: int @return: amino acid (Residue) """ #Urmi 20080728: created to do the two way connection between protein #sequence editor and residue combo box if index in range(len(self.residues_list)): return self.residues_list[index] return None def get_current_amino_acid_index(self): """ Get index of current amino acid. @return: index of current amino acid (integer) """ return self.current_aa_idx def set_current_amino_acid_index(self, index): """ Set index of current amino acid (if it's in the allowed range). @param index: index of current amino acid @type index: integer @return: True if the index is allowed, False if it is out of allowed range @rtype: boolean """ if index in range(len(self.residues_list)): self.current_aa_idx = index return True else: return False def get_number_of_backrub_aa(self): """ Returns a number of backrub amino acids. @return: number of amino acids assigned as "backrub" @rtype: integer """ nbr = 0 for aa in self.get_amino_acids(): if aa.get_backrub_mode(): nbr += 1 return nbr def is_backrub_setup_correctly(self): """ Returns True if the backrub table is properly set up, False otherwise. @rtype: boolean """ last_aa = None # Check if at least two consecutive amino acids have backrub flag # set as True. for aa in self.get_amino_acids(): if last_aa and \ last_aa.get_backrub_mode() and \ aa.get_backrub_mode(): return True last_aa = aa return False def edit(self, win): """ Edit the protein chunk. @note: Probably this method should not reside here, since this file is for the actual model. Maybe we'll take care of that when we move to the new model """ win.commandSequencer.userEnterCommand('EDIT_PROTEIN') return def _getCopyOfResidues(self): """ Returns a (deep copy) of the residues dict. @note: NIY """ for aa in self.residues.values(): print aa return # override abstract method of DataMixin def _copyOfObject(self): """ Create and return a copy of protein. """ protein = Protein() protein.set_chain_id(self.get_chain_id()) protein.set_pdb_id(self.get_pdb_id()) protein.set_current_amino_acid_index(self.get_current_amino_acid_index()) #@@@ BUG: residues_list, ca_atom_list, and residues aren't copied # completely since they contain compound objects (i.e. Residues with # lists, dicts, etc). # See add_pdb_atom() code to see how residues are created for # these lists/dicts. Need help from Bruce. --Mark 2008-12-25 if 1: protein.residues_list = list(self.residues_list) protein.ca_atom_list = list(self.ca_atom_list) protein.residues = dict(self.residues) else: # Using copy.deepcopy also fails. Found a reference here: # http://mail.python.org/pipermail/python-dev/2008-June/080772.html # which states that it's possible that "residues_list" contains an # object that cannot be copied (i.e. array.array). # Evidently the problem was corrected with python 2.4.4. # I am running Python 2.4. Is it worth upgrading to 2.4.4? # --Mark 2008-12-25 import copy protein.residues_list = copy.deepcopy(self.residues_list) protein.ca_atom_list = copy.deepcopy(self.ca_atom_list) protein.residues = copy.deepcopy(self.residues) protein.residues_dl = None # DL gets rebuilt in ProteinChunks.drawchunk_realtime print "Protein._copyOfObject(): HERE!" return protein # override abstract method of DataMixin. # Ask Bruce to review. --Mark 2008-12-25 def __eq__(self, other): """ Compare self with other. """ print "Protein.__eq__(): HERE!" if self.chain_id != other.chain_id: return False elif self.pdb_id != other.pdb_id: return False elif self.current_aa_idx != other.current_aa_idx: return False elif self.residues is not other.residues: # Is this right? return False elif self.residues_list is not other.residues_list: # Is this right? return False elif self.ca_atom_list is not other.ca_atom_list: # Is this right? return False else: return True pass pass # end of class Protein # Protein helper methods should be located here. # Mark 2008-12-14 def getAllProteinChunksInPart(assy): """ Returns a list of all the protein chunks in assy. @return: a list of all the protein chunks in assy. @rtype: list of chunks """ proteinChunkList = [] for mol in assy.molecules: if mol.isProteinChunk(): proteinChunkList.append(mol) return proteinChunkList # piotr 082008: This and possibly several Rosetta-related methods of the Protein # class should be re-factored and moved to a separate file in simulations/ROSETTA. def write_rosetta_resfile(filename, chunk): """ Write a Rosetta resfile for a given protein chunk. Return True if successfully written, otherwise return False. Writes backrub information if the backrub mode is enabled in user preferences. Currently, we only support single-chain Rosetta simulations. @param chunk: protein chunk to be written """ # Make sure this is a valid protein chunk. if chunk is None or \ chunk.protein is None: return False # Check if the backrub mode is enabled. use_backrub = env.prefs[rosetta_backrub_enabled_prefs_key] # Get a list of amino acids. amino_acids = chunk.protein.get_amino_acids() # Open the output file. f = open(filename, "w") if not f: return False # Write a standard file header. f.write(" This file specifies which residues will be varied\n") f.write(" \n") f.write(" Column 2: Chain \n") f.write(" Column 4-7: sequential residue number \n") f.write(" Column 9-12: pdb residue number \n") f.write(" Column 14-18: id (described below) \n") f.write(" Column 20-40: amino acids to be used \n") f.write(" \n") f.write(" NATAA => use native amino acid \n") f.write(" ALLAA => all amino acids \n") f.write(" NATRO => native amino acid and rotamer \n") f.write(" PIKAA => select inividual amino acids \n") f.write(" POLAR => polar amino acids \n") f.write(" APOLA => apolar amino acids \n") f.write(" \n") f.write(" The following demo lines are in the proper format\n") f.write(" \n") f.write(" A 1 3 NATAA \n") f.write(" A 2 4 ALLAA \n") f.write(" A 3 6 NATRO \n") f.write(" A 4 7 NATAA \n") f.write(" B 5 1 PIKAA DFLM \n") f.write(" B 6 2 PIKAA HIL \n") f.write(" B 7 3 POLAR \n") f.write(" -------------------------------------------------\n") # Begin the actual data records. f.write(" start\n") index = 0 for aa in amino_acids: # For each consecutive amino acid index += 1 mut = aa.get_mutation_range() # Write the amino acid and its mutation range name out_str = " " + \ chunk.protein.get_chain_id() + \ "%5d" % int(index) + \ "%6s" % aa.get_id() + \ mut if use_backrub and \ aa.backrub: # Append "B" when in backrub mode. out_str += "B" if mut == "PIKAA": out_str += " " + aa.get_mutation_descriptor().replace("_","") + "\n" else: out_str += "\n" f.write(out_str) # Close the output file. f.close() return True