Analyze Conformations¶
First, necessary imports:
In [1]: from prody import *
In [2]: from numpy import *
In [3]: from pylab import *
In [4]: ion()
In [5]: import os, glob
Parse conformations¶
Now, let’s read initial and refined conformations:
In [6]: initial = AtomGroup('p38 initial')
In [7]: refined = AtomGroup('p38 refined')
In [8]: for pdb in glob.glob('p38_ensemble/*pdb'):
   ...:     fn = os.path.splitext(os.path.split(pdb)[1])[0]
   ...:     opt = os.path.join('p38_optimize', fn + '.coor')
   ...:     parsePDB(pdb, ag=initial)
   ...:     parsePDB(opt, ag=refined)
   ...: 
---------------------------------------------------------------------------
MMCIFParseError                           Traceback (most recent call last)
<ipython-input-8-87036b051e71> in <module>()
      3     opt = os.path.join('p38_optimize', fn + '.coor')
      4     parsePDB(pdb, ag=initial)
----> 5     parsePDB(opt, ag=refined)
      6 
/home/exx/ProDy-website/ProDy/prody/proteins/pdbfile.pyc in parsePDB(*pdb, **kwargs)
    125 
    126     if n_pdb == 1:
--> 127         return _parsePDB(pdb[0], **kwargs)
    128     else:
    129         results = []
/home/exx/ProDy-website/ProDy/prody/proteins/pdbfile.pyc in _parsePDB(pdb, **kwargs)
    244         try:
    245             LOGGER.warn("Trying to parse as mmCIF file instead")
--> 246             return parseMMCIF(pdb, **kwargs)
    247         except KeyError:
    248             try:
/home/exx/ProDy-website/ProDy/prody/proteins/ciffile.pyc in parseMMCIF(pdb, **kwargs)
    130         kwargs['title'] = title
    131     cif = openFile(pdb, 'rt')
--> 132     result = parseMMCIFStream(cif, chain=chain, segment=segment, **kwargs)
    133     cif.close()
    134     if unite_chains:
/home/exx/ProDy-website/ProDy/prody/proteins/ciffile.pyc in parseMMCIFStream(stream, **kwargs)
    237 
    238         _parseMMCIFLines(ag, lines, model, chain, subset, altloc, 
--> 239                          segment, unite_chains, report)
    240 
    241         if ag.numAtoms() > 0:
/home/exx/ProDy-website/ProDy/prody/proteins/ciffile.pyc in _parseMMCIFLines(atomgroup, lines, model, chain, subset, altloc_torf, segment, unite_chains, report)
    337                 stop = i
    338             else:
--> 339                 raise MMCIFParseError('mmCIF file contained no atoms.')
    340 
    341         i += 1
MMCIFParseError: mmCIF file contained no atoms.
In [9]: initial
Out[9]: <AtomGroup: p38 initial (5658 atoms)>
In [10]: refined
Out[10]: <AtomGroup: p38 refined (0 atoms; no coordinates)>
Calculate RMSD change¶
We can plot RMSD change after refinement as follows:
In [11]: rmsd_ca = []
In [12]: rmsd_all = []
In [13]: initial_ca = initial.ca
In [14]: refined_ca = refined.ca
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-14-40de9febca08> in <module>()
----> 1 refined_ca = refined.ca
/home/exx/ProDy-website/ProDy/prody/atomic/atomic.pyc in __getattribute__(self, name)
     98                         dummies = self.numDummies()
     99                     except AttributeError:
--> 100                         indices = self._getSubset(name)
    101                         if len(indices):
    102                             return Selection(ag, indices, selstr,
/home/exx/ProDy-website/ProDy/prody/atomic/atomgroup.pyc in _getSubset(self, label)
   1086             return self._subsets[label]
   1087         except KeyError:
-> 1088             flgs = self._getFlags(label)
   1089             try:
   1090                 return self._subsets[label]
/home/exx/ProDy-website/ProDy/prody/atomic/atomgroup.pyc in _getFlags(self, label)
   1028         except KeyError:
   1029             try:
-> 1030                 return FLAG_PLANTERS[label](self, label)
   1031             except KeyError:
   1032                 pass
/home/exx/ProDy-website/ProDy/prody/atomic/flags.pyc in setCalpha(ag, label)
    790 
    791     flags = ag._getNames() == 'CA'
--> 792     indices = flags.nonzero()[0]
    793     if len(indices):
    794         torf = array([rn in AMINOACIDS for rn in ag._getResnames()[indices]])
AttributeError: 'bool' object has no attribute 'nonzero'
In [15]: for i in range(initial.numCoordsets()):
   ....:     initial.setACSIndex(i)
   ....:     refined.setACSIndex(i)
   ....:     initial_ca.setACSIndex(i)
   ....:     refined_ca.setACSIndex(i)
   ....:     rmsd_ca.append(calcRMSD(initial_ca, refined_ca))
   ....:     rmsd_all.append(calcRMSD(initial, refined))
   ....: 
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-15-71aab0f1ba84> in <module>()
      1 for i in range(initial.numCoordsets()):
      2     initial.setACSIndex(i)
----> 3     refined.setACSIndex(i)
      4     initial_ca.setACSIndex(i)
      5     refined_ca.setACSIndex(i)
/home/exx/ProDy-website/ProDy/prody/atomic/atomgroup.pyc in setACSIndex(self, index)
    858             raise TypeError('index must be an integer')
    859         if n_csets <= index or n_csets < abs(index):
--> 860             raise IndexError('coordinate set index is out of range')
    861         if index < 0:
    862             index += n_csets
IndexError: coordinate set index is out of range
In [16]: plot(rmsd_all, label='all');
In [17]: plot(rmsd_ca, label='ca');
In [18]: xlabel('Conformation index');
In [19]: ylabel('RMSD');
In [20]: legend();
Select a diverse set¶
To select a diverse set of refined conformations, let’s calculate average RMSD for each conformation to all others:
In [21]: rmsd_mean = []
In [22]: for i in range(refined.numCoordsets()):
   ....:     refined.setACSIndex(i)
   ....:     alignCoordsets(refined)
   ....:     rmsd = calcRMSD(refined)
   ....:     rmsd_mean.append(rmsd.sum() / (len(rmsd) - 1))
   ....: 
In [23]: bar(arange(1, len(rmsd_mean) + 1), rmsd_mean);
In [24]: xlabel('Conformation index');
In [25]: ylabel('Mean RMSD');
Let’s select conformations that are 1.2 Å away from other on average:
In [26]: selected = (array(rmsd_mean) >= 1.2).nonzero()[0]
In [27]: selected
Out[27]: array([], dtype=int64)
In [28]: selection = refined[selected]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-28-691a562240aa> in <module>()
----> 1 selection = refined[selected]
/home/exx/ProDy-website/ProDy/prody/atomic/atomgroup.pyc in __getitem__(self, index)
    208         elif isinstance(index, (list, np.ndarray)):
    209             unique = np.unique(index)
--> 210             if unique[0] < 0 or unique[-1] >= self._n_atoms:
    211                 raise IndexError('index out of range')
    212             return Selection(self, unique, 'index ' + rangeString(index),
IndexError: index 0 is out of bounds for axis 0 with size 0
In [29]: selection
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-29-ae784f1d4d09> in <module>()
----> 1 selection
NameError: name 'selection' is not defined
      