Coverage for /builds/debichem-team/python-ase/ase/io/proteindatabank.py: 91.60%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1"""Module to read and write atoms in PDB file format.

3See::

5 http://www.wwpdb.org/documentation/file-format

7Note: The PDB format saves cell lengths and angles; hence the absolute

8orientation is lost when saving. Saving and loading a file will

9conserve the scaled positions, not the absolute ones.

10"""

12import warnings

14import numpy as np

16from ase.atoms import Atoms

17from ase.geometry import cellpar_to_cell

18from ase.io.espresso import label_to_symbol

19from ase.utils import reader, writer

22def read_atom_line(line_full):

23 """

24 Read atom line from pdb format

25 HETATM 1 H14 ORTE 0 6.301 0.693 1.919 1.00 0.00 H

26 """

28 line = line_full.rstrip('\n')

29 type_atm = line[0:6]

30 if type_atm == "ATOM " or type_atm == "HETATM":

32 name = line[12:16].strip()

34 altloc = line[16]

35 resname = line[17:21]

36 # chainid = line[21] # Not used

38 resseq = int(line[22:26].split()[0]) # sequence identifier

39 # icode = line[26] # insertion code, not used

41 # atomic coordinates

42 try:

43 coord = np.array([float(line[30:38]),

44 float(line[38:46]),

45 float(line[46:54])], dtype=np.float64)

46 except ValueError:

47 raise ValueError("Invalid or missing coordinate(s)")

49 # occupancy & B factor

50 try:

51 occupancy = float(line[54:60])

52 except ValueError:

53 occupancy = None # Rather than arbitrary zero or one

55 if occupancy is not None and occupancy < 0:

56 warnings.warn("Negative occupancy in one or more atoms")

58 try:

59 bfactor = float(line[60:66])

60 except ValueError:

61 # The PDB use a default of zero if the data is missing

62 bfactor = 0.0

64 # segid = line[72:76] # not used

65 symbol = line[76:78].strip().upper()

67 else:

68 raise ValueError("Only ATOM and HETATM supported")

70 return symbol, name, altloc, resname, coord, occupancy, bfactor, resseq

73@reader

74def read_proteindatabank(fileobj, index=-1, read_arrays=True):

75 """Read PDB files."""

76 images = []

77 orig = np.identity(3)

78 trans = np.zeros(3)

79 occ = []

80 bfactor = []

81 residuenames = []

82 residuenumbers = []

83 atomtypes = []

85 symbols = []

86 positions = []

87 cell = None

88 pbc = None

90 def build_atoms():

91 atoms = Atoms(symbols=symbols,

92 cell=cell, pbc=pbc,

93 positions=positions)

95 if not read_arrays:

96 return atoms

98 info = {'occupancy': occ,

99 'bfactor': bfactor,

100 'residuenames': residuenames,

101 'atomtypes': atomtypes,

102 'residuenumbers': residuenumbers}

103 for name, array in info.items():

104 if len(array) == 0:

105 pass

106 elif len(array) != len(atoms):

107 warnings.warn('Length of {} array, {}, '

108 'different from number of atoms {}'.

109 format(name, len(array), len(atoms)))

110 else:

111 atoms.set_array(name, np.array(array))

112 return atoms

113

114 for line in fileobj.readlines():

115 if line.startswith('CRYST1'):

116 cellpar = [float(line[6:15]), # a

117 float(line[15:24]), # b

118 float(line[24:33]), # c

119 float(line[33:40]), # alpha

120 float(line[40:47]), # beta

121 float(line[47:54])] # gamma

122 cell = cellpar_to_cell(cellpar)

123 pbc = True

124 for c in range(3):

125 if line.startswith('ORIGX' + '123'[c]):

126 orig[c] = [float(line[10:20]),

127 float(line[20:30]),

128 float(line[30:40])]

129 trans[c] = float(line[45:55])

130

131 if line.startswith('ATOM') or line.startswith('HETATM'):

132 # Atom name is arbitrary and does not necessarily

133 # contain the element symbol. The specification

134 # requires the element symbol to be in columns 77+78.

135 # Fall back to Atom name for files that do not follow

136 # the spec, e.g. packmol.

137

138 # line_info = symbol, name, altloc, resname, coord, occupancy,

139 # bfactor, resseq

140 line_info = read_atom_line(line)

141

142 try:

143 symbol = label_to_symbol(line_info[0])

144 except (KeyError, IndexError):

145 symbol = label_to_symbol(line_info[1])

146

147 position = np.dot(orig, line_info[4]) + trans

148 atomtypes.append(line_info[1])

149 residuenames.append(line_info[3])

150 if line_info[5] is not None:

151 occ.append(line_info[5])

152 bfactor.append(line_info[6])

153 residuenumbers.append(line_info[7])

154

155 symbols.append(symbol)

156 positions.append(position)

157

158 if line.startswith('END'):

159 # End of configuration reached

160 # According to the latest PDB file format (v3.30),

161 # this line should start with 'ENDMDL' (not 'END'),

162 # but in this way PDB trajectories from e.g. CP2K

163 # are supported (also VMD supports this format).

164 atoms = build_atoms()

165 images.append(atoms)

166 occ = []

167 bfactor = []

168 residuenames = []

169 atomtypes = []

170 symbols = []

171 positions = []

172 cell = None

173 pbc = None

174

175 if len(images) == 0:

176 atoms = build_atoms()

177 images.append(atoms)

178 return images[index]

179

180

181@writer

182def write_proteindatabank(fileobj, images, write_arrays=True):

183 """Write images to PDB-file."""

184 if hasattr(images, 'get_positions'):

185 images = [images]

186

187 rotation = None

188 if images[0].get_pbc().any():

189 from ase.geometry import cell_to_cellpar, cellpar_to_cell

190

191 currentcell = images[0].get_cell()

192 cellpar = cell_to_cellpar(currentcell)

193 exportedcell = cellpar_to_cell(cellpar)

194 rotation = np.linalg.solve(currentcell, exportedcell)

195 # ignoring Z-value, using P1 since we have all atoms defined explicitly

196 format = 'CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1\n'

197 fileobj.write(format % (cellpar[0], cellpar[1], cellpar[2],

198 cellpar[3], cellpar[4], cellpar[5]))

199

200 # 1234567 123 6789012345678901 89 67 456789012345678901234567 890

201 format = ('ATOM %5d %4s MOL 1 %8.3f%8.3f%8.3f%6.2f%6.2f'

202 ' %2s \n')

203

204 # RasMol complains if the atom index exceeds 100000. There might

205 # be a limit of 5 digit numbers in this field.

206 MAXNUM = 100000

207

208 symbols = images[0].get_chemical_symbols()

209 natoms = len(symbols)

210

211 for n, atoms in enumerate(images):

212 fileobj.write('MODEL ' + str(n + 1) + '\n')

213 p = atoms.get_positions()

214 occupancy = np.ones(len(atoms))

215 bfactor = np.zeros(len(atoms))

216 if write_arrays:

217 if 'occupancy' in atoms.arrays:

218 occupancy = atoms.get_array('occupancy')

219 if 'bfactor' in atoms.arrays:

220 bfactor = atoms.get_array('bfactor')

221 if rotation is not None:

222 p = p.dot(rotation)

223 for a in range(natoms):

224 x, y, z = p[a]

225 occ = occupancy[a]

226 bf = bfactor[a]

227 fileobj.write(format % ((a+1) % MAXNUM, symbols[a],

228 x, y, z, occ, bf, symbols[a].upper()))

229 fileobj.write('ENDMDL\n')

Coverage for /builds/debichem-team/python-ase/ase/io/proteindatabank.py : 91.60%

131 statements

Coverage for /builds/debichem-team/python-ase/ase/io/proteindatabank.py : 91.60%

131 statements 120 run 11 missing 0 excluded

131 statements