Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Module to read and write atoms in PDB file format.
3See::
5 http://www.wwpdb.org/documentation/file-format
7Note: The PDB format saves cell lengths and angles; hence the absolute
8orientation is lost when saving. Saving and loading a file will
9conserve the scaled positions, not the absolute ones.
10"""
12import warnings
14import numpy as np
16from ase.atoms import Atoms
17from ase.geometry import cellpar_to_cell
18from ase.io.espresso import label_to_symbol
19from ase.utils import reader, writer
22def read_atom_line(line_full):
23 """
24 Read atom line from pdb format
25 HETATM 1 H14 ORTE 0 6.301 0.693 1.919 1.00 0.00 H
26 """
28 line = line_full.rstrip('\n')
29 type_atm = line[0:6]
30 if type_atm == "ATOM " or type_atm == "HETATM":
32 name = line[12:16].strip()
34 altloc = line[16]
35 resname = line[17:21]
36 # chainid = line[21] # Not used
38 resseq = int(line[22:26].split()[0]) # sequence identifier
39 # icode = line[26] # insertion code, not used
41 # atomic coordinates
42 try:
43 coord = np.array([float(line[30:38]),
44 float(line[38:46]),
45 float(line[46:54])], dtype=np.float64)
46 except ValueError:
47 raise ValueError("Invalid or missing coordinate(s)")
49 # occupancy & B factor
50 try:
51 occupancy = float(line[54:60])
52 except ValueError:
53 occupancy = None # Rather than arbitrary zero or one
55 if occupancy is not None and occupancy < 0:
56 warnings.warn("Negative occupancy in one or more atoms")
58 try:
59 bfactor = float(line[60:66])
60 except ValueError:
61 # The PDB use a default of zero if the data is missing
62 bfactor = 0.0
64 # segid = line[72:76] # not used
65 symbol = line[76:78].strip().upper()
67 else:
68 raise ValueError("Only ATOM and HETATM supported")
70 return symbol, name, altloc, resname, coord, occupancy, bfactor, resseq
73@reader
74def read_proteindatabank(fileobj, index=-1, read_arrays=True):
75 """Read PDB files."""
76 images = []
77 orig = np.identity(3)
78 trans = np.zeros(3)
79 occ = []
80 bfactor = []
81 residuenames = []
82 residuenumbers = []
83 atomtypes = []
85 symbols = []
86 positions = []
87 cell = None
88 pbc = None
90 def build_atoms():
91 atoms = Atoms(symbols=symbols,
92 cell=cell, pbc=pbc,
93 positions=positions)
95 if not read_arrays:
96 return atoms
98 info = {'occupancy': occ,
99 'bfactor': bfactor,
100 'residuenames': residuenames,
101 'atomtypes': atomtypes,
102 'residuenumbers': residuenumbers}
103 for name, array in info.items():
104 if len(array) == 0:
105 pass
106 elif len(array) != len(atoms):
107 warnings.warn('Length of {} array, {}, '
108 'different from number of atoms {}'.
109 format(name, len(array), len(atoms)))
110 else:
111 atoms.set_array(name, np.array(array))
112 return atoms
114 for line in fileobj.readlines():
115 if line.startswith('CRYST1'):
116 cellpar = [float(line[6:15]), # a
117 float(line[15:24]), # b
118 float(line[24:33]), # c
119 float(line[33:40]), # alpha
120 float(line[40:47]), # beta
121 float(line[47:54])] # gamma
122 cell = cellpar_to_cell(cellpar)
123 pbc = True
124 for c in range(3):
125 if line.startswith('ORIGX' + '123'[c]):
126 orig[c] = [float(line[10:20]),
127 float(line[20:30]),
128 float(line[30:40])]
129 trans[c] = float(line[45:55])
131 if line.startswith('ATOM') or line.startswith('HETATM'):
132 # Atom name is arbitrary and does not necessarily
133 # contain the element symbol. The specification
134 # requires the element symbol to be in columns 77+78.
135 # Fall back to Atom name for files that do not follow
136 # the spec, e.g. packmol.
138 # line_info = symbol, name, altloc, resname, coord, occupancy,
139 # bfactor, resseq
140 line_info = read_atom_line(line)
142 try:
143 symbol = label_to_symbol(line_info[0])
144 except (KeyError, IndexError):
145 symbol = label_to_symbol(line_info[1])
147 position = np.dot(orig, line_info[4]) + trans
148 atomtypes.append(line_info[1])
149 residuenames.append(line_info[3])
150 if line_info[5] is not None:
151 occ.append(line_info[5])
152 bfactor.append(line_info[6])
153 residuenumbers.append(line_info[7])
155 symbols.append(symbol)
156 positions.append(position)
158 if line.startswith('END'):
159 # End of configuration reached
160 # According to the latest PDB file format (v3.30),
161 # this line should start with 'ENDMDL' (not 'END'),
162 # but in this way PDB trajectories from e.g. CP2K
163 # are supported (also VMD supports this format).
164 atoms = build_atoms()
165 images.append(atoms)
166 occ = []
167 bfactor = []
168 residuenames = []
169 atomtypes = []
170 symbols = []
171 positions = []
172 cell = None
173 pbc = None
175 if len(images) == 0:
176 atoms = build_atoms()
177 images.append(atoms)
178 return images[index]
181@writer
182def write_proteindatabank(fileobj, images, write_arrays=True):
183 """Write images to PDB-file."""
184 if hasattr(images, 'get_positions'):
185 images = [images]
187 rotation = None
188 if images[0].get_pbc().any():
189 from ase.geometry import cell_to_cellpar, cellpar_to_cell
191 currentcell = images[0].get_cell()
192 cellpar = cell_to_cellpar(currentcell)
193 exportedcell = cellpar_to_cell(cellpar)
194 rotation = np.linalg.solve(currentcell, exportedcell)
195 # ignoring Z-value, using P1 since we have all atoms defined explicitly
196 format = 'CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1\n'
197 fileobj.write(format % (cellpar[0], cellpar[1], cellpar[2],
198 cellpar[3], cellpar[4], cellpar[5]))
200 # 1234567 123 6789012345678901 89 67 456789012345678901234567 890
201 format = ('ATOM %5d %4s MOL 1 %8.3f%8.3f%8.3f%6.2f%6.2f'
202 ' %2s \n')
204 # RasMol complains if the atom index exceeds 100000. There might
205 # be a limit of 5 digit numbers in this field.
206 MAXNUM = 100000
208 symbols = images[0].get_chemical_symbols()
209 natoms = len(symbols)
211 for n, atoms in enumerate(images):
212 fileobj.write('MODEL ' + str(n + 1) + '\n')
213 p = atoms.get_positions()
214 occupancy = np.ones(len(atoms))
215 bfactor = np.zeros(len(atoms))
216 if write_arrays:
217 if 'occupancy' in atoms.arrays:
218 occupancy = atoms.get_array('occupancy')
219 if 'bfactor' in atoms.arrays:
220 bfactor = atoms.get_array('bfactor')
221 if rotation is not None:
222 p = p.dot(rotation)
223 for a in range(natoms):
224 x, y, z = p[a]
225 occ = occupancy[a]
226 bf = bfactor[a]
227 fileobj.write(format % ((a+1) % MAXNUM, symbols[a],
228 x, y, z, occ, bf, symbols[a].upper()))
229 fileobj.write('ENDMDL\n')