Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Reader for CP2Ks DCD_ALIGNED_CELL format and restart files.
4Based on [pwtools](https://github.com/elcorto/pwtools).
5All information about the dcd format is taken from there.
6The way of reading it is also copied from pwtools.
7Thanks to Steve for agreeing to this.
9Some information also comes directly from the CP2K source,
10so if they decide to change anything this here might break.
12Some parts are adapted from the extxyz reader.
14Contributed by Patrick Melix <chemistry@melix.me>
15"""
17import numpy as np
18from itertools import islice
19import os
21from ase.atoms import Atoms, Atom
22from ase.cell import Cell
23from ase.io.formats import index2range
24from ase.data import atomic_numbers
26__all__ = ['read_cp2k_dcd', 'iread_cp2k_dcd', 'read_cp2k_restart']
28# DCD file header
29# (name, dtype, shape)
30# numpy dtypes:
31# i4 = int32
32# f4 = float32 (single precision)
33# f8 = float64 (double precision)
34# S80 = string of length 80 (80 chars)
35_HEADER_TYPES = [
36 ('blk0-0', 'i4'), # 84 (start of first block, size=84 bytes)
37 ('hdr', 'S4'), # 'CORD'
38 ('9int', ('i4', 9)), # 9 ints, mostly 0
39 ('timestep', 'f4'), # timestep (float32)
40 ('10int', ('i4', 10)), # 10 ints, mostly 0, last is 24
41 ('blk0-1', 'i4'), # 84
42 ('blk1-0', 'i4'), # 164
43 ('ntitle', 'i4'), # 2
44 ('remark1', 'S80'), # remark1
45 ('remark2', 'S80'), # remark2
46 ('blk1-1', 'i4'), # 164
47 ('blk2-0', 'i4'), # 4 (4 bytes = int32)
48 ('natoms', 'i4'), # natoms (int32)
49 ('blk2-1', 'i4'), # 4
50]
52_HEADER_DTYPE = np.dtype(_HEADER_TYPES)
55def _bytes_per_timestep(natoms):
56 return (4 + 6 * 8 + 7 * 4 + 3 * 4 * natoms)
59def _read_metainfo(fileobj):
60 if not hasattr(fileobj, 'seek'):
61 raise TypeError("You should have passed a fileobject opened in binary "
62 "mode, it seems you did not.")
63 fileobj.seek(0)
64 header = np.fromfile(fileobj, _HEADER_DTYPE, 1)[0]
65 natoms = header['natoms']
66 remark1 = str(header['remark1']) # taken from CP2Ks source/motion_utils.F
67 if 'CP2K' not in remark1:
68 raise ValueError("Header should contain mention of CP2K, are you sure "
69 "this file was created by CP2K?")
71 # dtype for fromfile: nStep times dtype of a timestep data block
72 dtype = np.dtype([('x0', 'i4'),
73 ('x1', 'f8', (6,)),
74 ('x2', 'i4', (2,)),
75 ('x3', 'f4', (natoms,)),
76 ('x4', 'i4', (2,)),
77 ('x5', 'f4', (natoms,)),
78 ('x6', 'i4', (2,)),
79 ('x7', 'f4', (natoms,)),
80 ('x8', 'i4')])
82 fd_pos = fileobj.tell()
83 header_end = fd_pos
84 # seek to end
85 fileobj.seek(0, os.SEEK_END)
86 # number of bytes between fd_pos and end
87 fd_rest = fileobj.tell() - fd_pos
88 # reset to pos after header
89 fileobj.seek(fd_pos)
90 # calculate nstep: fd_rest / bytes_per_timestep
91 # 4 - initial 48
92 # 6*8 - cryst_const_dcd
93 # 7*4 - markers between x,y,z and at the end of the block
94 # 3*4*natoms - float32 cartesian coords
95 nsteps = fd_rest / _bytes_per_timestep(natoms)
96 if fd_rest % _bytes_per_timestep(natoms) != 0:
97 raise ValueError("Calculated number of steps is not int, "
98 "cannot read file")
99 nsteps = int(nsteps)
100 return dtype, natoms, nsteps, header_end
103class DCDChunk:
104 def __init__(self, chunk, dtype, natoms, symbols, aligned):
105 self.chunk = chunk
106 self.dtype = dtype
107 self.natoms = natoms
108 self.symbols = symbols
109 self.aligned = aligned
111 def build(self):
112 """Convert unprocessed chunk into Atoms."""
113 return _read_cp2k_dcd_frame(self.chunk, self.dtype, self.natoms,
114 self.symbols, self.aligned)
117def idcdchunks(fd, ref_atoms, aligned):
118 """Yield unprocessed chunks for each image."""
119 if ref_atoms:
120 symbols = ref_atoms.get_chemical_symbols()
121 else:
122 symbols = None
123 dtype, natoms, nsteps, header_end = _read_metainfo(fd)
124 bytes_per_step = _bytes_per_timestep(natoms)
125 fd.seek(header_end)
126 for i in range(nsteps):
127 fd.seek(bytes_per_step * i + header_end)
128 yield DCDChunk(fd, dtype, natoms, symbols, aligned)
131class DCDImageIterator:
132 """"""
134 def __init__(self, ichunks):
135 self.ichunks = ichunks
137 def __call__(self, fd, indices=-1, ref_atoms=None, aligned=False):
138 self.ref_atoms = ref_atoms
139 self.aligned = aligned
140 if not hasattr(indices, 'start'):
141 if indices < 0:
142 indices = slice(indices - 1, indices)
143 else:
144 indices = slice(indices, indices + 1)
146 for chunk in self._getslice(fd, indices):
147 yield chunk.build()
149 def _getslice(self, fd, indices):
150 try:
151 iterator = islice(self.ichunks(fd, self.ref_atoms, self.aligned),
152 indices.start, indices.stop, indices.step)
153 except ValueError:
154 # Negative indices. Adjust slice to positive numbers.
155 dtype, natoms, nsteps, header_end = _read_metainfo(fd)
156 indices_tuple = indices.indices(nsteps + 1)
157 iterator = islice(self.ichunks(fd, self.ref_atoms, self.aligned),
158 *indices_tuple)
159 return iterator
162iread_cp2k_dcd = DCDImageIterator(idcdchunks)
165def read_cp2k_dcd(fileobj, index=-1, ref_atoms=None, aligned=False):
166 """ Read a DCD file created by CP2K.
168 To yield one Atoms object at a time use ``iread_cp2k_dcd``.
170 Note: Other programs use other formats, they are probably not compatible.
172 If ref_atoms is not given, all atoms will have symbol 'X'.
174 To make sure that this is a dcd file generated with the *DCD_ALIGNED_CELL* key
175 in the CP2K input, you need to set ``aligned`` to *True* to get cell information.
176 Make sure you do not set it otherwise, the cell will not match the atomic coordinates!
177 See the following url for details:
178 https://groups.google.com/forum/#!searchin/cp2k/Outputting$20cell$20information$20and$20fractional$20coordinates%7Csort:relevance/cp2k/72MhYykrSrQ/5c9Jaw7S9yQJ.
179 """ # noqa: E501
180 dtype, natoms, nsteps, header_end = _read_metainfo(fileobj)
181 bytes_per_timestep = _bytes_per_timestep(natoms)
182 if ref_atoms:
183 symbols = ref_atoms.get_chemical_symbols()
184 else:
185 symbols = ['X' for i in range(natoms)]
186 if natoms != len(symbols):
187 raise ValueError("Length of ref_atoms does not match natoms "
188 "from dcd file")
189 trbl = index2range(index, nsteps)
191 for index in trbl:
192 frame_pos = int(header_end + index * bytes_per_timestep)
193 fileobj.seek(frame_pos)
194 yield _read_cp2k_dcd_frame(fileobj, dtype, natoms, symbols,
195 aligned=aligned)
198def _read_cp2k_dcd_frame(fileobj, dtype, natoms, symbols, aligned=False):
199 arr = np.fromfile(fileobj, dtype, 1)
200 cryst_const = np.empty(6, dtype=np.float64)
201 cryst_const[0] = arr['x1'][0, 0]
202 cryst_const[1] = arr['x1'][0, 2]
203 cryst_const[2] = arr['x1'][0, 5]
204 cryst_const[3] = arr['x1'][0, 4]
205 cryst_const[4] = arr['x1'][0, 3]
206 cryst_const[5] = arr['x1'][0, 1]
207 coords = np.empty((natoms, 3), dtype=np.float32)
208 coords[..., 0] = arr['x3'][0, ...]
209 coords[..., 1] = arr['x5'][0, ...]
210 coords[..., 2] = arr['x7'][0, ...]
211 assert len(coords) == len(symbols)
212 if aligned:
213 # convention of the cell is (see cp2ks src/particle_methods.F):
214 # A is in x
215 # B lies in xy plane
216 # luckily this is also the ASE convention for Atoms.set_cell()
217 atoms = Atoms(symbols, coords, cell=cryst_const, pbc=True)
218 else:
219 atoms = Atoms(symbols, coords)
220 return atoms
223def read_cp2k_restart(fileobj):
224 """Read Atoms and Cell from Restart File.
226 Reads the elements, coordinates and cell information from the
227 '&SUBSYS' section of a CP2K restart file.
229 Tries to convert atom names to elements, if this fails element is set to X.
231 Returns an Atoms object.
232 """
234 def _parse_section(inp):
235 """Helper to parse structure to nested dict"""
236 ret = {'content': []}
237 while inp:
238 line = inp.readline().strip()
239 if line.startswith('&END'):
240 return ret
241 elif line.startswith('&'):
242 key = line.replace('&', '')
243 ret[key] = _parse_section(inp)
244 else:
245 ret['content'].append(line)
246 return ret
248 def _fast_forward_to(fileobj, section_header):
249 """Helper to forward to a section"""
250 found = False
251 while fileobj:
252 line = fileobj.readline()
253 if section_header in line:
254 found = True
255 break
256 if not found:
257 raise RuntimeError("No {:} section found!".format(section_header))
259 def _read_cell(data):
260 """Helper to read cell data, returns cell and pbc"""
261 cell = None
262 pbc = [False, False, False]
263 if 'CELL' in data:
264 content = data['CELL']['content']
265 cell = Cell([[0, 0, 0] for i in range(3)])
266 char2idx = {'A ': 0, 'B ': 1, 'C ': 2}
267 for line in content:
268 # lines starting with A/B/C<whitespace> have cell
269 if line[:2] in char2idx:
270 idx = char2idx[line[:2]]
271 cell[idx] = [float(x) for x in line.split()[1:]]
272 pbc[idx] = True
273 if not set([len(v) for v in cell]) == {3}:
274 raise RuntimeError("Bad Cell Definition found.")
275 return cell, pbc
277 def _read_geometry(content):
278 """Helper to read geometry, returns a list of Atoms"""
279 atom_list = []
280 for entry in content:
281 entry = entry.split()
282 # Get letters for element symbol
283 el = [char.lower() for char in entry[0] if char.isalpha()]
284 el = "".join(el).capitalize()
285 # Get positions
286 pos = [float(x) for x in entry[1:4]]
287 if el in atomic_numbers.keys():
288 atom_list.append(Atom(el, pos))
289 else:
290 atom_list.append(Atom('X', pos))
291 return atom_list
293 # fast forward to &SUBSYS section
294 _fast_forward_to(fileobj, '&SUBSYS')
295 # parse sections into dictionary
296 data = _parse_section(fileobj)
297 # look for &CELL
298 cell, pbc = _read_cell(data)
299 # get the atoms
300 atom_list = _read_geometry(data['COORD']['content'])
301 return Atoms(atom_list, cell=cell, pbc=pbc)