Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Reader for CP2Ks DCD_ALIGNED_CELL format and restart files. 

3 

4Based on [pwtools](https://github.com/elcorto/pwtools). 

5All information about the dcd format is taken from there. 

6The way of reading it is also copied from pwtools. 

7Thanks to Steve for agreeing to this. 

8 

9Some information also comes directly from the CP2K source, 

10so if they decide to change anything this here might break. 

11 

12Some parts are adapted from the extxyz reader. 

13 

14Contributed by Patrick Melix <chemistry@melix.me> 

15""" 

16 

17import numpy as np 

18from itertools import islice 

19import os 

20 

21from ase.atoms import Atoms, Atom 

22from ase.cell import Cell 

23from ase.io.formats import index2range 

24from ase.data import atomic_numbers 

25 

26__all__ = ['read_cp2k_dcd', 'iread_cp2k_dcd', 'read_cp2k_restart'] 

27 

28# DCD file header 

29# (name, dtype, shape) 

30# numpy dtypes: 

31# i4 = int32 

32# f4 = float32 (single precision) 

33# f8 = float64 (double precision) 

34# S80 = string of length 80 (80 chars) 

35_HEADER_TYPES = [ 

36 ('blk0-0', 'i4'), # 84 (start of first block, size=84 bytes) 

37 ('hdr', 'S4'), # 'CORD' 

38 ('9int', ('i4', 9)), # 9 ints, mostly 0 

39 ('timestep', 'f4'), # timestep (float32) 

40 ('10int', ('i4', 10)), # 10 ints, mostly 0, last is 24 

41 ('blk0-1', 'i4'), # 84 

42 ('blk1-0', 'i4'), # 164 

43 ('ntitle', 'i4'), # 2 

44 ('remark1', 'S80'), # remark1 

45 ('remark2', 'S80'), # remark2 

46 ('blk1-1', 'i4'), # 164 

47 ('blk2-0', 'i4'), # 4 (4 bytes = int32) 

48 ('natoms', 'i4'), # natoms (int32) 

49 ('blk2-1', 'i4'), # 4 

50] 

51 

52_HEADER_DTYPE = np.dtype(_HEADER_TYPES) 

53 

54 

55def _bytes_per_timestep(natoms): 

56 return (4 + 6 * 8 + 7 * 4 + 3 * 4 * natoms) 

57 

58 

59def _read_metainfo(fileobj): 

60 if not hasattr(fileobj, 'seek'): 

61 raise TypeError("You should have passed a fileobject opened in binary " 

62 "mode, it seems you did not.") 

63 fileobj.seek(0) 

64 header = np.fromfile(fileobj, _HEADER_DTYPE, 1)[0] 

65 natoms = header['natoms'] 

66 remark1 = str(header['remark1']) # taken from CP2Ks source/motion_utils.F 

67 if 'CP2K' not in remark1: 

68 raise ValueError("Header should contain mention of CP2K, are you sure " 

69 "this file was created by CP2K?") 

70 

71 # dtype for fromfile: nStep times dtype of a timestep data block 

72 dtype = np.dtype([('x0', 'i4'), 

73 ('x1', 'f8', (6,)), 

74 ('x2', 'i4', (2,)), 

75 ('x3', 'f4', (natoms,)), 

76 ('x4', 'i4', (2,)), 

77 ('x5', 'f4', (natoms,)), 

78 ('x6', 'i4', (2,)), 

79 ('x7', 'f4', (natoms,)), 

80 ('x8', 'i4')]) 

81 

82 fd_pos = fileobj.tell() 

83 header_end = fd_pos 

84 # seek to end 

85 fileobj.seek(0, os.SEEK_END) 

86 # number of bytes between fd_pos and end 

87 fd_rest = fileobj.tell() - fd_pos 

88 # reset to pos after header 

89 fileobj.seek(fd_pos) 

90 # calculate nstep: fd_rest / bytes_per_timestep 

91 # 4 - initial 48 

92 # 6*8 - cryst_const_dcd 

93 # 7*4 - markers between x,y,z and at the end of the block 

94 # 3*4*natoms - float32 cartesian coords 

95 nsteps = fd_rest / _bytes_per_timestep(natoms) 

96 if fd_rest % _bytes_per_timestep(natoms) != 0: 

97 raise ValueError("Calculated number of steps is not int, " 

98 "cannot read file") 

99 nsteps = int(nsteps) 

100 return dtype, natoms, nsteps, header_end 

101 

102 

103class DCDChunk: 

104 def __init__(self, chunk, dtype, natoms, symbols, aligned): 

105 self.chunk = chunk 

106 self.dtype = dtype 

107 self.natoms = natoms 

108 self.symbols = symbols 

109 self.aligned = aligned 

110 

111 def build(self): 

112 """Convert unprocessed chunk into Atoms.""" 

113 return _read_cp2k_dcd_frame(self.chunk, self.dtype, self.natoms, 

114 self.symbols, self.aligned) 

115 

116 

117def idcdchunks(fd, ref_atoms, aligned): 

118 """Yield unprocessed chunks for each image.""" 

119 if ref_atoms: 

120 symbols = ref_atoms.get_chemical_symbols() 

121 else: 

122 symbols = None 

123 dtype, natoms, nsteps, header_end = _read_metainfo(fd) 

124 bytes_per_step = _bytes_per_timestep(natoms) 

125 fd.seek(header_end) 

126 for i in range(nsteps): 

127 fd.seek(bytes_per_step * i + header_end) 

128 yield DCDChunk(fd, dtype, natoms, symbols, aligned) 

129 

130 

131class DCDImageIterator: 

132 """""" 

133 

134 def __init__(self, ichunks): 

135 self.ichunks = ichunks 

136 

137 def __call__(self, fd, indices=-1, ref_atoms=None, aligned=False): 

138 self.ref_atoms = ref_atoms 

139 self.aligned = aligned 

140 if not hasattr(indices, 'start'): 

141 if indices < 0: 

142 indices = slice(indices - 1, indices) 

143 else: 

144 indices = slice(indices, indices + 1) 

145 

146 for chunk in self._getslice(fd, indices): 

147 yield chunk.build() 

148 

149 def _getslice(self, fd, indices): 

150 try: 

151 iterator = islice(self.ichunks(fd, self.ref_atoms, self.aligned), 

152 indices.start, indices.stop, indices.step) 

153 except ValueError: 

154 # Negative indices. Adjust slice to positive numbers. 

155 dtype, natoms, nsteps, header_end = _read_metainfo(fd) 

156 indices_tuple = indices.indices(nsteps + 1) 

157 iterator = islice(self.ichunks(fd, self.ref_atoms, self.aligned), 

158 *indices_tuple) 

159 return iterator 

160 

161 

162iread_cp2k_dcd = DCDImageIterator(idcdchunks) 

163 

164 

165def read_cp2k_dcd(fileobj, index=-1, ref_atoms=None, aligned=False): 

166 """ Read a DCD file created by CP2K. 

167 

168 To yield one Atoms object at a time use ``iread_cp2k_dcd``. 

169 

170 Note: Other programs use other formats, they are probably not compatible. 

171 

172 If ref_atoms is not given, all atoms will have symbol 'X'. 

173 

174 To make sure that this is a dcd file generated with the *DCD_ALIGNED_CELL* key 

175 in the CP2K input, you need to set ``aligned`` to *True* to get cell information. 

176 Make sure you do not set it otherwise, the cell will not match the atomic coordinates! 

177 See the following url for details: 

178 https://groups.google.com/forum/#!searchin/cp2k/Outputting$20cell$20information$20and$20fractional$20coordinates%7Csort:relevance/cp2k/72MhYykrSrQ/5c9Jaw7S9yQJ. 

179 """ # noqa: E501 

180 dtype, natoms, nsteps, header_end = _read_metainfo(fileobj) 

181 bytes_per_timestep = _bytes_per_timestep(natoms) 

182 if ref_atoms: 

183 symbols = ref_atoms.get_chemical_symbols() 

184 else: 

185 symbols = ['X' for i in range(natoms)] 

186 if natoms != len(symbols): 

187 raise ValueError("Length of ref_atoms does not match natoms " 

188 "from dcd file") 

189 trbl = index2range(index, nsteps) 

190 

191 for index in trbl: 

192 frame_pos = int(header_end + index * bytes_per_timestep) 

193 fileobj.seek(frame_pos) 

194 yield _read_cp2k_dcd_frame(fileobj, dtype, natoms, symbols, 

195 aligned=aligned) 

196 

197 

198def _read_cp2k_dcd_frame(fileobj, dtype, natoms, symbols, aligned=False): 

199 arr = np.fromfile(fileobj, dtype, 1) 

200 cryst_const = np.empty(6, dtype=np.float64) 

201 cryst_const[0] = arr['x1'][0, 0] 

202 cryst_const[1] = arr['x1'][0, 2] 

203 cryst_const[2] = arr['x1'][0, 5] 

204 cryst_const[3] = arr['x1'][0, 4] 

205 cryst_const[4] = arr['x1'][0, 3] 

206 cryst_const[5] = arr['x1'][0, 1] 

207 coords = np.empty((natoms, 3), dtype=np.float32) 

208 coords[..., 0] = arr['x3'][0, ...] 

209 coords[..., 1] = arr['x5'][0, ...] 

210 coords[..., 2] = arr['x7'][0, ...] 

211 assert len(coords) == len(symbols) 

212 if aligned: 

213 # convention of the cell is (see cp2ks src/particle_methods.F): 

214 # A is in x 

215 # B lies in xy plane 

216 # luckily this is also the ASE convention for Atoms.set_cell() 

217 atoms = Atoms(symbols, coords, cell=cryst_const, pbc=True) 

218 else: 

219 atoms = Atoms(symbols, coords) 

220 return atoms 

221 

222 

223def read_cp2k_restart(fileobj): 

224 """Read Atoms and Cell from Restart File. 

225 

226 Reads the elements, coordinates and cell information from the 

227 '&SUBSYS' section of a CP2K restart file. 

228 

229 Tries to convert atom names to elements, if this fails element is set to X. 

230 

231 Returns an Atoms object. 

232 """ 

233 

234 def _parse_section(inp): 

235 """Helper to parse structure to nested dict""" 

236 ret = {'content': []} 

237 while inp: 

238 line = inp.readline().strip() 

239 if line.startswith('&END'): 

240 return ret 

241 elif line.startswith('&'): 

242 key = line.replace('&', '') 

243 ret[key] = _parse_section(inp) 

244 else: 

245 ret['content'].append(line) 

246 return ret 

247 

248 def _fast_forward_to(fileobj, section_header): 

249 """Helper to forward to a section""" 

250 found = False 

251 while fileobj: 

252 line = fileobj.readline() 

253 if section_header in line: 

254 found = True 

255 break 

256 if not found: 

257 raise RuntimeError("No {:} section found!".format(section_header)) 

258 

259 def _read_cell(data): 

260 """Helper to read cell data, returns cell and pbc""" 

261 cell = None 

262 pbc = [False, False, False] 

263 if 'CELL' in data: 

264 content = data['CELL']['content'] 

265 cell = Cell([[0, 0, 0] for i in range(3)]) 

266 char2idx = {'A ': 0, 'B ': 1, 'C ': 2} 

267 for line in content: 

268 # lines starting with A/B/C<whitespace> have cell 

269 if line[:2] in char2idx: 

270 idx = char2idx[line[:2]] 

271 cell[idx] = [float(x) for x in line.split()[1:]] 

272 pbc[idx] = True 

273 if not set([len(v) for v in cell]) == {3}: 

274 raise RuntimeError("Bad Cell Definition found.") 

275 return cell, pbc 

276 

277 def _read_geometry(content): 

278 """Helper to read geometry, returns a list of Atoms""" 

279 atom_list = [] 

280 for entry in content: 

281 entry = entry.split() 

282 # Get letters for element symbol 

283 el = [char.lower() for char in entry[0] if char.isalpha()] 

284 el = "".join(el).capitalize() 

285 # Get positions 

286 pos = [float(x) for x in entry[1:4]] 

287 if el in atomic_numbers.keys(): 

288 atom_list.append(Atom(el, pos)) 

289 else: 

290 atom_list.append(Atom('X', pos)) 

291 return atom_list 

292 

293 # fast forward to &SUBSYS section 

294 _fast_forward_to(fileobj, '&SUBSYS') 

295 # parse sections into dictionary 

296 data = _parse_section(fileobj) 

297 # look for &CELL 

298 cell, pbc = _read_cell(data) 

299 # get the atoms 

300 atom_list = _read_geometry(data['COORD']['content']) 

301 return Atoms(atom_list, cell=cell, pbc=pbc)