Coverage for /builds/debichem-team/python-ase/ase/io/cp2k.py: 89.68%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1"""

2Reader for CP2Ks DCD_ALIGNED_CELL format and restart files.

4Based on [pwtools](https://github.com/elcorto/pwtools).

5All information about the dcd format is taken from there.

6The way of reading it is also copied from pwtools.

7Thanks to Steve for agreeing to this.

9Some information also comes directly from the CP2K source,

10so if they decide to change anything this here might break.

12Some parts are adapted from the extxyz reader.

14Contributed by Patrick Melix <chemistry@melix.me>

15"""

17import numpy as np

18from itertools import islice

19import os

21from ase.atoms import Atoms, Atom

22from ase.cell import Cell

23from ase.io.formats import index2range

24from ase.data import atomic_numbers

26__all__ = ['read_cp2k_dcd', 'iread_cp2k_dcd', 'read_cp2k_restart']

28# DCD file header

29# (name, dtype, shape)

30# numpy dtypes:

31# i4 = int32

32# f4 = float32 (single precision)

33# f8 = float64 (double precision)

34# S80 = string of length 80 (80 chars)

35_HEADER_TYPES = [

36 ('blk0-0', 'i4'), # 84 (start of first block, size=84 bytes)

37 ('hdr', 'S4'), # 'CORD'

38 ('9int', ('i4', 9)), # 9 ints, mostly 0

39 ('timestep', 'f4'), # timestep (float32)

40 ('10int', ('i4', 10)), # 10 ints, mostly 0, last is 24

41 ('blk0-1', 'i4'), # 84

42 ('blk1-0', 'i4'), # 164

43 ('ntitle', 'i4'), # 2

44 ('remark1', 'S80'), # remark1

45 ('remark2', 'S80'), # remark2

46 ('blk1-1', 'i4'), # 164

47 ('blk2-0', 'i4'), # 4 (4 bytes = int32)

48 ('natoms', 'i4'), # natoms (int32)

49 ('blk2-1', 'i4'), # 4

50]

52_HEADER_DTYPE = np.dtype(_HEADER_TYPES)

55def _bytes_per_timestep(natoms):

56 return (4 + 6 * 8 + 7 * 4 + 3 * 4 * natoms)

59def _read_metainfo(fileobj):

60 if not hasattr(fileobj, 'seek'):

61 raise TypeError("You should have passed a fileobject opened in binary "

62 "mode, it seems you did not.")

63 fileobj.seek(0)

64 header = np.fromfile(fileobj, _HEADER_DTYPE, 1)[0]

65 natoms = header['natoms']

66 remark1 = str(header['remark1']) # taken from CP2Ks source/motion_utils.F

67 if 'CP2K' not in remark1:

68 raise ValueError("Header should contain mention of CP2K, are you sure "

69 "this file was created by CP2K?")

71 # dtype for fromfile: nStep times dtype of a timestep data block

72 dtype = np.dtype([('x0', 'i4'),

73 ('x1', 'f8', (6,)),

74 ('x2', 'i4', (2,)),

75 ('x3', 'f4', (natoms,)),

76 ('x4', 'i4', (2,)),

77 ('x5', 'f4', (natoms,)),

78 ('x6', 'i4', (2,)),

79 ('x7', 'f4', (natoms,)),

80 ('x8', 'i4')])

82 fd_pos = fileobj.tell()

83 header_end = fd_pos

84 # seek to end

85 fileobj.seek(0, os.SEEK_END)

86 # number of bytes between fd_pos and end

87 fd_rest = fileobj.tell() - fd_pos

88 # reset to pos after header

89 fileobj.seek(fd_pos)

90 # calculate nstep: fd_rest / bytes_per_timestep

91 # 4 - initial 48

92 # 6*8 - cryst_const_dcd

93 # 7*4 - markers between x,y,z and at the end of the block

94 # 3*4*natoms - float32 cartesian coords

95 nsteps = fd_rest / _bytes_per_timestep(natoms)

96 if fd_rest % _bytes_per_timestep(natoms) != 0:

97 raise ValueError("Calculated number of steps is not int, "

98 "cannot read file")

99 nsteps = int(nsteps)

100 return dtype, natoms, nsteps, header_end

101

102

103class DCDChunk:

104 def __init__(self, chunk, dtype, natoms, symbols, aligned):

105 self.chunk = chunk

106 self.dtype = dtype

107 self.natoms = natoms

108 self.symbols = symbols

109 self.aligned = aligned

110

111 def build(self):

112 """Convert unprocessed chunk into Atoms."""

113 return _read_cp2k_dcd_frame(self.chunk, self.dtype, self.natoms,

114 self.symbols, self.aligned)

115

116

117def idcdchunks(fd, ref_atoms, aligned):

118 """Yield unprocessed chunks for each image."""

119 if ref_atoms:

120 symbols = ref_atoms.get_chemical_symbols()

121 else:

122 symbols = None

123 dtype, natoms, nsteps, header_end = _read_metainfo(fd)

124 bytes_per_step = _bytes_per_timestep(natoms)

125 fd.seek(header_end)

126 for i in range(nsteps):

127 fd.seek(bytes_per_step * i + header_end)

128 yield DCDChunk(fd, dtype, natoms, symbols, aligned)

129

130

131class DCDImageIterator:

132 """"""

133

134 def __init__(self, ichunks):

135 self.ichunks = ichunks

136

137 def __call__(self, fd, indices=-1, ref_atoms=None, aligned=False):

138 self.ref_atoms = ref_atoms

139 self.aligned = aligned

140 if not hasattr(indices, 'start'):

141 if indices < 0:

142 indices = slice(indices - 1, indices)

143 else:

144 indices = slice(indices, indices + 1)

145

146 for chunk in self._getslice(fd, indices):

147 yield chunk.build()

148

149 def _getslice(self, fd, indices):

150 try:

151 iterator = islice(self.ichunks(fd, self.ref_atoms, self.aligned),

152 indices.start, indices.stop, indices.step)

153 except ValueError:

154 # Negative indices. Adjust slice to positive numbers.

155 dtype, natoms, nsteps, header_end = _read_metainfo(fd)

156 indices_tuple = indices.indices(nsteps + 1)

157 iterator = islice(self.ichunks(fd, self.ref_atoms, self.aligned),

158 *indices_tuple)

159 return iterator

160

161

162iread_cp2k_dcd = DCDImageIterator(idcdchunks)

163

164

165def read_cp2k_dcd(fileobj, index=-1, ref_atoms=None, aligned=False):

166 """ Read a DCD file created by CP2K.

167

168 To yield one Atoms object at a time use ``iread_cp2k_dcd``.

169

170 Note: Other programs use other formats, they are probably not compatible.

171

172 If ref_atoms is not given, all atoms will have symbol 'X'.

173

174 To make sure that this is a dcd file generated with the *DCD_ALIGNED_CELL* key

175 in the CP2K input, you need to set ``aligned`` to *True* to get cell information.

176 Make sure you do not set it otherwise, the cell will not match the atomic coordinates!

177 See the following url for details:

178 https://groups.google.com/forum/#!searchin/cp2k/Outputting$20cell$20information$20and$20fractional$20coordinates%7Csort:relevance/cp2k/72MhYykrSrQ/5c9Jaw7S9yQJ.

179 """ # noqa: E501

180 dtype, natoms, nsteps, header_end = _read_metainfo(fileobj)

181 bytes_per_timestep = _bytes_per_timestep(natoms)

182 if ref_atoms:

183 symbols = ref_atoms.get_chemical_symbols()

184 else:

185 symbols = ['X' for i in range(natoms)]

186 if natoms != len(symbols):

187 raise ValueError("Length of ref_atoms does not match natoms "

188 "from dcd file")

189 trbl = index2range(index, nsteps)

190

191 for index in trbl:

192 frame_pos = int(header_end + index * bytes_per_timestep)

193 fileobj.seek(frame_pos)

194 yield _read_cp2k_dcd_frame(fileobj, dtype, natoms, symbols,

195 aligned=aligned)

196

197

198def _read_cp2k_dcd_frame(fileobj, dtype, natoms, symbols, aligned=False):

199 arr = np.fromfile(fileobj, dtype, 1)

200 cryst_const = np.empty(6, dtype=np.float64)

201 cryst_const[0] = arr['x1'][0, 0]

202 cryst_const[1] = arr['x1'][0, 2]

203 cryst_const[2] = arr['x1'][0, 5]

204 cryst_const[3] = arr['x1'][0, 4]

205 cryst_const[4] = arr['x1'][0, 3]

206 cryst_const[5] = arr['x1'][0, 1]

207 coords = np.empty((natoms, 3), dtype=np.float32)

208 coords[..., 0] = arr['x3'][0, ...]

209 coords[..., 1] = arr['x5'][0, ...]

210 coords[..., 2] = arr['x7'][0, ...]

211 assert len(coords) == len(symbols)

212 if aligned:

213 # convention of the cell is (see cp2ks src/particle_methods.F):

214 # A is in x

215 # B lies in xy plane

216 # luckily this is also the ASE convention for Atoms.set_cell()

217 atoms = Atoms(symbols, coords, cell=cryst_const, pbc=True)

218 else:

219 atoms = Atoms(symbols, coords)

220 return atoms

221

222

223def read_cp2k_restart(fileobj):

224 """Read Atoms and Cell from Restart File.

225

226 Reads the elements, coordinates and cell information from the

227 '&SUBSYS' section of a CP2K restart file.

228

229 Tries to convert atom names to elements, if this fails element is set to X.

230

231 Returns an Atoms object.

232 """

233

234 def _parse_section(inp):

235 """Helper to parse structure to nested dict"""

236 ret = {'content': []}

237 while inp:

238 line = inp.readline().strip()

239 if line.startswith('&END'):

240 return ret

241 elif line.startswith('&'):

242 key = line.replace('&', '')

243 ret[key] = _parse_section(inp)

244 else:

245 ret['content'].append(line)

246 return ret

247

248 def _fast_forward_to(fileobj, section_header):

249 """Helper to forward to a section"""

250 found = False

251 while fileobj:

252 line = fileobj.readline()

253 if section_header in line:

254 found = True

255 break

256 if not found:

257 raise RuntimeError("No {:} section found!".format(section_header))

258

259 def _read_cell(data):

260 """Helper to read cell data, returns cell and pbc"""

261 cell = None

262 pbc = [False, False, False]

263 if 'CELL' in data:

264 content = data['CELL']['content']

265 cell = Cell([[0, 0, 0] for i in range(3)])

266 char2idx = {'A ': 0, 'B ': 1, 'C ': 2}

267 for line in content:

268 # lines starting with A/B/C<whitespace> have cell

269 if line[:2] in char2idx:

270 idx = char2idx[line[:2]]

271 cell[idx] = [float(x) for x in line.split()[1:]]

272 pbc[idx] = True

273 if not set([len(v) for v in cell]) == {3}:

274 raise RuntimeError("Bad Cell Definition found.")

275 return cell, pbc

276

277 def _read_geometry(content):

278 """Helper to read geometry, returns a list of Atoms"""

279 atom_list = []

280 for entry in content:

281 entry = entry.split()

282 # Get letters for element symbol

283 el = [char.lower() for char in entry[0] if char.isalpha()]

284 el = "".join(el).capitalize()

285 # Get positions

286 pos = [float(x) for x in entry[1:4]]

287 if el in atomic_numbers.keys():

288 atom_list.append(Atom(el, pos))

289 else:

290 atom_list.append(Atom('X', pos))

291 return atom_list

292

293 # fast forward to &SUBSYS section

294 _fast_forward_to(fileobj, '&SUBSYS')

295 # parse sections into dictionary

296 data = _parse_section(fileobj)

297 # look for &CELL

298 cell, pbc = _read_cell(data)

299 # get the atoms

300 atom_list = _read_geometry(data['COORD']['content'])

301 return Atoms(atom_list, cell=cell, pbc=pbc)

Coverage for /builds/debichem-team/python-ase/ase/io/cp2k.py : 89.68%

155 statements

Coverage for /builds/debichem-team/python-ase/ase/io/cp2k.py : 89.68%

155 statements 139 run 16 missing 0 excluded

155 statements