Coverage for /builds/debichem-team/python-ase/ase/io/castep/castep_reader.py: 91.10%

337 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-03-06 04:00 +0000

1import io 

2import re 

3import warnings 

4from collections import defaultdict 

5from typing import Any, Dict, List, Optional 

6 

7import numpy as np 

8 

9from ase import Atoms, units 

10from ase.calculators.singlepoint import SinglePointCalculator 

11from ase.constraints import FixAtoms, FixCartesian, FixConstraint 

12from ase.io import ParseError 

13from ase.utils import reader, string2index 

14 

15 

16@reader 

17def read_castep_castep(fd, index=-1): 

18 """Read a .castep file and returns an Atoms object. 

19 

20 The calculator information will be stored in the calc attribute. 

21 

22 Notes 

23 ----- 

24 This routine will return an atom ordering as found within the castep file. 

25 This means that the species will be ordered by ascending atomic numbers. 

26 The atoms witin a species are ordered as given in the original cell file. 

27 

28 """ 

29 # look for last result, if several CASTEP run are appended 

30 line_start, line_end, end_found = _find_last_record(fd) 

31 if not end_found: 

32 raise ParseError(f'No regular end found in {fd.name} file.') 

33 

34 # These variables are finally assigned to `SinglePointCalculator` 

35 # for backward compatibility with the `Castep` calculator. 

36 cut_off_energy = None 

37 kpoints = None 

38 total_time = None 

39 peak_memory = None 

40 

41 # jump back to the beginning to the last record 

42 fd.seek(0) 

43 for i, line in enumerate(fd): 

44 if i == line_start: 

45 break 

46 

47 # read header 

48 parameters_header = _read_header(fd) 

49 if 'cut_off_energy' in parameters_header: 

50 cut_off_energy = parameters_header['cut_off_energy'] 

51 if 'basis_precision' in parameters_header: 

52 del parameters_header['cut_off_energy'] # avoid conflict 

53 

54 markers_new_iteration = [ 

55 'BFGS: starting iteration', 

56 'BFGS: improving iteration', 

57 'Starting MD iteration', 

58 ] 

59 

60 images = [] 

61 

62 results = {} 

63 constraints = [] 

64 species_pot = [] 

65 castep_warnings = [] 

66 for i, line in enumerate(fd): 

67 

68 if i > line_end: 

69 break 

70 

71 if 'Number of kpoints used' in line: 

72 kpoints = int(line.split('=')[-1].strip()) 

73 elif 'Unit Cell' in line: 

74 lattice_real = _read_unit_cell(fd) 

75 elif 'Cell Contents' in line: 

76 for line in fd: 

77 if 'Total number of ions in cell' in line: 

78 n_atoms = int(line.split()[7]) 

79 if 'Total number of species in cell' in line: 

80 int(line.split()[7]) 

81 fields = line.split() 

82 if len(fields) == 0: 

83 break 

84 elif 'Fractional coordinates of atoms' in line: 

85 species, custom_species, positions_frac = \ 

86 _read_fractional_coordinates(fd, n_atoms) 

87 elif 'Files used for pseudopotentials' in line: 

88 for line in fd: 

89 line = fd.readline() 

90 if 'Pseudopotential generated on-the-fly' in line: 

91 continue 

92 fields = line.split() 

93 if len(fields) == 2: 

94 species_pot.append(fields) 

95 else: 

96 break 

97 elif 'k-Points For BZ Sampling' in line: 

98 # TODO: generalize for non-Monkhorst Pack case 

99 # (i.e. kpoint lists) - 

100 # kpoints_offset cannot be read this way and 

101 # is hence always set to None 

102 for line in fd: 

103 if not line.strip(): 

104 break 

105 if 'MP grid size for SCF calculation' in line: 

106 # kpoints = ' '.join(line.split()[-3:]) 

107 # self.kpoints_mp_grid = kpoints 

108 # self.kpoints_mp_offset = '0. 0. 0.' 

109 # not set here anymore because otherwise 

110 # two calculator objects go out of sync 

111 # after each calculation triggering unnecessary 

112 # recalculation 

113 break 

114 

115 elif 'Final energy' in line: 

116 key = 'energy_without_dispersion_correction' 

117 results[key] = float(line.split()[-2]) 

118 elif 'Final free energy' in line: 

119 key = 'free_energy_without_dispersion_correction' 

120 results[key] = float(line.split()[-2]) 

121 elif 'NB est. 0K energy' in line: 

122 key = 'energy_zero_without_dispersion_correction' 

123 results[key] = float(line.split()[-2]) 

124 

125 # Add support for dispersion correction 

126 # filtering due to SEDC is done in get_potential_energy 

127 elif 'Dispersion corrected final energy' in line: 

128 key = 'energy_with_dispersion_correlation' 

129 results[key] = float(line.split()[-2]) 

130 elif 'Dispersion corrected final free energy' in line: 

131 key = 'free_energy_with_dispersion_correlation' 

132 results[key] = float(line.split()[-2]) 

133 elif 'NB dispersion corrected est. 0K energy' in line: 

134 key = 'energy_zero_with_dispersion_correlation' 

135 results[key] = float(line.split()[-2]) 

136 

137 # check if we had a finite basis set correction 

138 elif 'Total energy corrected for finite basis set' in line: 

139 key = 'energy_with_finite_basis_set_correction' 

140 results[key] = float(line.split()[-2]) 

141 

142 # ******************** Forces ********************* 

143 # ************** Symmetrised Forces *************** 

144 # ******************** Constrained Forces ******************** 

145 # ******************* Unconstrained Forces ******************* 

146 elif re.search(r'\**.* Forces \**', line): 

147 forces, constraints = _read_forces(fd, n_atoms) 

148 results['forces'] = np.array(forces) 

149 

150 # ***************** Stress Tensor ***************** 

151 # *********** Symmetrised Stress Tensor *********** 

152 elif re.search(r'\**.* Stress Tensor \**', line): 

153 results.update(_read_stress(fd)) 

154 

155 elif any(_ in line for _ in markers_new_iteration): 

156 _add_atoms( 

157 images, 

158 lattice_real, 

159 species, 

160 custom_species, 

161 positions_frac, 

162 constraints, 

163 results, 

164 ) 

165 # reset for the next step 

166 lattice_real = None 

167 species = None 

168 positions_frac = None 

169 results = {} 

170 constraints = [] 

171 

172 # extract info from the Mulliken analysis 

173 elif 'Atomic Populations' in line: 

174 results.update(_read_mulliken_charges(fd)) 

175 

176 # extract detailed Hirshfeld analysis (iprint > 1) 

177 elif 'Hirshfeld total electronic charge (e)' in line: 

178 results.update(_read_hirshfeld_details(fd, n_atoms)) 

179 

180 elif 'Hirshfeld Analysis' in line: 

181 results.update(_read_hirshfeld_charges(fd)) 

182 

183 # There is actually no good reason to get out of the loop 

184 # already at this point... or do I miss something? 

185 # elif 'BFGS: Final Configuration:' in line: 

186 # break 

187 elif 'warn' in line.lower(): 

188 castep_warnings.append(line) 

189 

190 # fetch some last info 

191 elif 'Total time' in line: 

192 pattern = r'.*=\s*([\d\.]+) s' 

193 total_time = float(re.search(pattern, line).group(1)) 

194 

195 elif 'Peak Memory Use' in line: 

196 pattern = r'.*=\s*([\d]+) kB' 

197 peak_memory = int(re.search(pattern, line).group(1)) 

198 

199 # add the last image 

200 _add_atoms( 

201 images, 

202 lattice_real, 

203 species, 

204 custom_species, 

205 positions_frac, 

206 constraints, 

207 results, 

208 ) 

209 

210 for atoms in images: 

211 # these variables are temporarily assigned to `SinglePointCalculator` 

212 # to be assigned to the `Castep` calculator for backward compatibility 

213 atoms.calc._cut_off_energy = cut_off_energy 

214 atoms.calc._kpoints = kpoints 

215 atoms.calc._species_pot = species_pot 

216 atoms.calc._total_time = total_time 

217 atoms.calc._peak_memory = peak_memory 

218 atoms.calc._parameters_header = parameters_header 

219 

220 if castep_warnings: 

221 warnings.warn('WARNING: .castep file contains warnings') 

222 for warning in castep_warnings: 

223 warnings.warn(warning) 

224 

225 if isinstance(index, str): 

226 index = string2index(index) 

227 

228 return images[index] 

229 

230 

231def _find_last_record(fd): 

232 """Find the last record of the .castep file. 

233 

234 Returns 

235 ------- 

236 start : int 

237 Line number of the first line of the last record. 

238 end : int 

239 Line number of the last line of the last record. 

240 end_found : bool 

241 True if the .castep file ends as expected. 

242 

243 """ 

244 start = -1 

245 for i, line in enumerate(fd): 

246 if (('Welcome' in line or 'Materials Studio' in line) 

247 and 'CASTEP' in line): 

248 start = i 

249 

250 if start < 0: 

251 warnings.warn( 

252 f'Could not find CASTEP label in result file: {fd.name}.' 

253 ' Are you sure this is a .castep file?' 

254 ) 

255 return None 

256 

257 # search for regular end of file 

258 end_found = False 

259 # start to search from record beginning from the back 

260 # and see if 

261 end = -1 

262 fd.seek(0) 

263 for i, line in enumerate(fd): 

264 if i < start: 

265 continue 

266 if 'Finalisation time =' in line: 

267 end_found = True 

268 end = i 

269 break 

270 

271 return (start, end, end_found) 

272 

273 

274def _read_header(out: io.TextIOBase): 

275 """Read the header blocks from a .castep file. 

276 

277 Returns 

278 ------- 

279 parameters : dict 

280 Dictionary storing keys and values of a .param file. 

281 """ 

282 def _parse_on_off(_: str): 

283 return {'on': True, 'off': False}[_] 

284 

285 read_title = False 

286 parameters: Dict[str, Any] = {} 

287 for line in out: 

288 if len(line) == 0: # end of file 

289 break 

290 if re.search(r'^\s*\*+$', line) and read_title: # end of header 

291 break 

292 

293 if re.search(r'\**.* Title \**', line): 

294 read_title = True 

295 

296 # General Parameters 

297 

298 elif 'output verbosity' in line: 

299 parameters['iprint'] = int(line.split()[-1][1]) 

300 elif re.match(r'\stype of calculation\s*:', line): 

301 parameters['task'] = { 

302 'single point energy': 'SinglePoint', 

303 'geometry optimization': 'GeometryOptimization', 

304 'band structure': 'BandStructure', 

305 'molecular dynamics': 'MolecularDynamics', 

306 'optical properties': 'Optics', 

307 'phonon calculation': 'Phonon', 

308 'E-field calculation': 'Efield', 

309 'Phonon followed by E-field': 'Phonon+Efield', 

310 'transition state search': 'TransitionStateSearch', 

311 'Magnetic Resonance': 'MagRes', 

312 'Core level spectra': 'Elnes', 

313 'Electronic Spectroscopy': 'ElectronicSpectroscopy', 

314 }[line.split(':')[-1].strip()] 

315 elif 'stress calculation' in line: 

316 parameters['calculate_stress'] = _parse_on_off(line.split()[-1]) 

317 elif 'calculation limited to maximum' in line: 

318 parameters['run_time'] = float(line.split()[-2]) 

319 elif re.match(r'\soptimization strategy\s*:', line): 

320 parameters['opt_strategy'] = { 

321 'maximize speed(+++)': 'Speed', 

322 'minimize memory(---)': 'Memory', 

323 'balance speed and memory': 'Default', 

324 }[line.split(':')[-1].strip()] 

325 

326 # Exchange-Correlation Parameters 

327 

328 elif re.match(r'\susing functional\s*:', line): 

329 parameters['xc_functional'] = { 

330 'Local Density Approximation': 'LDA', 

331 'Perdew Wang (1991)': 'PW91', 

332 'Perdew Burke Ernzerhof': 'PBE', 

333 'revised Perdew Burke Ernzerhof': 'RPBE', 

334 'PBE with Wu-Cohen exchange': 'WC', 

335 'PBE for solids (2008)': 'PBESOL', 

336 'Hartree-Fock': 'HF', 

337 'Hartree-Fock +': 'HF-LDA', 

338 'Screened Hartree-Fock': 'sX', 

339 'Screened Hartree-Fock + ': 'sX-LDA', 

340 'hybrid PBE0': 'PBE0', 

341 'hybrid B3LYP': 'B3LYP', 

342 'hybrid HSE03': 'HSE03', 

343 'hybrid HSE06': 'HSE06', 

344 'RSCAN': 'RSCAN', 

345 }[line.split(':')[-1].strip()] 

346 elif 'DFT+D: Semi-empirical dispersion correction' in line: 

347 parameters['sedc_apply'] = _parse_on_off(line.split()[-1]) 

348 elif 'SEDC with' in line: 

349 parameters['sedc_scheme'] = { 

350 'OBS correction scheme': 'OBS', 

351 'G06 correction scheme': 'G06', 

352 'D3 correction scheme': 'D3', 

353 'D3(BJ) correction scheme': 'D3-BJ', 

354 'D4 correction scheme': 'D4', 

355 'JCHS correction scheme': 'JCHS', 

356 'TS correction scheme': 'TS', 

357 'TSsurf correction scheme': 'TSSURF', 

358 'TS+SCS correction scheme': 'TSSCS', 

359 'aperiodic TS+SCS correction scheme': 'ATSSCS', 

360 'aperiodic MBD@SCS method': 'AMBD', 

361 'MBD@SCS method': 'MBD', 

362 'aperiodic MBD@rsSCS method': 'AMBD*', 

363 'MBD@rsSCS method': 'MBD*', 

364 'XDM correction scheme': 'XDM', 

365 }[line.split(':')[-1].strip()] 

366 

367 # Basis Set Parameters 

368 

369 elif 'basis set accuracy' in line: 

370 parameters['basis_precision'] = line.split()[-1] 

371 elif 'plane wave basis set cut-off' in line: 

372 parameters['cut_off_energy'] = float(line.split()[-2]) 

373 elif re.match(r'\sfinite basis set correction\s*:', line): 

374 parameters['finite_basis_corr'] = { 

375 'none': 0, 

376 'manual': 1, 

377 'automatic': 2, 

378 }[line.split()[-1]] 

379 

380 # Electronic Parameters 

381 

382 elif 'treating system as spin-polarized' in line: 

383 parameters['spin_polarized'] = True 

384 

385 # Electronic Minimization Parameters 

386 

387 elif 'Treating system as non-metallic' in line: 

388 parameters['fix_occupancy'] = True 

389 elif 'total energy / atom convergence tol.' in line: 

390 parameters['elec_energy_tol'] = float(line.split()[-2]) 

391 elif 'convergence tolerance window' in line: 

392 parameters['elec_convergence_win'] = int(line.split()[-2]) 

393 elif 'max. number of SCF cycles:' in line: 

394 parameters['max_scf_cycles'] = float(line.split()[-1]) 

395 elif 'dump wavefunctions every' in line: 

396 parameters['num_dump_cycles'] = float(line.split()[-3]) 

397 

398 # Density Mixing Parameters 

399 

400 elif 'density-mixing scheme' in line: 

401 parameters['mixing_scheme'] = line.split()[-1] 

402 

403 return parameters 

404 

405 

406def _read_unit_cell(out: io.TextIOBase): 

407 """Read a Unit Cell block from a .castep file.""" 

408 for line in out: 

409 fields = line.split() 

410 if len(fields) == 6: 

411 break 

412 lattice_real = [] 

413 for _ in range(3): 

414 lattice_real.append([float(f) for f in fields[0:3]]) 

415 line = out.readline() 

416 fields = line.split() 

417 return np.array(lattice_real) 

418 

419 

420def _read_forces(out: io.TextIOBase, n_atoms: int): 

421 """Read a block for atomic forces from a .castep file.""" 

422 constraints: List[FixConstraint] = [] 

423 forces = [] 

424 for line in out: 

425 fields = line.split() 

426 if len(fields) == 7: 

427 break 

428 for n in range(n_atoms): 

429 consd = np.array([0, 0, 0]) 

430 fxyz = [0.0, 0.0, 0.0] 

431 for i, force_component in enumerate(fields[-4:-1]): 

432 if force_component.count("(cons'd)") > 0: 

433 consd[i] = 1 

434 # remove constraint labels in force components 

435 fxyz[i] = float(force_component.replace("(cons'd)", '')) 

436 if consd.all(): 

437 constraints.append(FixAtoms(n)) 

438 elif consd.any(): 

439 constraints.append(FixCartesian(n, consd)) 

440 forces.append(fxyz) 

441 line = out.readline() 

442 fields = line.split() 

443 return forces, constraints 

444 

445 

446def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int): 

447 """Read fractional coordinates from a .castep file.""" 

448 species: List[str] = [] 

449 custom_species: Optional[List[str]] = None # A CASTEP special thing 

450 positions_frac: List[List[float]] = [] 

451 for line in out: 

452 fields = line.split() 

453 if len(fields) == 7: 

454 break 

455 for _ in range(n_atoms): 

456 spec_custom = fields[1].split(':', 1) 

457 elem = spec_custom[0] 

458 if len(spec_custom) > 1 and custom_species is None: 

459 # Add it to the custom info! 

460 custom_species = list(species) 

461 species.append(elem) 

462 if custom_species is not None: 

463 custom_species.append(fields[1]) 

464 positions_frac.append([float(s) for s in fields[3:6]]) 

465 line = out.readline() 

466 fields = line.split() 

467 return species, custom_species, positions_frac 

468 

469 

470def _read_stress(out: io.TextIOBase): 

471 """Read a block for the stress tensor from a .castep file.""" 

472 for line in out: 

473 fields = line.split() 

474 if len(fields) == 6: 

475 break 

476 results = {} 

477 stress = [] 

478 for _ in range(3): 

479 stress.append([float(s) for s in fields[2:5]]) 

480 line = out.readline() 

481 fields = line.split() 

482 # stress in .castep file is given in GPa 

483 results['stress'] = np.array(stress) * units.GPa 

484 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]] 

485 line = out.readline() 

486 if "Pressure:" in line: 

487 results['pressure'] = float( 

488 line.split()[-2]) * units.GPa # type: ignore[assignment] 

489 return results 

490 

491 

492def _add_atoms( 

493 images, 

494 lattice_real, 

495 species, 

496 custom_species, 

497 positions_frac, 

498 constraints, 

499 results, 

500): 

501 # If all the lattice parameters are fixed, 

502 # the `Unit Cell` block in the .castep file is not printed 

503 # in every ionic step. 

504 # The lattice paramters are therefore taken from the last step. 

505 # This happens: 

506 # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE` 

507 # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT` 

508 if lattice_real is None: 

509 lattice_real = images[-1].cell.copy() 

510 

511 # for highly symmetric systems (where essentially only the 

512 # stress is optimized, but the atomic positions) positions 

513 # are only printed once. 

514 if species is None: 

515 species = images[-1].symbols 

516 if positions_frac is None: 

517 positions_frac = images[-1].get_scaled_positions() 

518 

519 _set_energy_and_free_energy(results) 

520 

521 atoms = Atoms( 

522 species, 

523 cell=lattice_real, 

524 constraint=constraints, 

525 pbc=True, 

526 scaled_positions=positions_frac, 

527 ) 

528 if custom_species is not None: 

529 atoms.new_array( 

530 'castep_custom_species', 

531 np.array(custom_species), 

532 ) 

533 

534 atoms.calc = SinglePointCalculator(atoms) 

535 atoms.calc.results = results 

536 

537 images.append(atoms) 

538 

539 

540def _read_mulliken_charges(out: io.TextIOBase): 

541 """Read a block for Mulliken charges from a .castep file.""" 

542 for i in range(3): 

543 line = out.readline() 

544 if i == 1: 

545 spin_polarized = 'Spin' in line 

546 results = defaultdict(list) 

547 for line in out: 

548 fields = line.split() 

549 if len(fields) == 1: 

550 break 

551 if spin_polarized: 

552 if len(fields) != 7: # due to CASTEP 18 outformat changes 

553 results['charges'].append(float(fields[-2])) 

554 results['magmoms'].append(float(fields[-1])) 

555 else: 

556 results['charges'].append(float(fields[-1])) 

557 return {k: np.array(v) for k, v in results.items()} 

558 

559 

560def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int): 

561 """Read the Hirshfeld analysis when iprint > 1 from a .castep file.""" 

562 results = defaultdict(list) 

563 for _ in range(n_atoms): 

564 for line in out: 

565 if line.strip() == '': 

566 break # end for each atom 

567 if 'Hirshfeld / free atomic volume :' in line: 

568 line = out.readline() 

569 fields = line.split() 

570 results['hirshfeld_volume_ratios'].append(float(fields[0])) 

571 return {k: np.array(v) for k, v in results.items()} 

572 

573 

574def _read_hirshfeld_charges(out: io.TextIOBase): 

575 """Read a block for Hirshfeld charges from a .castep file.""" 

576 for i in range(3): 

577 line = out.readline() 

578 if i == 1: 

579 spin_polarized = 'Spin' in line 

580 results = defaultdict(list) 

581 for line in out: 

582 fields = line.split() 

583 if len(fields) == 1: 

584 break 

585 if spin_polarized: 

586 results['hirshfeld_charges'].append(float(fields[-2])) 

587 results['hirshfeld_magmoms'].append(float(fields[-1])) 

588 else: 

589 results['hirshfeld_charges'].append(float(fields[-1])) 

590 return {k: np.array(v) for k, v in results.items()} 

591 

592 

593def _set_energy_and_free_energy(results: Dict[str, Any]): 

594 """Set values referred to as `energy` and `free_energy`.""" 

595 if 'energy_with_dispersion_correction' in results: 

596 suffix = '_with_dispersion_correction' 

597 else: 

598 suffix = '_without_dispersion_correction' 

599 

600 if 'free_energy' + suffix in results: # metallic 

601 keye = 'energy_zero' + suffix 

602 keyf = 'free_energy' + suffix 

603 else: # non-metallic 

604 # The finite basis set correction is applied to the energy at finite T 

605 # (not the energy at 0 K). We should hence refer to the corrected value 

606 # as `energy` only when the free energy is unavailable, i.e., only when 

607 # FIX_OCCUPANCY : TRUE and thus no smearing is applied. 

608 if 'energy_with_finite_basis_set_correction' in results: 

609 keye = 'energy_with_finite_basis_set_correction' 

610 else: 

611 keye = 'energy' + suffix 

612 keyf = 'energy' + suffix 

613 

614 results['energy'] = results[keye] 

615 results['free_energy'] = results[keyf]