Coverage for /builds/debichem-team/python-ase/ase/io/castep/castep

1import io

2import re

3import warnings

4from collections import defaultdict

5from typing import Any, Dict, List, Optional

7import numpy as np

9from ase import Atoms, units

10from ase.calculators.singlepoint import SinglePointCalculator

11from ase.constraints import FixAtoms, FixCartesian, FixConstraint

12from ase.io import ParseError

13from ase.utils import reader, string2index

16@reader

17def read_castep_castep(fd, index=-1):

18 """Read a .castep file and returns an Atoms object.

20 The calculator information will be stored in the calc attribute.

22 Notes

23 -----

24 This routine will return an atom ordering as found within the castep file.

25 This means that the species will be ordered by ascending atomic numbers.

26 The atoms witin a species are ordered as given in the original cell file.

28 """

29 # look for last result, if several CASTEP run are appended

30 line_start, line_end, end_found = _find_last_record(fd)

31 if not end_found:

32 raise ParseError(f'No regular end found in {fd.name} file.')

34 # These variables are finally assigned to `SinglePointCalculator`

35 # for backward compatibility with the `Castep` calculator.

36 cut_off_energy = None

37 kpoints = None

38 total_time = None

39 peak_memory = None

41 # jump back to the beginning to the last record

42 fd.seek(0)

43 for i, line in enumerate(fd):

44 if i == line_start:

45 break

47 # read header

48 parameters_header = _read_header(fd)

49 if 'cut_off_energy' in parameters_header:

50 cut_off_energy = parameters_header['cut_off_energy']

51 if 'basis_precision' in parameters_header:

52 del parameters_header['cut_off_energy'] # avoid conflict

54 markers_new_iteration = [

55 'BFGS: starting iteration',

56 'BFGS: improving iteration',

57 'Starting MD iteration',

58 ]

60 images = []

62 results = {}

63 constraints = []

64 species_pot = []

65 castep_warnings = []

66 for i, line in enumerate(fd):

68 if i > line_end:

69 break

71 if 'Number of kpoints used' in line:

72 kpoints = int(line.split('=')[-1].strip())

73 elif 'Unit Cell' in line:

74 lattice_real = _read_unit_cell(fd)

75 elif 'Cell Contents' in line:

76 for line in fd:

77 if 'Total number of ions in cell' in line:

78 n_atoms = int(line.split()[7])

79 if 'Total number of species in cell' in line:

80 int(line.split()[7])

81 fields = line.split()

82 if len(fields) == 0:

83 break

84 elif 'Fractional coordinates of atoms' in line:

85 species, custom_species, positions_frac = \

86 _read_fractional_coordinates(fd, n_atoms)

87 elif 'Files used for pseudopotentials' in line:

88 for line in fd:

89 line = fd.readline()

90 if 'Pseudopotential generated on-the-fly' in line:

91 continue

92 fields = line.split()

93 if len(fields) == 2:

94 species_pot.append(fields)

95 else:

96 break

97 elif 'k-Points For BZ Sampling' in line:

98 # TODO: generalize for non-Monkhorst Pack case

99 # (i.e. kpoint lists) -

100 # kpoints_offset cannot be read this way and

101 # is hence always set to None

102 for line in fd:

103 if not line.strip():

104 break

105 if 'MP grid size for SCF calculation' in line:

106 # kpoints = ' '.join(line.split()[-3:])

107 # self.kpoints_mp_grid = kpoints

108 # self.kpoints_mp_offset = '0. 0. 0.'

109 # not set here anymore because otherwise

110 # two calculator objects go out of sync

111 # after each calculation triggering unnecessary

112 # recalculation

113 break

114

115 elif 'Final energy' in line:

116 key = 'energy_without_dispersion_correction'

117 results[key] = float(line.split()[-2])

118 elif 'Final free energy' in line:

119 key = 'free_energy_without_dispersion_correction'

120 results[key] = float(line.split()[-2])

121 elif 'NB est. 0K energy' in line:

122 key = 'energy_zero_without_dispersion_correction'

123 results[key] = float(line.split()[-2])

124

125 # Add support for dispersion correction

126 # filtering due to SEDC is done in get_potential_energy

127 elif 'Dispersion corrected final energy' in line:

128 key = 'energy_with_dispersion_correlation'

129 results[key] = float(line.split()[-2])

130 elif 'Dispersion corrected final free energy' in line:

131 key = 'free_energy_with_dispersion_correlation'

132 results[key] = float(line.split()[-2])

133 elif 'NB dispersion corrected est. 0K energy' in line:

134 key = 'energy_zero_with_dispersion_correlation'

135 results[key] = float(line.split()[-2])

136

137 # check if we had a finite basis set correction

138 elif 'Total energy corrected for finite basis set' in line:

139 key = 'energy_with_finite_basis_set_correction'

140 results[key] = float(line.split()[-2])

141

142 # ******************** Forces *********************

143 # ************** Symmetrised Forces ***************

144 # ******************** Constrained Forces ********************

145 # ******************* Unconstrained Forces *******************

146 elif re.search(r'\**.* Forces \**', line):

147 forces, constraints = _read_forces(fd, n_atoms)

148 results['forces'] = np.array(forces)

149

150 # ***************** Stress Tensor *****************

151 # *********** Symmetrised Stress Tensor ***********

152 elif re.search(r'\**.* Stress Tensor \**', line):

153 results.update(_read_stress(fd))

154

155 elif any(_ in line for _ in markers_new_iteration):

156 _add_atoms(

157 images,

158 lattice_real,

159 species,

160 custom_species,

161 positions_frac,

162 constraints,

163 results,

164 )

165 # reset for the next step

166 lattice_real = None

167 species = None

168 positions_frac = None

169 results = {}

170 constraints = []

171

172 # extract info from the Mulliken analysis

173 elif 'Atomic Populations' in line:

174 results.update(_read_mulliken_charges(fd))

175

176 # extract detailed Hirshfeld analysis (iprint > 1)

177 elif 'Hirshfeld total electronic charge (e)' in line:

178 results.update(_read_hirshfeld_details(fd, n_atoms))

179

180 elif 'Hirshfeld Analysis' in line:

181 results.update(_read_hirshfeld_charges(fd))

182

183 # There is actually no good reason to get out of the loop

184 # already at this point... or do I miss something?

185 # elif 'BFGS: Final Configuration:' in line:

186 # break

187 elif 'warn' in line.lower():

188 castep_warnings.append(line)

189

190 # fetch some last info

191 elif 'Total time' in line:

192 pattern = r'.*=\s*([\d\.]+) s'

193 total_time = float(re.search(pattern, line).group(1))

194

195 elif 'Peak Memory Use' in line:

196 pattern = r'.*=\s*([\d]+) kB'

197 peak_memory = int(re.search(pattern, line).group(1))

198

199 # add the last image

200 _add_atoms(

201 images,

202 lattice_real,

203 species,

204 custom_species,

205 positions_frac,

206 constraints,

207 results,

208 )

209

210 for atoms in images:

211 # these variables are temporarily assigned to `SinglePointCalculator`

212 # to be assigned to the `Castep` calculator for backward compatibility

213 atoms.calc._cut_off_energy = cut_off_energy

214 atoms.calc._kpoints = kpoints

215 atoms.calc._species_pot = species_pot

216 atoms.calc._total_time = total_time

217 atoms.calc._peak_memory = peak_memory

218 atoms.calc._parameters_header = parameters_header

219

220 if castep_warnings:

221 warnings.warn('WARNING: .castep file contains warnings')

222 for warning in castep_warnings:

223 warnings.warn(warning)

224

225 if isinstance(index, str):

226 index = string2index(index)

227

228 return images[index]

229

230

231def _find_last_record(fd):

232 """Find the last record of the .castep file.

233

234 Returns

235 -------

236 start : int

237 Line number of the first line of the last record.

238 end : int

239 Line number of the last line of the last record.

240 end_found : bool

241 True if the .castep file ends as expected.

242

243 """

244 start = -1

245 for i, line in enumerate(fd):

246 if (('Welcome' in line or 'Materials Studio' in line)

247 and 'CASTEP' in line):

248 start = i

249

250 if start < 0:

251 warnings.warn(

252 f'Could not find CASTEP label in result file: {fd.name}.'

253 ' Are you sure this is a .castep file?'

254 )

255 return None

256

257 # search for regular end of file

258 end_found = False

259 # start to search from record beginning from the back

260 # and see if

261 end = -1

262 fd.seek(0)

263 for i, line in enumerate(fd):

264 if i < start:

265 continue

266 if 'Finalisation time =' in line:

267 end_found = True

268 end = i

269 break

270

271 return (start, end, end_found)

272

273

274def _read_header(out: io.TextIOBase):

275 """Read the header blocks from a .castep file.

276

277 Returns

278 -------

279 parameters : dict

280 Dictionary storing keys and values of a .param file.

281 """

282 def _parse_on_off(_: str):

283 return {'on': True, 'off': False}[_]

284

285 read_title = False

286 parameters: Dict[str, Any] = {}

287 for line in out:

288 if len(line) == 0: # end of file

289 break

290 if re.search(r'^\s*\*+$', line) and read_title: # end of header

291 break

292

293 if re.search(r'\**.* Title \**', line):

294 read_title = True

295

296 # General Parameters

297

298 elif 'output verbosity' in line:

299 parameters['iprint'] = int(line.split()[-1][1])

300 elif re.match(r'\stype of calculation\s*:', line):

301 parameters['task'] = {

302 'single point energy': 'SinglePoint',

303 'geometry optimization': 'GeometryOptimization',

304 'band structure': 'BandStructure',

305 'molecular dynamics': 'MolecularDynamics',

306 'optical properties': 'Optics',

307 'phonon calculation': 'Phonon',

308 'E-field calculation': 'Efield',

309 'Phonon followed by E-field': 'Phonon+Efield',

310 'transition state search': 'TransitionStateSearch',

311 'Magnetic Resonance': 'MagRes',

312 'Core level spectra': 'Elnes',

313 'Electronic Spectroscopy': 'ElectronicSpectroscopy',

314 }[line.split(':')[-1].strip()]

315 elif 'stress calculation' in line:

316 parameters['calculate_stress'] = _parse_on_off(line.split()[-1])

317 elif 'calculation limited to maximum' in line:

318 parameters['run_time'] = float(line.split()[-2])

319 elif re.match(r'\soptimization strategy\s*:', line):

320 parameters['opt_strategy'] = {

321 'maximize speed(+++)': 'Speed',

322 'minimize memory(---)': 'Memory',

323 'balance speed and memory': 'Default',

324 }[line.split(':')[-1].strip()]

325

326 # Exchange-Correlation Parameters

327

328 elif re.match(r'\susing functional\s*:', line):

329 parameters['xc_functional'] = {

330 'Local Density Approximation': 'LDA',

331 'Perdew Wang (1991)': 'PW91',

332 'Perdew Burke Ernzerhof': 'PBE',

333 'revised Perdew Burke Ernzerhof': 'RPBE',

334 'PBE with Wu-Cohen exchange': 'WC',

335 'PBE for solids (2008)': 'PBESOL',

336 'Hartree-Fock': 'HF',

337 'Hartree-Fock +': 'HF-LDA',

338 'Screened Hartree-Fock': 'sX',

339 'Screened Hartree-Fock + ': 'sX-LDA',

340 'hybrid PBE0': 'PBE0',

341 'hybrid B3LYP': 'B3LYP',

342 'hybrid HSE03': 'HSE03',

343 'hybrid HSE06': 'HSE06',

344 'RSCAN': 'RSCAN',

345 }[line.split(':')[-1].strip()]

346 elif 'DFT+D: Semi-empirical dispersion correction' in line:

347 parameters['sedc_apply'] = _parse_on_off(line.split()[-1])

348 elif 'SEDC with' in line:

349 parameters['sedc_scheme'] = {

350 'OBS correction scheme': 'OBS',

351 'G06 correction scheme': 'G06',

352 'D3 correction scheme': 'D3',

353 'D3(BJ) correction scheme': 'D3-BJ',

354 'D4 correction scheme': 'D4',

355 'JCHS correction scheme': 'JCHS',

356 'TS correction scheme': 'TS',

357 'TSsurf correction scheme': 'TSSURF',

358 'TS+SCS correction scheme': 'TSSCS',

359 'aperiodic TS+SCS correction scheme': 'ATSSCS',

360 'aperiodic MBD@SCS method': 'AMBD',

361 'MBD@SCS method': 'MBD',

362 'aperiodic MBD@rsSCS method': 'AMBD*',

363 'MBD@rsSCS method': 'MBD*',

364 'XDM correction scheme': 'XDM',

365 }[line.split(':')[-1].strip()]

366

367 # Basis Set Parameters

368

369 elif 'basis set accuracy' in line:

370 parameters['basis_precision'] = line.split()[-1]

371 elif 'plane wave basis set cut-off' in line:

372 parameters['cut_off_energy'] = float(line.split()[-2])

373 elif re.match(r'\sfinite basis set correction\s*:', line):

374 parameters['finite_basis_corr'] = {

375 'none': 0,

376 'manual': 1,

377 'automatic': 2,

378 }[line.split()[-1]]

379

380 # Electronic Parameters

381

382 elif 'treating system as spin-polarized' in line:

383 parameters['spin_polarized'] = True

384

385 # Electronic Minimization Parameters

386

387 elif 'Treating system as non-metallic' in line:

388 parameters['fix_occupancy'] = True

389 elif 'total energy / atom convergence tol.' in line:

390 parameters['elec_energy_tol'] = float(line.split()[-2])

391 elif 'convergence tolerance window' in line:

392 parameters['elec_convergence_win'] = int(line.split()[-2])

393 elif 'max. number of SCF cycles:' in line:

394 parameters['max_scf_cycles'] = float(line.split()[-1])

395 elif 'dump wavefunctions every' in line:

396 parameters['num_dump_cycles'] = float(line.split()[-3])

397

398 # Density Mixing Parameters

399

400 elif 'density-mixing scheme' in line:

401 parameters['mixing_scheme'] = line.split()[-1]

402

403 return parameters

404

405

406def _read_unit_cell(out: io.TextIOBase):

407 """Read a Unit Cell block from a .castep file."""

408 for line in out:

409 fields = line.split()

410 if len(fields) == 6:

411 break

412 lattice_real = []

413 for _ in range(3):

414 lattice_real.append([float(f) for f in fields[0:3]])

415 line = out.readline()

416 fields = line.split()

417 return np.array(lattice_real)

418

419

420def _read_forces(out: io.TextIOBase, n_atoms: int):

421 """Read a block for atomic forces from a .castep file."""

422 constraints: List[FixConstraint] = []

423 forces = []

424 for line in out:

425 fields = line.split()

426 if len(fields) == 7:

427 break

428 for n in range(n_atoms):

429 consd = np.array([0, 0, 0])

430 fxyz = [0.0, 0.0, 0.0]

431 for i, force_component in enumerate(fields[-4:-1]):

432 if force_component.count("(cons'd)") > 0:

433 consd[i] = 1

434 # remove constraint labels in force components

435 fxyz[i] = float(force_component.replace("(cons'd)", ''))

436 if consd.all():

437 constraints.append(FixAtoms(n))

438 elif consd.any():

439 constraints.append(FixCartesian(n, consd))

440 forces.append(fxyz)

441 line = out.readline()

442 fields = line.split()

443 return forces, constraints

444

445

446def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int):

447 """Read fractional coordinates from a .castep file."""

448 species: List[str] = []

449 custom_species: Optional[List[str]] = None # A CASTEP special thing

450 positions_frac: List[List[float]] = []

451 for line in out:

452 fields = line.split()

453 if len(fields) == 7:

454 break

455 for _ in range(n_atoms):

456 spec_custom = fields[1].split(':', 1)

457 elem = spec_custom[0]

458 if len(spec_custom) > 1 and custom_species is None:

459 # Add it to the custom info!

460 custom_species = list(species)

461 species.append(elem)

462 if custom_species is not None:

463 custom_species.append(fields[1])

464 positions_frac.append([float(s) for s in fields[3:6]])

465 line = out.readline()

466 fields = line.split()

467 return species, custom_species, positions_frac

468

469

470def _read_stress(out: io.TextIOBase):

471 """Read a block for the stress tensor from a .castep file."""

472 for line in out:

473 fields = line.split()

474 if len(fields) == 6:

475 break

476 results = {}

477 stress = []

478 for _ in range(3):

479 stress.append([float(s) for s in fields[2:5]])

480 line = out.readline()

481 fields = line.split()

482 # stress in .castep file is given in GPa

483 results['stress'] = np.array(stress) * units.GPa

484 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]]

485 line = out.readline()

486 if "Pressure:" in line:

487 results['pressure'] = float(

488 line.split()[-2]) * units.GPa # type: ignore[assignment]

489 return results

490

491

492def _add_atoms(

493 images,

494 lattice_real,

495 species,

496 custom_species,

497 positions_frac,

498 constraints,

499 results,

500):

501 # If all the lattice parameters are fixed,

502 # the `Unit Cell` block in the .castep file is not printed

503 # in every ionic step.

504 # The lattice paramters are therefore taken from the last step.

505 # This happens:

506 # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE`

507 # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT`

508 if lattice_real is None:

509 lattice_real = images[-1].cell.copy()

510

511 # for highly symmetric systems (where essentially only the

512 # stress is optimized, but the atomic positions) positions

513 # are only printed once.

514 if species is None:

515 species = images[-1].symbols

516 if positions_frac is None:

517 positions_frac = images[-1].get_scaled_positions()

518

519 _set_energy_and_free_energy(results)

520

521 atoms = Atoms(

522 species,

523 cell=lattice_real,

524 constraint=constraints,

525 pbc=True,

526 scaled_positions=positions_frac,

527 )

528 if custom_species is not None:

529 atoms.new_array(

530 'castep_custom_species',

531 np.array(custom_species),

532 )

533

534 atoms.calc = SinglePointCalculator(atoms)

535 atoms.calc.results = results

536

537 images.append(atoms)

538

539

540def _read_mulliken_charges(out: io.TextIOBase):

541 """Read a block for Mulliken charges from a .castep file."""

542 for i in range(3):

543 line = out.readline()

544 if i == 1:

545 spin_polarized = 'Spin' in line

546 results = defaultdict(list)

547 for line in out:

548 fields = line.split()

549 if len(fields) == 1:

550 break

551 if spin_polarized:

552 if len(fields) != 7: # due to CASTEP 18 outformat changes

553 results['charges'].append(float(fields[-2]))

554 results['magmoms'].append(float(fields[-1]))

555 else:

556 results['charges'].append(float(fields[-1]))

557 return {k: np.array(v) for k, v in results.items()}

558

559

560def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int):

561 """Read the Hirshfeld analysis when iprint > 1 from a .castep file."""

562 results = defaultdict(list)

563 for _ in range(n_atoms):

564 for line in out:

565 if line.strip() == '':

566 break # end for each atom

567 if 'Hirshfeld / free atomic volume :' in line:

568 line = out.readline()

569 fields = line.split()

570 results['hirshfeld_volume_ratios'].append(float(fields[0]))

571 return {k: np.array(v) for k, v in results.items()}

572

573

574def _read_hirshfeld_charges(out: io.TextIOBase):

575 """Read a block for Hirshfeld charges from a .castep file."""

576 for i in range(3):

577 line = out.readline()

578 if i == 1:

579 spin_polarized = 'Spin' in line

580 results = defaultdict(list)

581 for line in out:

582 fields = line.split()

583 if len(fields) == 1:

584 break

585 if spin_polarized:

586 results['hirshfeld_charges'].append(float(fields[-2]))

587 results['hirshfeld_magmoms'].append(float(fields[-1]))

588 else:

589 results['hirshfeld_charges'].append(float(fields[-1]))

590 return {k: np.array(v) for k, v in results.items()}

591

592

593def _set_energy_and_free_energy(results: Dict[str, Any]):

594 """Set values referred to as `energy` and `free_energy`."""

595 if 'energy_with_dispersion_correction' in results:

596 suffix = '_with_dispersion_correction'

597 else:

598 suffix = '_without_dispersion_correction'

599

600 if 'free_energy' + suffix in results: # metallic

601 keye = 'energy_zero' + suffix

602 keyf = 'free_energy' + suffix

603 else: # non-metallic

604 # The finite basis set correction is applied to the energy at finite T

605 # (not the energy at 0 K). We should hence refer to the corrected value

606 # as `energy` only when the free energy is unavailable, i.e., only when

607 # FIX_OCCUPANCY : TRUE and thus no smearing is applied.

608 if 'energy_with_finite_basis_set_correction' in results:

609 keye = 'energy_with_finite_basis_set_correction'

610 else:

611 keye = 'energy' + suffix

612 keyf = 'energy' + suffix

613

614 results['energy'] = results[keye]

615 results['free_energy'] = results[keyf]

Coverage for /builds/debichem-team/python-ase/ase/io/castep/castep_reader.py: 91.10%

337 statements