Coverage for /builds/debichem-team/python-ase/ase/io/castep/castep_reader.py: 91.10%
337 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
1import io
2import re
3import warnings
4from collections import defaultdict
5from typing import Any, Dict, List, Optional
7import numpy as np
9from ase import Atoms, units
10from ase.calculators.singlepoint import SinglePointCalculator
11from ase.constraints import FixAtoms, FixCartesian, FixConstraint
12from ase.io import ParseError
13from ase.utils import reader, string2index
16@reader
17def read_castep_castep(fd, index=-1):
18 """Read a .castep file and returns an Atoms object.
20 The calculator information will be stored in the calc attribute.
22 Notes
23 -----
24 This routine will return an atom ordering as found within the castep file.
25 This means that the species will be ordered by ascending atomic numbers.
26 The atoms witin a species are ordered as given in the original cell file.
28 """
29 # look for last result, if several CASTEP run are appended
30 line_start, line_end, end_found = _find_last_record(fd)
31 if not end_found:
32 raise ParseError(f'No regular end found in {fd.name} file.')
34 # These variables are finally assigned to `SinglePointCalculator`
35 # for backward compatibility with the `Castep` calculator.
36 cut_off_energy = None
37 kpoints = None
38 total_time = None
39 peak_memory = None
41 # jump back to the beginning to the last record
42 fd.seek(0)
43 for i, line in enumerate(fd):
44 if i == line_start:
45 break
47 # read header
48 parameters_header = _read_header(fd)
49 if 'cut_off_energy' in parameters_header:
50 cut_off_energy = parameters_header['cut_off_energy']
51 if 'basis_precision' in parameters_header:
52 del parameters_header['cut_off_energy'] # avoid conflict
54 markers_new_iteration = [
55 'BFGS: starting iteration',
56 'BFGS: improving iteration',
57 'Starting MD iteration',
58 ]
60 images = []
62 results = {}
63 constraints = []
64 species_pot = []
65 castep_warnings = []
66 for i, line in enumerate(fd):
68 if i > line_end:
69 break
71 if 'Number of kpoints used' in line:
72 kpoints = int(line.split('=')[-1].strip())
73 elif 'Unit Cell' in line:
74 lattice_real = _read_unit_cell(fd)
75 elif 'Cell Contents' in line:
76 for line in fd:
77 if 'Total number of ions in cell' in line:
78 n_atoms = int(line.split()[7])
79 if 'Total number of species in cell' in line:
80 int(line.split()[7])
81 fields = line.split()
82 if len(fields) == 0:
83 break
84 elif 'Fractional coordinates of atoms' in line:
85 species, custom_species, positions_frac = \
86 _read_fractional_coordinates(fd, n_atoms)
87 elif 'Files used for pseudopotentials' in line:
88 for line in fd:
89 line = fd.readline()
90 if 'Pseudopotential generated on-the-fly' in line:
91 continue
92 fields = line.split()
93 if len(fields) == 2:
94 species_pot.append(fields)
95 else:
96 break
97 elif 'k-Points For BZ Sampling' in line:
98 # TODO: generalize for non-Monkhorst Pack case
99 # (i.e. kpoint lists) -
100 # kpoints_offset cannot be read this way and
101 # is hence always set to None
102 for line in fd:
103 if not line.strip():
104 break
105 if 'MP grid size for SCF calculation' in line:
106 # kpoints = ' '.join(line.split()[-3:])
107 # self.kpoints_mp_grid = kpoints
108 # self.kpoints_mp_offset = '0. 0. 0.'
109 # not set here anymore because otherwise
110 # two calculator objects go out of sync
111 # after each calculation triggering unnecessary
112 # recalculation
113 break
115 elif 'Final energy' in line:
116 key = 'energy_without_dispersion_correction'
117 results[key] = float(line.split()[-2])
118 elif 'Final free energy' in line:
119 key = 'free_energy_without_dispersion_correction'
120 results[key] = float(line.split()[-2])
121 elif 'NB est. 0K energy' in line:
122 key = 'energy_zero_without_dispersion_correction'
123 results[key] = float(line.split()[-2])
125 # Add support for dispersion correction
126 # filtering due to SEDC is done in get_potential_energy
127 elif 'Dispersion corrected final energy' in line:
128 key = 'energy_with_dispersion_correlation'
129 results[key] = float(line.split()[-2])
130 elif 'Dispersion corrected final free energy' in line:
131 key = 'free_energy_with_dispersion_correlation'
132 results[key] = float(line.split()[-2])
133 elif 'NB dispersion corrected est. 0K energy' in line:
134 key = 'energy_zero_with_dispersion_correlation'
135 results[key] = float(line.split()[-2])
137 # check if we had a finite basis set correction
138 elif 'Total energy corrected for finite basis set' in line:
139 key = 'energy_with_finite_basis_set_correction'
140 results[key] = float(line.split()[-2])
142 # ******************** Forces *********************
143 # ************** Symmetrised Forces ***************
144 # ******************** Constrained Forces ********************
145 # ******************* Unconstrained Forces *******************
146 elif re.search(r'\**.* Forces \**', line):
147 forces, constraints = _read_forces(fd, n_atoms)
148 results['forces'] = np.array(forces)
150 # ***************** Stress Tensor *****************
151 # *********** Symmetrised Stress Tensor ***********
152 elif re.search(r'\**.* Stress Tensor \**', line):
153 results.update(_read_stress(fd))
155 elif any(_ in line for _ in markers_new_iteration):
156 _add_atoms(
157 images,
158 lattice_real,
159 species,
160 custom_species,
161 positions_frac,
162 constraints,
163 results,
164 )
165 # reset for the next step
166 lattice_real = None
167 species = None
168 positions_frac = None
169 results = {}
170 constraints = []
172 # extract info from the Mulliken analysis
173 elif 'Atomic Populations' in line:
174 results.update(_read_mulliken_charges(fd))
176 # extract detailed Hirshfeld analysis (iprint > 1)
177 elif 'Hirshfeld total electronic charge (e)' in line:
178 results.update(_read_hirshfeld_details(fd, n_atoms))
180 elif 'Hirshfeld Analysis' in line:
181 results.update(_read_hirshfeld_charges(fd))
183 # There is actually no good reason to get out of the loop
184 # already at this point... or do I miss something?
185 # elif 'BFGS: Final Configuration:' in line:
186 # break
187 elif 'warn' in line.lower():
188 castep_warnings.append(line)
190 # fetch some last info
191 elif 'Total time' in line:
192 pattern = r'.*=\s*([\d\.]+) s'
193 total_time = float(re.search(pattern, line).group(1))
195 elif 'Peak Memory Use' in line:
196 pattern = r'.*=\s*([\d]+) kB'
197 peak_memory = int(re.search(pattern, line).group(1))
199 # add the last image
200 _add_atoms(
201 images,
202 lattice_real,
203 species,
204 custom_species,
205 positions_frac,
206 constraints,
207 results,
208 )
210 for atoms in images:
211 # these variables are temporarily assigned to `SinglePointCalculator`
212 # to be assigned to the `Castep` calculator for backward compatibility
213 atoms.calc._cut_off_energy = cut_off_energy
214 atoms.calc._kpoints = kpoints
215 atoms.calc._species_pot = species_pot
216 atoms.calc._total_time = total_time
217 atoms.calc._peak_memory = peak_memory
218 atoms.calc._parameters_header = parameters_header
220 if castep_warnings:
221 warnings.warn('WARNING: .castep file contains warnings')
222 for warning in castep_warnings:
223 warnings.warn(warning)
225 if isinstance(index, str):
226 index = string2index(index)
228 return images[index]
231def _find_last_record(fd):
232 """Find the last record of the .castep file.
234 Returns
235 -------
236 start : int
237 Line number of the first line of the last record.
238 end : int
239 Line number of the last line of the last record.
240 end_found : bool
241 True if the .castep file ends as expected.
243 """
244 start = -1
245 for i, line in enumerate(fd):
246 if (('Welcome' in line or 'Materials Studio' in line)
247 and 'CASTEP' in line):
248 start = i
250 if start < 0:
251 warnings.warn(
252 f'Could not find CASTEP label in result file: {fd.name}.'
253 ' Are you sure this is a .castep file?'
254 )
255 return None
257 # search for regular end of file
258 end_found = False
259 # start to search from record beginning from the back
260 # and see if
261 end = -1
262 fd.seek(0)
263 for i, line in enumerate(fd):
264 if i < start:
265 continue
266 if 'Finalisation time =' in line:
267 end_found = True
268 end = i
269 break
271 return (start, end, end_found)
274def _read_header(out: io.TextIOBase):
275 """Read the header blocks from a .castep file.
277 Returns
278 -------
279 parameters : dict
280 Dictionary storing keys and values of a .param file.
281 """
282 def _parse_on_off(_: str):
283 return {'on': True, 'off': False}[_]
285 read_title = False
286 parameters: Dict[str, Any] = {}
287 for line in out:
288 if len(line) == 0: # end of file
289 break
290 if re.search(r'^\s*\*+$', line) and read_title: # end of header
291 break
293 if re.search(r'\**.* Title \**', line):
294 read_title = True
296 # General Parameters
298 elif 'output verbosity' in line:
299 parameters['iprint'] = int(line.split()[-1][1])
300 elif re.match(r'\stype of calculation\s*:', line):
301 parameters['task'] = {
302 'single point energy': 'SinglePoint',
303 'geometry optimization': 'GeometryOptimization',
304 'band structure': 'BandStructure',
305 'molecular dynamics': 'MolecularDynamics',
306 'optical properties': 'Optics',
307 'phonon calculation': 'Phonon',
308 'E-field calculation': 'Efield',
309 'Phonon followed by E-field': 'Phonon+Efield',
310 'transition state search': 'TransitionStateSearch',
311 'Magnetic Resonance': 'MagRes',
312 'Core level spectra': 'Elnes',
313 'Electronic Spectroscopy': 'ElectronicSpectroscopy',
314 }[line.split(':')[-1].strip()]
315 elif 'stress calculation' in line:
316 parameters['calculate_stress'] = _parse_on_off(line.split()[-1])
317 elif 'calculation limited to maximum' in line:
318 parameters['run_time'] = float(line.split()[-2])
319 elif re.match(r'\soptimization strategy\s*:', line):
320 parameters['opt_strategy'] = {
321 'maximize speed(+++)': 'Speed',
322 'minimize memory(---)': 'Memory',
323 'balance speed and memory': 'Default',
324 }[line.split(':')[-1].strip()]
326 # Exchange-Correlation Parameters
328 elif re.match(r'\susing functional\s*:', line):
329 parameters['xc_functional'] = {
330 'Local Density Approximation': 'LDA',
331 'Perdew Wang (1991)': 'PW91',
332 'Perdew Burke Ernzerhof': 'PBE',
333 'revised Perdew Burke Ernzerhof': 'RPBE',
334 'PBE with Wu-Cohen exchange': 'WC',
335 'PBE for solids (2008)': 'PBESOL',
336 'Hartree-Fock': 'HF',
337 'Hartree-Fock +': 'HF-LDA',
338 'Screened Hartree-Fock': 'sX',
339 'Screened Hartree-Fock + ': 'sX-LDA',
340 'hybrid PBE0': 'PBE0',
341 'hybrid B3LYP': 'B3LYP',
342 'hybrid HSE03': 'HSE03',
343 'hybrid HSE06': 'HSE06',
344 'RSCAN': 'RSCAN',
345 }[line.split(':')[-1].strip()]
346 elif 'DFT+D: Semi-empirical dispersion correction' in line:
347 parameters['sedc_apply'] = _parse_on_off(line.split()[-1])
348 elif 'SEDC with' in line:
349 parameters['sedc_scheme'] = {
350 'OBS correction scheme': 'OBS',
351 'G06 correction scheme': 'G06',
352 'D3 correction scheme': 'D3',
353 'D3(BJ) correction scheme': 'D3-BJ',
354 'D4 correction scheme': 'D4',
355 'JCHS correction scheme': 'JCHS',
356 'TS correction scheme': 'TS',
357 'TSsurf correction scheme': 'TSSURF',
358 'TS+SCS correction scheme': 'TSSCS',
359 'aperiodic TS+SCS correction scheme': 'ATSSCS',
360 'aperiodic MBD@SCS method': 'AMBD',
361 'MBD@SCS method': 'MBD',
362 'aperiodic MBD@rsSCS method': 'AMBD*',
363 'MBD@rsSCS method': 'MBD*',
364 'XDM correction scheme': 'XDM',
365 }[line.split(':')[-1].strip()]
367 # Basis Set Parameters
369 elif 'basis set accuracy' in line:
370 parameters['basis_precision'] = line.split()[-1]
371 elif 'plane wave basis set cut-off' in line:
372 parameters['cut_off_energy'] = float(line.split()[-2])
373 elif re.match(r'\sfinite basis set correction\s*:', line):
374 parameters['finite_basis_corr'] = {
375 'none': 0,
376 'manual': 1,
377 'automatic': 2,
378 }[line.split()[-1]]
380 # Electronic Parameters
382 elif 'treating system as spin-polarized' in line:
383 parameters['spin_polarized'] = True
385 # Electronic Minimization Parameters
387 elif 'Treating system as non-metallic' in line:
388 parameters['fix_occupancy'] = True
389 elif 'total energy / atom convergence tol.' in line:
390 parameters['elec_energy_tol'] = float(line.split()[-2])
391 elif 'convergence tolerance window' in line:
392 parameters['elec_convergence_win'] = int(line.split()[-2])
393 elif 'max. number of SCF cycles:' in line:
394 parameters['max_scf_cycles'] = float(line.split()[-1])
395 elif 'dump wavefunctions every' in line:
396 parameters['num_dump_cycles'] = float(line.split()[-3])
398 # Density Mixing Parameters
400 elif 'density-mixing scheme' in line:
401 parameters['mixing_scheme'] = line.split()[-1]
403 return parameters
406def _read_unit_cell(out: io.TextIOBase):
407 """Read a Unit Cell block from a .castep file."""
408 for line in out:
409 fields = line.split()
410 if len(fields) == 6:
411 break
412 lattice_real = []
413 for _ in range(3):
414 lattice_real.append([float(f) for f in fields[0:3]])
415 line = out.readline()
416 fields = line.split()
417 return np.array(lattice_real)
420def _read_forces(out: io.TextIOBase, n_atoms: int):
421 """Read a block for atomic forces from a .castep file."""
422 constraints: List[FixConstraint] = []
423 forces = []
424 for line in out:
425 fields = line.split()
426 if len(fields) == 7:
427 break
428 for n in range(n_atoms):
429 consd = np.array([0, 0, 0])
430 fxyz = [0.0, 0.0, 0.0]
431 for i, force_component in enumerate(fields[-4:-1]):
432 if force_component.count("(cons'd)") > 0:
433 consd[i] = 1
434 # remove constraint labels in force components
435 fxyz[i] = float(force_component.replace("(cons'd)", ''))
436 if consd.all():
437 constraints.append(FixAtoms(n))
438 elif consd.any():
439 constraints.append(FixCartesian(n, consd))
440 forces.append(fxyz)
441 line = out.readline()
442 fields = line.split()
443 return forces, constraints
446def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int):
447 """Read fractional coordinates from a .castep file."""
448 species: List[str] = []
449 custom_species: Optional[List[str]] = None # A CASTEP special thing
450 positions_frac: List[List[float]] = []
451 for line in out:
452 fields = line.split()
453 if len(fields) == 7:
454 break
455 for _ in range(n_atoms):
456 spec_custom = fields[1].split(':', 1)
457 elem = spec_custom[0]
458 if len(spec_custom) > 1 and custom_species is None:
459 # Add it to the custom info!
460 custom_species = list(species)
461 species.append(elem)
462 if custom_species is not None:
463 custom_species.append(fields[1])
464 positions_frac.append([float(s) for s in fields[3:6]])
465 line = out.readline()
466 fields = line.split()
467 return species, custom_species, positions_frac
470def _read_stress(out: io.TextIOBase):
471 """Read a block for the stress tensor from a .castep file."""
472 for line in out:
473 fields = line.split()
474 if len(fields) == 6:
475 break
476 results = {}
477 stress = []
478 for _ in range(3):
479 stress.append([float(s) for s in fields[2:5]])
480 line = out.readline()
481 fields = line.split()
482 # stress in .castep file is given in GPa
483 results['stress'] = np.array(stress) * units.GPa
484 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]]
485 line = out.readline()
486 if "Pressure:" in line:
487 results['pressure'] = float(
488 line.split()[-2]) * units.GPa # type: ignore[assignment]
489 return results
492def _add_atoms(
493 images,
494 lattice_real,
495 species,
496 custom_species,
497 positions_frac,
498 constraints,
499 results,
500):
501 # If all the lattice parameters are fixed,
502 # the `Unit Cell` block in the .castep file is not printed
503 # in every ionic step.
504 # The lattice paramters are therefore taken from the last step.
505 # This happens:
506 # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE`
507 # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT`
508 if lattice_real is None:
509 lattice_real = images[-1].cell.copy()
511 # for highly symmetric systems (where essentially only the
512 # stress is optimized, but the atomic positions) positions
513 # are only printed once.
514 if species is None:
515 species = images[-1].symbols
516 if positions_frac is None:
517 positions_frac = images[-1].get_scaled_positions()
519 _set_energy_and_free_energy(results)
521 atoms = Atoms(
522 species,
523 cell=lattice_real,
524 constraint=constraints,
525 pbc=True,
526 scaled_positions=positions_frac,
527 )
528 if custom_species is not None:
529 atoms.new_array(
530 'castep_custom_species',
531 np.array(custom_species),
532 )
534 atoms.calc = SinglePointCalculator(atoms)
535 atoms.calc.results = results
537 images.append(atoms)
540def _read_mulliken_charges(out: io.TextIOBase):
541 """Read a block for Mulliken charges from a .castep file."""
542 for i in range(3):
543 line = out.readline()
544 if i == 1:
545 spin_polarized = 'Spin' in line
546 results = defaultdict(list)
547 for line in out:
548 fields = line.split()
549 if len(fields) == 1:
550 break
551 if spin_polarized:
552 if len(fields) != 7: # due to CASTEP 18 outformat changes
553 results['charges'].append(float(fields[-2]))
554 results['magmoms'].append(float(fields[-1]))
555 else:
556 results['charges'].append(float(fields[-1]))
557 return {k: np.array(v) for k, v in results.items()}
560def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int):
561 """Read the Hirshfeld analysis when iprint > 1 from a .castep file."""
562 results = defaultdict(list)
563 for _ in range(n_atoms):
564 for line in out:
565 if line.strip() == '':
566 break # end for each atom
567 if 'Hirshfeld / free atomic volume :' in line:
568 line = out.readline()
569 fields = line.split()
570 results['hirshfeld_volume_ratios'].append(float(fields[0]))
571 return {k: np.array(v) for k, v in results.items()}
574def _read_hirshfeld_charges(out: io.TextIOBase):
575 """Read a block for Hirshfeld charges from a .castep file."""
576 for i in range(3):
577 line = out.readline()
578 if i == 1:
579 spin_polarized = 'Spin' in line
580 results = defaultdict(list)
581 for line in out:
582 fields = line.split()
583 if len(fields) == 1:
584 break
585 if spin_polarized:
586 results['hirshfeld_charges'].append(float(fields[-2]))
587 results['hirshfeld_magmoms'].append(float(fields[-1]))
588 else:
589 results['hirshfeld_charges'].append(float(fields[-1]))
590 return {k: np.array(v) for k, v in results.items()}
593def _set_energy_and_free_energy(results: Dict[str, Any]):
594 """Set values referred to as `energy` and `free_energy`."""
595 if 'energy_with_dispersion_correction' in results:
596 suffix = '_with_dispersion_correction'
597 else:
598 suffix = '_without_dispersion_correction'
600 if 'free_energy' + suffix in results: # metallic
601 keye = 'energy_zero' + suffix
602 keyf = 'free_energy' + suffix
603 else: # non-metallic
604 # The finite basis set correction is applied to the energy at finite T
605 # (not the energy at 0 K). We should hence refer to the corrected value
606 # as `energy` only when the free energy is unavailable, i.e., only when
607 # FIX_OCCUPANCY : TRUE and thus no smearing is applied.
608 if 'energy_with_finite_basis_set_correction' in results:
609 keye = 'energy_with_finite_basis_set_correction'
610 else:
611 keye = 'energy' + suffix
612 keyf = 'energy' + suffix
614 results['energy'] = results[keye]
615 results['free_energy'] = results[keyf]