Coverage for /builds/debichem-team/python-ase/ase/io/onetep.py: 82.72%

567 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-03-06 04:00 +0000

1import re 

2import warnings 

3from copy import deepcopy 

4from os.path import dirname, isfile 

5from pathlib import Path 

6 

7import numpy as np 

8 

9from ase.atoms import Atoms 

10from ase.calculators.singlepoint import SinglePointDFTCalculator 

11from ase.cell import Cell 

12from ase.units import Bohr 

13 

14no_positions_error = ( 

15 "no positions can be read from this onetep output " 

16 "if you wish to use ASE to read onetep outputs " 

17 "please use uppercase block positions in your calculations" 

18) 

19 

20unable_to_read = "unable to read this onetep output file, ending" 

21 

22# taken from onetep source code, 

23# does not seem to be from any known NIST data 

24units = {"Hartree": 27.2116529, "Bohr": 1 / 1.889726134583548707935} 

25 

26# Want to add a functionality? add a global constant below 

27ONETEP_START = re.compile( 

28 r"(?i)^\s*\|\s*Linear-Scaling\s*Ab\s*" 

29 r"Initio\s*Total\s*Energy\s*Program\s*\|\s*$" 

30) 

31ONETEP_STOP = re.compile(r"(?i)^\s*-+\s*TIMING\s*INFORMATION\s*-+\s*$") 

32ONETEP_TOTAL_ENERGY = re.compile( 

33 r"(?i)^\s*\|\s*\*{3}\s*NGWF\s*" r"optimisation\s*converged\s*\*{3}\s*\|\s*$" 

34) 

35ONETEP_FORCE = re.compile(r"(?i)^\s*\*+\s*Forces\s*\*+\s*$") 

36ONETEP_MULLIKEN = re.compile(r"(?i)^\s*Mulliken\s*Atomic\s*Populations\s*$") 

37ONETEP_SPIN = re.compile(r"(?i)^\s*Down\s*spin\s*density") 

38ONETEP_POSITION = re.compile(r"(?i)^\s*Cell\s*Contents\s*$") 

39ONETEP_FIRST_POSITION = re.compile( 

40 r"^\s*%BLOCK\s*POSITIONS\s*_?\s*(ABS|FRAC)\s*:?\s*([*#!].*)?$" 

41) 

42ONETEP_WRONG_FIRST_POSITION = re.compile( 

43 r"^\s*%block\s*positions\s*_?\s*(abs|frac)\s*:?\s*([*#!].*)?$" 

44) 

45ONETEP_RESUMING_GEOM = re.compile( 

46 r"(?i)^\s*<{16}\s*Resuming\s*previous" 

47 r"\s*ONETEP\s*Geometry\s*Optimisation\s*>{16}\s*$" 

48) 

49 

50ONETEP_ATOM_COUNT = re.compile(r"(?i)^\s*Totals\s*:\s*(\d+\s*)*$") 

51ONETEP_IBFGS_ITER = re.compile(r"(?i)^\s*BFGS\s*:\s*starting\s*iteration") 

52ONETEP_IBFGS_IMPROVE = re.compile(r"(?i)^\s*BFGS\s*:\s*improving\s*iteration") 

53ONETEP_START_GEOM = re.compile( 

54 r"(?i)^<+\s*Starting\s*ONETEP\s*Geometry\s*Optimisation\s*>+$" 

55) 

56ONETEP_END_GEOM = re.compile(r"(?i)^\s*BFGS\s*:\s*Final\s*Configuration:\s*$") 

57 

58ONETEP_SPECIES = re.compile(r"(?i)^\s*%BLOCK\s*SPECIES\s*:?\s*([*#!].*)?$") 

59 

60ONETEP_FIRST_CELL = re.compile( 

61 r"(?i)^\s*%BLOCK\s*LATTICE\s*_?\s*CART\s*:?\s*([*#!].*)?$" 

62) 

63ONETEP_STRESS_CELL = re.compile( 

64 r"(?i)^\s*stress_calculation:\s*cell\s*geometry\s*$" 

65) 

66 

67 

68def get_onetep_keywords(path): 

69 if isinstance(path, str): 

70 with open(path) as fd: 

71 results = read_onetep_in(fd, only_keywords=True) 

72 else: 

73 results = read_onetep_in(path, only_keywords=True) 

74 

75 # If there is an include file, the entire 

76 # file keyword's will be included in the dict 

77 # and the include_file keyword will be deleted 

78 if "include_file" in results["keywords"]: 

79 warnings.warn("include_file will be deleted from the dict") 

80 del results["keywords"]["include_file"] 

81 return results["keywords"] 

82 

83 

84def read_onetep_in(fd, **kwargs): 

85 """ 

86 Read a single ONETEP input. 

87 

88 This function can be used to visually check ONETEP inputs, 

89 using the ase gui. It can also be used to get access to 

90 the input parameters attached to the ONETEP calculator 

91 returned 

92 

93 The function should work on inputs which contain 

94 'include_file' command(s), (possibly recursively 

95 but untested) 

96 

97 The function should work on input which contains 

98 exotic element(s) name(s) if the specie block is 

99 present to map them back to real element(s) 

100 

101 Parameters 

102 ---------- 

103 fd : io-object 

104 File to read. 

105 

106 Return 

107 ------ 

108 structure: Atoms 

109 Atoms object with cell and a Onetep calculator 

110 attached which contains the keywords dictionary 

111 """ 

112 

113 fdi_lines = fd.readlines() 

114 

115 try: 

116 fd_path = Path(fd.name).resolve() 

117 fd_parent = fd_path.parent 

118 include_files = [fd_path] 

119 except AttributeError: 

120 # We are in a StringIO or something similar 

121 fd_path = Path().cwd() 

122 fd_parent = fd_path 

123 include_files = [Path().cwd()] 

124 

125 def clean_lines(lines): 

126 """ 

127 Remove indesirable line from the input 

128 """ 

129 new_lines = [] 

130 for line in lines: 

131 sep = re.split(r"[!#]", line.strip())[0] 

132 if sep: 

133 new_lines.append(sep) 

134 return new_lines 

135 

136 # Skip comments and empty lines 

137 fdi_lines = clean_lines(fdi_lines) 

138 

139 # Are we in a block? 

140 block_start = 0 

141 

142 keywords = {} 

143 atoms = Atoms() 

144 cell = np.zeros((3, 3)) 

145 fractional = False 

146 positions = False 

147 symbols = False 

148 

149 # Main loop reading the input 

150 for n, line in enumerate(fdi_lines): 

151 line_lower = line.lower() 

152 if re.search(r"^\s*%block", line_lower): 

153 block_start = n + 1 

154 if re.search(r"lattice_cart$", line_lower): 

155 if re.search(r"^\s*ang\s*$", fdi_lines[block_start]): 

156 cell = np.loadtxt(fdi_lines[n + 2: n + 5]) 

157 else: 

158 cell = np.loadtxt(fdi_lines[n + 1: n + 4]) 

159 cell *= Bohr 

160 

161 if not block_start: 

162 if "devel_code" in line_lower: 

163 warnings.warn("devel_code is not supported") 

164 continue 

165 # Splits line on any valid onetep separator 

166 sep = re.split(r"[:=\s]+", line) 

167 keywords[sep[0]] = " ".join(sep[1:]) 

168 # If include_file is used, we open the included file 

169 # and insert it in the current fdi_lines... 

170 # ONETEP does not work with cascade 

171 # and this SHOULD NOT work with cascade 

172 if re.search(r"^\s*include_file$", sep[0]): 

173 name = sep[1].replace("'", "") 

174 name = name.replace('"', "") 

175 new_path = fd_parent / name 

176 for path in include_files: 

177 if new_path.samefile(path): 

178 raise ValueError("invalid/recursive include_file") 

179 new_fd = open(new_path) 

180 new_lines = new_fd.readlines() 

181 new_lines = clean_lines(new_lines) 

182 for include_line in new_lines: 

183 sep = re.split(r"[:=\s]+", include_line) 

184 if re.search(r"^\s*include_file$", sep[0]): 

185 raise ValueError("nested include_file") 

186 fdi_lines[:] = ( 

187 fdi_lines[: n + 1] + new_lines + fdi_lines[n + 1:] 

188 ) 

189 include_files.append(new_path) 

190 continue 

191 

192 if re.search(r"^\s*%endblock", line_lower): 

193 if re.search(r"\s*positions_", line_lower): 

194 head = re.search(r"(?i)^\s*(\S*)\s*$", fdi_lines[block_start]) 

195 head = head.group(1).lower() if head else "" 

196 conv = 1 if head == "ang" else units["Bohr"] 

197 # Skip one line if head is True 

198 to_read = fdi_lines[block_start + int(bool(head)): n] 

199 positions = np.loadtxt(to_read, usecols=(1, 2, 3)) 

200 positions *= conv 

201 symbols = np.loadtxt(to_read, usecols=(0), dtype="str") 

202 if re.search(r".*frac$", line_lower): 

203 fractional = True 

204 elif re.search(r"^\s*%endblock\s*species$", line_lower): 

205 els = fdi_lines[block_start:n] 

206 species = {} 

207 for el in els: 

208 sep = el.split() 

209 species[sep[0]] = sep[1] 

210 to_read = [i.strip() for i in fdi_lines[block_start:n]] 

211 keywords["species"] = to_read 

212 elif re.search(r"lattice_cart$", line_lower): 

213 pass 

214 else: 

215 to_read = [i.strip() for i in fdi_lines[block_start:n]] 

216 block_title = line_lower.replace("%endblock", "").strip() 

217 keywords[block_title] = to_read 

218 block_start = 0 

219 

220 # We don't need a fully valid onetep 

221 # input to read the keywords, just 

222 # the keywords 

223 if kwargs.get("only_keywords", False): 

224 return {"keywords": keywords} 

225 # Necessary if we have only one atom 

226 # Check if the cell is valid (3D) 

227 if not cell.any(axis=1).all(): 

228 raise ValueError("invalid cell specified") 

229 

230 if positions is False: 

231 raise ValueError("invalid position specified") 

232 

233 if symbols is False: 

234 raise ValueError("no symbols found") 

235 

236 positions = positions.reshape(-1, 3) 

237 symbols = symbols.reshape(-1) 

238 tags = [] 

239 info = {"onetep_species": []} 

240 for symbol in symbols: 

241 label = symbol.replace(species[symbol], "") 

242 if label.isdigit(): 

243 tags.append(int(label)) 

244 else: 

245 tags.append(0) 

246 info["onetep_species"].append(symbol) 

247 atoms = Atoms( 

248 [species[i] for i in symbols], cell=cell, pbc=True, tags=tags, info=info 

249 ) 

250 if fractional: 

251 atoms.set_scaled_positions(positions / units["Bohr"]) 

252 else: 

253 atoms.set_positions(positions) 

254 results = {"atoms": atoms, "keywords": keywords} 

255 return results 

256 

257 

258def write_onetep_in( 

259 fd, 

260 atoms, 

261 edft=False, 

262 xc="PBE", 

263 ngwf_count=-1, 

264 ngwf_radius=9.0, 

265 keywords={}, 

266 pseudopotentials={}, 

267 pseudo_path=".", 

268 pseudo_suffix=None, 

269 **kwargs 

270): 

271 """ 

272 Write a single ONETEP input. 

273 

274 This function will be used by ASE to perform 

275 various workflows (Opt, NEB...) or can be used 

276 manually to quickly create ONETEP input file(s). 

277 

278 The function will first write keywords in 

279 alphabetic order in lowercase. Secondly, blocks 

280 will be written in alphabetic order in uppercase. 

281 

282 Two ways to work with the function: 

283 

284 - By providing only (simple) keywords present in 

285 the parameters. ngwf_count and ngwf_radius 

286 accept multiple types as described in the Parameters 

287 section. 

288 

289 - If the keywords parameters is provided as a dictionary 

290 these keywords will be used to write the input file and 

291 will take priority. 

292 

293 If no pseudopotentials are provided in the parameters and 

294 the function will try to look for suitable pseudopotential 

295 in the pseudo_path. 

296 

297 Parameters 

298 ---------- 

299 fd : file 

300 File to write. 

301 atoms: Atoms 

302 Atoms including Cell object to write. 

303 edft: Bool 

304 Activate EDFT. 

305 xc: str 

306 DFT xc to use e.g (PBE, RPBE, ...) 

307 ngwf_count: int|list|dict 

308 Behaviour depends on the type: 

309 int: every species will have this amount 

310 of ngwfs. 

311 list: list of int, will be attributed 

312 alphabetically to species: 

313 dict: keys are species name(s), 

314 value are their number: 

315 ngwf_radius: int|list|dict 

316 Behaviour depends on the type: 

317 float: every species will have this radius. 

318 list: list of float, will be attributed 

319 alphabetically to species: 

320 [10.0, 9.0] 

321 dict: keys are species name(s), 

322 value are their radius: 

323 {'Na': 9.0, 'Cl': 10.0} 

324 keywords: dict 

325 Dictionary with ONETEP keywords to write, 

326 keywords with lists as values will be 

327 treated like blocks, with each element 

328 of list being a different line. 

329 pseudopotentials: dict 

330 Behaviour depends on the type: 

331 keys are species name(s) their 

332 value are the pseudopotential file to use: 

333 {'Na': 'Na.usp', 'Cl': 'Cl.usp'} 

334 pseudo_path: str 

335 Where to look for pseudopotential, correspond 

336 to the pseudo_path keyword of ONETEP. 

337 pseudo_suffix: str 

338 Suffix for the pseudopotential filename 

339 to look for, useful if you have multiple sets of 

340 pseudopotentials in pseudo_path. 

341 """ 

342 

343 label = kwargs.get("label", "onetep") 

344 try: 

345 directory = kwargs.get("directory", Path(dirname(fd.name))) 

346 except AttributeError: 

347 directory = "." 

348 autorestart = kwargs.get("autorestart", False) 

349 elements = np.array(atoms.symbols) 

350 tags = np.array(atoms.get_tags()) 

351 species_maybe = atoms.info.get("onetep_species", False) 

352 #  We look if the atom.info contains onetep species information 

353 # If it does, we use it, as it might contains character 

354 #  which are not allowed in ase tags, if not we fall back 

355 # to tags and use them instead. 

356 if species_maybe: 

357 if set(species_maybe) != set(elements): 

358 species = np.array(species_maybe) 

359 else: 

360 species = elements 

361 else: 

362 formatted_tags = np.array(["" if i == 0 else str(i) for i in tags]) 

363 species = np.char.add(elements, formatted_tags) 

364 numbers = np.array(atoms.numbers) 

365 tmp = np.argsort(species) 

366 # We sort both Z and name the same 

367 numbers = np.take_along_axis(numbers, tmp, axis=0) 

368 # u_elements = np.take_along_axis(elements, tmp, axis=0) 

369 u_species = np.take_along_axis(species, tmp, axis=0) 

370 elements = np.take_along_axis(elements, tmp, axis=0) 

371 # We want to keep unique but without sort: small trick with index 

372 idx = np.unique(u_species, return_index=True)[1] 

373 elements = elements[idx] 

374 # Unique species 

375 u_species = u_species[idx] 

376 numbers = numbers[idx] 

377 n_sp = len(u_species) 

378 

379 if isinstance(ngwf_count, int): 

380 ngwf_count = dict(zip(u_species, [ngwf_count] * n_sp)) 

381 elif isinstance(ngwf_count, list): 

382 ngwf_count = dict(zip(u_species, ngwf_count)) 

383 elif isinstance(ngwf_count, dict): 

384 pass 

385 else: 

386 raise TypeError("ngwf_count can only be int|list|dict") 

387 

388 if isinstance(ngwf_radius, float): 

389 ngwf_radius = dict(zip(u_species, [ngwf_radius] * n_sp)) 

390 elif isinstance(ngwf_radius, list): 

391 ngwf_radius = dict(zip(u_species, ngwf_radius)) 

392 elif isinstance(ngwf_radius, dict): 

393 pass 

394 else: 

395 raise TypeError("ngwf_radius can only be float|list|dict") 

396 

397 pp_files = re.sub("'|\"", "", keywords.get("pseudo_path", pseudo_path)) 

398 pp_files = Path(pp_files).glob("*") 

399 pp_files = [i for i in pp_files if i.is_file()] 

400 

401 if pseudo_suffix: 

402 common_suffix = [pseudo_suffix] 

403 else: 

404 common_suffix = [".usp", ".recpot", ".upf", ".paw", ".psp", ".pspnc"] 

405 

406 if keywords.get("species_pot", False): 

407 pp_list = keywords["species_pot"] 

408 elif isinstance(pseudopotentials, dict): 

409 pp_list = [] 

410 for idx, el in enumerate(u_species): 

411 if el in pseudopotentials: 

412 pp_list.append(f"{el} {pseudopotentials[el]}") 

413 else: 

414 for i in pp_files: 

415 reg_el_candidate = re.split(r"[-_.:= ]+", i.stem)[0] 

416 if ( 

417 elements[idx] == reg_el_candidate.title() 

418 and i.suffix.lower() in common_suffix 

419 ): 

420 pp_list.append(f"{el} {i.name}") 

421 else: 

422 raise TypeError("pseudopotentials object can only be dict") 

423 

424 default_species = [] 

425 for idx, el in enumerate(u_species): 

426 tmp = "" 

427 tmp += u_species[idx] + " " + elements[idx] + " " 

428 tmp += str(numbers[idx]) + " " 

429 try: 

430 tmp += str(ngwf_count[el]) + " " 

431 except KeyError: 

432 tmp += str(ngwf_count[elements[idx]]) + " " 

433 try: 

434 tmp += str(ngwf_radius[el]) 

435 except KeyError: 

436 tmp += str(ngwf_radius[elements[idx]]) 

437 default_species.append(tmp) 

438 

439 positions_abs = ["ang"] 

440 for s, p in zip(species, atoms.get_positions()): 

441 line = "{s:>5} {0:>12.6f} {1:>12.6f} {2:>12.6f}".format(s=s, *p) 

442 positions_abs.append(line) 

443 

444 lattice_cart = ["ang"] 

445 for axis in atoms.get_cell(): 

446 line = "{:>16.8f} {:>16.8f} {:>16.8f}".format(*axis) 

447 lattice_cart.append(line) 

448 

449 # Default keywords if not provided by the user, 

450 # most of them are ONETEP default, except write_forces 

451 # which is always turned on. 

452 default_keywords = { 

453 "xc_functional": xc, 

454 "edft": edft, 

455 "cutoff_energy": 20, 

456 "paw": False, 

457 "task": "singlepoint", 

458 "output_detail": "normal", 

459 "species": default_species, 

460 "pseudo_path": pseudo_path, 

461 "species_pot": pp_list, 

462 "positions_abs": positions_abs, 

463 "lattice_cart": lattice_cart, 

464 "write_forces": True, 

465 "forces_output_detail": "verbose", 

466 } 

467 

468 # Main loop, fill the keyword dictionary 

469 keywords = {key.lower(): value for key, value in keywords.items()} 

470 for value in default_keywords: 

471 if not keywords.get(value, None): 

472 keywords[value] = default_keywords[value] 

473 

474 # No pseudopotential provided, we look for them in pseudo_path 

475 # If autorestart is True, we look for restart files, 

476 # and turn on relevant keywords... 

477 if autorestart: 

478 keywords["read_denskern"] = isfile(directory / (label + ".dkn")) 

479 keywords["read_tightbox_ngwfs"] = isfile( 

480 directory / (label + ".tightbox_ngwfs") 

481 ) 

482 keywords["read_hamiltonian"] = isfile(directory / (label + ".ham")) 

483 

484 # If not EDFT, hamiltonian is irrelevant. 

485 # print(keywords.get('edft', False)) 

486 # keywords['read_hamiltonian'] = \ 

487 # keywords.get('read_hamiltonian', False) & keywords.get('edft', False) 

488 

489 keywords = dict(sorted(keywords.items())) 

490 

491 lines = [] 

492 block_lines = [] 

493 

494 for key, value in keywords.items(): 

495 if isinstance(value, (list, np.ndarray)): 

496 if not all(isinstance(_, str) for _ in value): 

497 raise TypeError("list values for blocks must be strings only") 

498 block_lines.append(("\n%block " + key).upper()) 

499 block_lines.extend(value) 

500 block_lines.append(("%endblock " + key).upper()) 

501 elif isinstance(value, bool): 

502 lines.append(str(key) + " : " + str(value)[0]) 

503 elif isinstance(value, (str, int, float)): 

504 lines.append(str(key) + " : " + str(value)) 

505 else: 

506 raise TypeError("keyword values must be list|str|bool") 

507 input_header = ( 

508 "!" 

509 + "-" * 78 

510 + "!\n" 

511 + "!" 

512 + "-" * 33 

513 + " INPUT FILE " 

514 + "-" * 33 

515 + "!\n" 

516 + "!" 

517 + "-" * 78 

518 + "!\n\n" 

519 ) 

520 

521 input_footer = ( 

522 "\n!" 

523 + "-" * 78 

524 + "!\n" 

525 + "!" 

526 + "-" * 32 

527 + " END OF INPUT " 

528 + "-" * 32 

529 + "!\n" 

530 + "!" 

531 + "-" * 78 

532 + "!" 

533 ) 

534 

535 fd.write(input_header) 

536 fd.writelines(line + "\n" for line in lines) 

537 fd.writelines(b_line + "\n" for b_line in block_lines) 

538 

539 if "devel_code" in kwargs: 

540 warnings.warn("writing devel code as it is, at the end of the file") 

541 fd.writelines("\n" + line for line in kwargs["devel_code"]) 

542 

543 fd.write(input_footer) 

544 

545 

546def read_onetep_out(fd, index=-1, improving=False, **kwargs): 

547 """ 

548 Read ONETEP output(s). 

549 

550 !!! 

551 This function will be used by ASE when performing 

552 various workflows (Opt, NEB...) 

553 !!! 

554 

555 Parameters 

556 ---------- 

557 fd : file 

558 File to read. 

559 index: slice 

560 Which atomic configuration to read 

561 improving: Bool 

562 If the output is a geometry optimisation, 

563 improving = True will keep line search 

564 configuration from BFGS 

565 

566 Yields 

567 ------ 

568 structure: Atoms|list of Atoms 

569 """ 

570 # Put everything in memory 

571 fdo_lines = fd.readlines() 

572 n_lines = len(fdo_lines) 

573 

574 freg = re.compile(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+\-]?\d+)?") 

575 

576 # Used to store index of important elements 

577 output = { 

578 ONETEP_START: [], 

579 ONETEP_STOP: [], 

580 ONETEP_TOTAL_ENERGY: [], 

581 ONETEP_FORCE: [], 

582 ONETEP_SPIN: [], 

583 ONETEP_MULLIKEN: [], 

584 ONETEP_POSITION: [], 

585 ONETEP_FIRST_POSITION: [], 

586 ONETEP_WRONG_FIRST_POSITION: [], 

587 ONETEP_ATOM_COUNT: [], 

588 ONETEP_IBFGS_IMPROVE: [], 

589 ONETEP_IBFGS_ITER: [], 

590 ONETEP_START_GEOM: [], 

591 ONETEP_RESUMING_GEOM: [], 

592 ONETEP_END_GEOM: [], 

593 ONETEP_SPECIES: [], 

594 ONETEP_FIRST_CELL: [], 

595 ONETEP_STRESS_CELL: [], 

596 } 

597 

598 # Index will be treated to get rid of duplicate or improving iterations 

599 output_corr = deepcopy(output) 

600 

601 # Core properties that will be used in Yield 

602 properties = [ 

603 ONETEP_TOTAL_ENERGY, 

604 ONETEP_FORCE, 

605 ONETEP_MULLIKEN, 

606 ONETEP_FIRST_CELL, 

607 ] 

608 

609 # Find all matches append them to the dictionary 

610 breg = "|".join([i.pattern.replace("(?i)", "") for i in output.keys()]) 

611 prematch = {} 

612 

613 for idx, line in enumerate(fdo_lines): 

614 matches = re.search(breg, line) 

615 if matches: 

616 prematch[idx] = matches.group(0) 

617 

618 for key, value in prematch.items(): 

619 for reg in output.keys(): 

620 if re.search(reg, value): 

621 output[reg].append(key) 

622 break 

623 

624 output = {key: np.array(value) for key, value in output.items()} 

625 # Conveniance notation (pointers: no overhead, no additional memory) 

626 ibfgs_iter = np.hstack((output[ONETEP_IBFGS_ITER], output[ONETEP_END_GEOM])) 

627 ibfgs_start = output[ONETEP_START_GEOM] 

628 ibfgs_improve = output[ONETEP_IBFGS_IMPROVE] 

629 ibfgs_resume = output[ONETEP_RESUMING_GEOM] 

630 

631 onetep_start = output[ONETEP_START] 

632 onetep_stop = output[ONETEP_STOP] 

633 

634 bfgs_keywords = np.hstack((ibfgs_improve, ibfgs_resume, ibfgs_iter)) 

635 bfgs_keywords = np.sort(bfgs_keywords) 

636 

637 core_keywords = np.hstack( 

638 ( 

639 ibfgs_iter, 

640 ibfgs_start, 

641 ibfgs_improve, 

642 ibfgs_resume, 

643 ibfgs_iter, 

644 onetep_start, 

645 onetep_stop, 

646 ) 

647 ) 

648 core_keywords = np.sort(core_keywords) 

649 

650 i_first_positions = output[ONETEP_FIRST_POSITION] 

651 is_frac_positions = [i for i in i_first_positions if "FRAC" in fdo_lines[i]] 

652 

653 # In onetep species can have arbritary names, 

654 # We want to map them to real element names 

655 # Via the species block 

656 # species = np.concatenate((output[ONETEP_SPECIES], 

657 # output[ONETEP_SPECIESL])).astype(np.int32) 

658 species = output[ONETEP_SPECIES] 

659 

660 icells = np.hstack((output[ONETEP_FIRST_CELL], output[ONETEP_STRESS_CELL])) 

661 icells = icells.astype(np.int32) 

662 # Using the fact that 0 == False and > 0 == True 

663 has_bfgs = ( 

664 len(ibfgs_iter) 

665 + len(output[ONETEP_START_GEOM]) 

666 + len(output[ONETEP_RESUMING_GEOM]) 

667 ) 

668 

669 # When the input block position is written in lowercase 

670 # ONETEP does not print the initial position but a hash 

671 # of it, might be needed 

672 has_hash = len(output[ONETEP_WRONG_FIRST_POSITION]) 

673 

674 def is_in_bfgs(idx): 

675 """ 

676 Check if a given index is in a BFGS block 

677 """ 

678 for past, future in zip( 

679 output[ONETEP_START], 

680 np.hstack((output[ONETEP_START][1:], [n_lines])), 

681 ): 

682 if past < idx < future: 

683 if np.any( 

684 (past < ibfgs_start) & (ibfgs_start < future) 

685 ) or np.any((past < ibfgs_resume) & (ibfgs_resume < future)): 

686 return True 

687 return False 

688 

689 def where_in_bfgs(idx): 

690 for past, future in zip( 

691 core_keywords, np.hstack((core_keywords[1:], [n_lines])) 

692 ): 

693 if past < idx < future: 

694 if past in onetep_start: 

695 if future in ibfgs_start or future in ibfgs_resume: 

696 return "resume" 

697 continue 

698 # Are we in start or resume or improve 

699 if past in ibfgs_start: 

700 return "start" 

701 elif past in ibfgs_resume: 

702 return "resume" 

703 elif past in ibfgs_improve: 

704 return "improve" 

705 

706 return False 

707 

708 ipositions = np.hstack((output[ONETEP_POSITION], i_first_positions)).astype( 

709 np.int32 

710 ) 

711 ipositions = np.sort(ipositions) 

712 

713 n_pos = len(ipositions) 

714 

715 # Some ONETEP files will not have any positions 

716 # due to how the software is coded. As a last 

717 # resort we look for a geom file with the same label. 

718 

719 if n_pos == 0: 

720 if has_hash: 

721 raise RuntimeError(no_positions_error) 

722 raise RuntimeError(unable_to_read) 

723 

724 to_del = [] 

725 

726 # Important loop which: 

727 # - Get rid of improving BFGS iteration if improving == False 

728 # - Append None to properties to make sure each properties will 

729 # have the same length and each index correspond to the right 

730 # atomic configuration (hopefully). 

731 # Past is the index of the current atomic conf, future is the 

732 # index of the next one. 

733 

734 for idx, (past, future) in enumerate( 

735 zip(ipositions, np.hstack((ipositions[1:], [n_lines]))) 

736 ): 

737 if has_bfgs: 

738 which_bfgs = where_in_bfgs(past) 

739 

740 if which_bfgs == "resume": 

741 to_del.append(idx) 

742 continue 

743 

744 if not improving: 

745 if which_bfgs == "improve": 

746 to_del.append(idx) 

747 continue 

748 

749 # We append None if no properties in contained for 

750 # one specific atomic configurations. 

751 for prop in properties: 

752 (tmp,) = np.where((past < output[prop]) & (output[prop] <= future)) 

753 if len(tmp) == 0: 

754 output_corr[prop].append(None) 

755 else: 

756 output_corr[prop].extend(output[prop][tmp[:1]]) 

757 

758 if to_del and len(to_del) != n_pos: 

759 new_indices = np.setdiff1d(np.arange(n_pos), to_del) 

760 ipositions = ipositions[new_indices] 

761 

762 # Bunch of methods to grep properties from output. 

763 def parse_cell(idx): 

764 a, b, c = np.loadtxt([fdo_lines[idx + 2]]) * units["Bohr"] 

765 al, be, ga = np.loadtxt([fdo_lines[idx + 4]]) 

766 cell = Cell.fromcellpar([a, b, c, al, be, ga]) 

767 return np.array(cell) 

768 

769 def parse_charge(idx): 

770 n = 0 

771 offset = 4 

772 while idx + n < len(fdo_lines): 

773 if not fdo_lines[idx + n].strip(): 

774 tmp_charges = np.loadtxt( 

775 fdo_lines[idx + offset: idx + n - 1], usecols=3 

776 ) 

777 return np.reshape(tmp_charges, -1) 

778 n += 1 

779 return None 

780 

781 #  In ONETEP there is no way to differentiate electronic entropy 

782 #  and entropy due to solvent, therefore there is no way to 

783 # extrapolate the energy at 0 K. We return the last energy 

784 #  instead. 

785 

786 def parse_energy(idx): 

787 n = 0 

788 while idx + n < len(fdo_lines): 

789 if re.search(r"^\s*\|\s*Total\s*:.*\|\s*$", fdo_lines[idx + n]): 

790 energy_str = re.search(freg, fdo_lines[idx + n]).group(0) 

791 return float(energy_str) * units["Hartree"] 

792 n += 1 

793 return None 

794 

795 def parse_fermi_level(idx): 

796 n = 0 

797 fermi_levels = None 

798 while idx + n < len(fdo_lines): 

799 if "Fermi_level" in fdo_lines[idx + n]: 

800 tmp = "\n".join(fdo_lines[idx + n: idx + n + 1]) 

801 fermi_level = re.findall(freg, tmp) 

802 fermi_levels = [ 

803 float(i) * units["Hartree"] for i in fermi_level 

804 ] 

805 if re.search( 

806 r"^\s*<{5}\s*CALCULATION\s*SUMMARY\s*>{5}\s*$", 

807 fdo_lines[idx + n], 

808 ): 

809 return fermi_levels 

810 n += 1 

811 return None 

812 

813 def parse_first_cell(idx): 

814 n = 0 

815 offset = 1 

816 while idx + n < len(fdo_lines): 

817 if re.search( 

818 r'(?i)^\s*"?\s*ang\s*"?\s*([*#!].*)?$', fdo_lines[idx + n] 

819 ): 

820 offset += 1 

821 if re.search( 

822 r"(?i)^\s*%ENDBLOCK\s*LATTICE" 

823 r"\s*_?\s*CART\s*:?\s*([*#!].*)?$", 

824 fdo_lines[idx + n], 

825 ): 

826 cell = np.loadtxt(fdo_lines[idx + offset: idx + n]) 

827 return cell if offset == 2 else cell * units["Bohr"] 

828 n += 1 

829 return None 

830 

831 def parse_first_positions(idx): 

832 n = 0 

833 offset = 1 

834 while idx + n < len(fdo_lines): 

835 if re.search( 

836 r'(?i)^\s*"?\s*ang\s*"?\s*([*#!].*)?$', fdo_lines[idx + n] 

837 ): 

838 offset += 1 

839 if re.search(r"^\s*%ENDBLOCK\s*POSITIONS_", fdo_lines[idx + n]): 

840 if "FRAC" in fdo_lines[idx + n]: 

841 conv_factor = 1 

842 else: 

843 conv_factor = units["Bohr"] 

844 tmp = np.loadtxt( 

845 fdo_lines[idx + offset: idx + n], dtype="str" 

846 ).reshape(-1, 4) 

847 els = np.char.array(tmp[:, 0]) 

848 if offset == 2: 

849 pos = tmp[:, 1:].astype(np.float64) 

850 else: 

851 pos = tmp[:, 1:].astype(np.float64) * conv_factor 

852 try: 

853 atoms = Atoms(els, pos) 

854 # ASE doesn't recognize names used in ONETEP 

855 # as chemical symbol: dig deeper 

856 except KeyError: 

857 tags, real_elements = find_correct_species( 

858 els, idx, first=True 

859 ) 

860 atoms = Atoms(real_elements, pos) 

861 atoms.set_tags(tags) 

862 atoms.info["onetep_species"] = list(els) 

863 return atoms 

864 n += 1 

865 return None 

866 

867 def parse_force(idx): 

868 n = 0 

869 while idx + n < len(fdo_lines): 

870 if re.search(r"(?i)^\s*\*\s*TOTAL:.*\*\s*$", fdo_lines[idx + n]): 

871 tmp = np.loadtxt( 

872 fdo_lines[idx + 6: idx + n - 2], 

873 dtype=np.float64, 

874 usecols=(3, 4, 5), 

875 ) 

876 return tmp * units["Hartree"] / units["Bohr"] 

877 n += 1 

878 return None 

879 

880 def parse_positions(idx): 

881 n = 0 

882 offset = 7 

883 stop = 0 

884 while idx + n < len(fdo_lines): 

885 if re.search(r"^\s*x{60,}\s*$", fdo_lines[idx + n]): 

886 stop += 1 

887 if stop == 2: 

888 tmp = np.loadtxt( 

889 fdo_lines[idx + offset: idx + n], 

890 dtype="str", 

891 usecols=(1, 3, 4, 5), 

892 ) 

893 els = np.char.array(tmp[:, 0]) 

894 pos = tmp[:, 1:].astype(np.float64) * units["Bohr"] 

895 try: 

896 atoms = Atoms(els, pos) 

897 # ASE doesn't recognize names used in ONETEP 

898 # as chemical symbol: dig deeper 

899 except KeyError: 

900 tags, real_elements = find_correct_species(els, idx) 

901 atoms = Atoms(real_elements, pos) 

902 atoms.set_tags(tags) 

903 atoms.info["onetep_species"] = list(els) 

904 return atoms 

905 n += 1 

906 return None 

907 

908 def parse_species(idx): 

909 n = 1 

910 element_map = {} 

911 while idx + n < len(fdo_lines): 

912 sep = fdo_lines[idx + n].split() 

913 if re.search( 

914 r"(?i)^\s*%ENDBLOCK\s*SPECIES\s*:?\s*([*#!].*)?$", 

915 fdo_lines[idx + n], 

916 ): 

917 return element_map 

918 to_skip = re.search( 

919 r"(?i)^\s*(ang|bohr)\s*([*#!].*)?$", fdo_lines[idx + n] 

920 ) 

921 if not to_skip: 

922 element_map[sep[0]] = sep[1] 

923 n += 1 

924 return None 

925 

926 def parse_spin(idx): 

927 n = 0 

928 offset = 4 

929 while idx + n < len(fdo_lines): 

930 if not fdo_lines[idx + n].strip(): 

931 # If no spin is present we return None 

932 try: 

933 tmp_spins = np.loadtxt( 

934 fdo_lines[idx + offset: idx + n - 1], usecols=4 

935 ) 

936 return np.reshape(tmp_spins, -1) 

937 except ValueError: 

938 return None 

939 n += 1 

940 return None 

941 

942 # This is needed if ASE doesn't recognize the element 

943 def find_correct_species(els, idx, first=False): 

944 real_elements = [] 

945 tags = [] 

946 # Find nearest species block in case of 

947 # multi-output file with different species blocks. 

948 if first: 

949 closest_species = np.argmin(abs(idx - species)) 

950 else: 

951 tmp = idx - species 

952 tmp[tmp < 0] = 9999999999 

953 closest_species = np.argmin(tmp) 

954 elements_map = real_species[closest_species] 

955 for el in els: 

956 real_elements.append(elements_map[el]) 

957 tag_maybe = el.replace(elements_map[el], "") 

958 if tag_maybe.isdigit(): 

959 tags.append(int(tag_maybe)) 

960 else: 

961 tags.append(False) 

962 return tags, real_elements 

963 

964 cells = [] 

965 for idx in icells: 

966 if idx in output[ONETEP_STRESS_CELL]: 

967 cell = parse_cell(idx) if idx else None 

968 else: 

969 cell = parse_first_cell(idx) if idx else None 

970 cells.append(cell) 

971 

972 charges = [] 

973 for idx in output_corr[ONETEP_MULLIKEN]: 

974 charge = parse_charge(idx) if idx else None 

975 charges.append(charge) 

976 

977 energies = [] 

978 for idx in output_corr[ONETEP_TOTAL_ENERGY]: 

979 energy = parse_energy(idx) if idx else None 

980 energies.append(energy) 

981 

982 fermi_levels = [] 

983 for idx in output_corr[ONETEP_TOTAL_ENERGY]: 

984 fermi_level = parse_fermi_level(idx) if idx else None 

985 fermi_levels.append(fermi_level) 

986 

987 magmoms = [] 

988 for idx in output_corr[ONETEP_MULLIKEN]: 

989 magmom = parse_spin(idx) if idx else None 

990 magmoms.append(magmom) 

991 

992 real_species = [] 

993 for idx in species: 

994 real_specie = parse_species(idx) 

995 real_species.append(real_specie) 

996 

997 positions, forces = [], [] 

998 for idx in ipositions: 

999 if idx in i_first_positions: 

1000 position = parse_first_positions(idx) 

1001 else: 

1002 position = parse_positions(idx) 

1003 if position: 

1004 positions.append(position) 

1005 else: 

1006 n_pos -= 1 

1007 break 

1008 for idx in output_corr[ONETEP_FORCE]: 

1009 force = parse_force(idx) if idx else None 

1010 forces.append(force) 

1011 

1012 n_pos = len(positions) 

1013 

1014 # Numpy trick to get rid of configuration that are essentially the same 

1015 # in a regular geometry optimisation with internal BFGS, the first 

1016 # configuration is printed three time, we get rid of it 

1017 properties = [energies, forces, charges, magmoms] 

1018 

1019 if has_bfgs: 

1020 tmp = [i.positions for i in positions] 

1021 to_del = [] 

1022 for i in range(len(tmp[:-1])): 

1023 if is_in_bfgs(ipositions[i]): 

1024 if np.array_equal(tmp[i], tmp[i + 1]): 

1025 # If the deleted configuration has a property 

1026 # we want to keep it 

1027 for prop in properties: 

1028 if prop[i + 1] is not None and prop[i] is None: 

1029 prop[i] = prop[i + 1] 

1030 to_del.append(i + 1) 

1031 c = np.full((len(tmp)), True) 

1032 c[to_del] = False 

1033 energies = [energies[i] for i in range(n_pos) if c[i]] 

1034 forces = [forces[i] for i in range(n_pos) if c[i]] 

1035 charges = [charges[i] for i in range(n_pos) if c[i]] 

1036 magmoms = [magmoms[i] for i in range(n_pos) if c[i]] 

1037 positions = [positions[i] for i in range(n_pos) if c[i]] 

1038 ipositions = [ipositions[i] for i in range(n_pos) if c[i]] 

1039 

1040 n_pos = len(positions) 

1041 

1042 # We take care of properties that only show up at 

1043 # the beginning of onetep calculation 

1044 whole = np.append(output[ONETEP_START], n_lines) 

1045 

1046 if n_pos == 0: 

1047 raise RuntimeError(unable_to_read) 

1048 

1049 spin = np.full((n_pos), 1) 

1050 for sp in output[ONETEP_SPIN]: 

1051 tmp = zip(whole, whole[1:]) 

1052 for past, future in tmp: 

1053 if past < sp < future: 

1054 p = (past < ipositions) & (ipositions < future) 

1055 spin[p] = 2 

1056 

1057 cells_all = np.ones((n_pos, 3, 3)) 

1058 for idx, cell in enumerate(output[ONETEP_FIRST_CELL]): 

1059 tmp = zip(whole, whole[1:]) 

1060 for past, future in tmp: 

1061 if past < cell < future: 

1062 p = (past < ipositions) & (ipositions < future) 

1063 cells_all[p] = cells[idx] 

1064 

1065 # Prepare atom objects with all the properties 

1066 if isinstance(index, int): 

1067 indices = [range(n_pos)[index]] 

1068 else: 

1069 indices = range(n_pos)[index] 

1070 

1071 for idx in indices: 

1072 positions[idx].set_cell(cells_all[idx]) 

1073 if ipositions[idx] in is_frac_positions: 

1074 positions[idx].set_scaled_positions(positions[idx].get_positions()) 

1075 positions[idx].set_initial_charges(charges[idx]) 

1076 calc = SinglePointDFTCalculator( 

1077 positions[idx], 

1078 energy=energies[idx] if energies else None, 

1079 free_energy=energies[idx] if energies else None, 

1080 forces=forces[idx] if forces else None, 

1081 charges=charges[idx] if charges else None, 

1082 magmoms=magmoms[idx] if magmoms else None, 

1083 ) 

1084 # calc.kpts = [(0, 0, 0) for _ in range(spin[idx])] 

1085 positions[idx].calc = calc 

1086 yield positions[idx]