Coverage for /builds/debichem-team/python-ase/ase/io/gaussian.py: 95.28%

635 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-03-06 04:00 +0000

1import logging 

2import re 

3import warnings 

4from collections.abc import Iterable 

5from copy import deepcopy 

6 

7import numpy as np 

8 

9from ase import Atoms 

10from ase.calculators.calculator import Calculator, InputError 

11from ase.calculators.gaussian import Gaussian 

12from ase.calculators.singlepoint import SinglePointCalculator 

13from ase.data import atomic_masses_iupac2016, chemical_symbols 

14from ase.io import ParseError 

15from ase.io.zmatrix import parse_zmatrix 

16from ase.units import Bohr, Debye, Hartree 

17 

18logger = logging.getLogger(__name__) 

19 

20_link0_keys = [ 

21 'mem', 

22 'chk', 

23 'oldchk', 

24 'schk', 

25 'rwf', 

26 'oldmatrix', 

27 'oldrawmatrix', 

28 'int', 

29 'd2e', 

30 'save', 

31 'nosave', 

32 'errorsave', 

33 'cpu', 

34 'nprocshared', 

35 'gpucpu', 

36 'lindaworkers', 

37 'usessh', 

38 'ssh', 

39 'debuglinda', 

40] 

41 

42 

43_link0_special = [ 

44 'kjob', 

45 'subst', 

46] 

47 

48 

49# Certain problematic methods do not provide well-defined potential energy 

50# surfaces, because these "composite" methods involve geometry optimization 

51# and/or vibrational frequency analysis. In addition, the "energy" calculated 

52# by these methods are typically ZPVE corrected and/or temperature dependent 

53# free energies. 

54_problem_methods = [ 

55 'cbs-4m', 'cbs-qb3', 'cbs-apno', 

56 'g1', 'g2', 'g3', 'g4', 'g2mp2', 'g3mp2', 'g3b3', 'g3mp2b3', 'g4mp4', 

57 'w1', 'w1u', 'w1bd', 'w1ro', 

58] 

59 

60 

61_xc_to_method = dict( 

62 pbe='pbepbe', 

63 pbe0='pbe1pbe', 

64 hse06='hseh1pbe', 

65 hse03='ohse2pbe', 

66 lda='svwn', # gaussian "knows about" LSDA, but maybe not LDA. 

67 tpss='tpsstpss', 

68 revtpss='revtpssrevtpss', 

69) 

70 

71_nuclear_prop_names = ['spin', 'zeff', 'qmom', 'nmagm', 'znuc', 

72 'radnuclear', 'iso'] 

73 

74 

75def _get_molecule_spec(atoms, nuclear_props): 

76 ''' Generate the molecule specification section to write 

77 to the Gaussian input file, from the Atoms object and dict 

78 of nuclear properties''' 

79 molecule_spec = [] 

80 for i, atom in enumerate(atoms): 

81 symbol_section = atom.symbol + '(' 

82 # Check whether any nuclear properties of the atom have been set, 

83 # and if so, add them to the symbol section. 

84 nuclear_props_set = False 

85 for keyword, array in nuclear_props.items(): 

86 if array is not None and array[i] is not None: 

87 string = keyword + '=' + str(array[i]) + ', ' 

88 symbol_section += string 

89 nuclear_props_set = True 

90 

91 # Check whether the mass of the atom has been modified, 

92 # and if so, add it to the symbol section: 

93 mass_set = False 

94 symbol = atom.symbol 

95 expected_mass = atomic_masses_iupac2016[chemical_symbols.index( 

96 symbol)] 

97 if expected_mass != atoms[i].mass: 

98 mass_set = True 

99 string = 'iso' + '=' + str(atoms[i].mass) 

100 symbol_section += string 

101 

102 if nuclear_props_set or mass_set: 

103 symbol_section = symbol_section.strip(', ') 

104 symbol_section += ')' 

105 else: 

106 symbol_section = symbol_section.strip('(') 

107 

108 # Then attach the properties appropriately 

109 # this formatting was chosen for backwards compatibility reasons, but 

110 # it would probably be better to 

111 # 1) Ensure proper spacing between entries with explicit spaces 

112 # 2) Use fewer columns for the element 

113 # 3) Use 'e' (scientific notation) instead of 'f' for positions 

114 molecule_spec.append('{:<10s}{:20.10f}{:20.10f}{:20.10f}'.format( 

115 symbol_section, *atom.position)) 

116 

117 # unit cell vectors, in case of periodic boundary conditions 

118 for ipbc, tv in zip(atoms.pbc, atoms.cell): 

119 if ipbc: 

120 molecule_spec.append('TV {:20.10f}{:20.10f}{:20.10f}'.format(*tv)) 

121 

122 molecule_spec.append('') 

123 

124 return molecule_spec 

125 

126 

127def _format_output_type(output_type): 

128 ''' Given a letter: output_type, return 

129 a string formatted for a gaussian input file''' 

130 # Change output type to P if it has been set to T, because ASE 

131 # does not support reading info from 'terse' output files. 

132 if output_type is None or output_type == '' or 't' in output_type.lower(): 

133 output_type = 'P' 

134 

135 return f'#{output_type}' 

136 

137 

138def _check_problem_methods(method): 

139 ''' Check method string for problem methods and warn appropriately''' 

140 if method.lower() in _problem_methods: 

141 warnings.warn( 

142 'The requested method, {}, is a composite method. Composite ' 

143 'methods do not have well-defined potential energy surfaces, ' 

144 'so the energies, forces, and other properties returned by ' 

145 'ASE may not be meaningful, or they may correspond to a ' 

146 'different geometry than the one provided. ' 

147 'Please use these methods with caution.'.format(method) 

148 ) 

149 

150 

151def _pop_link0_params(params): 

152 '''Takes the params dict and returns a dict with the link0 keywords 

153 removed, and a list containing the link0 keywords and options 

154 to be written to the gaussian input file''' 

155 params = deepcopy(params) 

156 out = [] 

157 # Remove keywords from params, so they are not set again later in 

158 # route section 

159 for key in _link0_keys: 

160 if key not in params: 

161 continue 

162 val = params.pop(key) 

163 if not val or (isinstance(val, str) and key.lower() == val.lower()): 

164 out.append(f'%{key}') 

165 else: 

166 out.append(f'%{key}={val}') 

167 

168 # These link0 keywords have a slightly different syntax 

169 for key in _link0_special: 

170 if key not in params: 

171 continue 

172 val = params.pop(key) 

173 if not isinstance(val, str) and isinstance(val, Iterable): 

174 val = ' '.join(val) 

175 out.append(f'%{key} L{val}') 

176 

177 return params, out 

178 

179 

180def _format_method_basis(output_type, method, basis, fitting_basis): 

181 output_string = "" 

182 if basis and method and fitting_basis: 

183 output_string = '{} {}/{}/{} ! ASE formatted method and basis'.format( 

184 output_type, method, basis, fitting_basis) 

185 elif basis and method: 

186 output_string = '{} {}/{} ! ASE formatted method and basis'.format( 

187 output_type, method, basis) 

188 else: 

189 output_string = f'{output_type}' 

190 for value in [method, basis]: 

191 if value is not None: 

192 output_string += f' {value}' 

193 return output_string 

194 

195 

196def _format_route_params(params): 

197 '''Get keywords and values from the params dictionary and return 

198 as a list of lines to add to the gaussian input file''' 

199 out = [] 

200 for key, val in params.items(): 

201 # assume bare keyword if val is falsey, i.e. '', None, False, etc. 

202 # also, for backwards compatibility: assume bare keyword if key and 

203 # val are the same 

204 if not val or (isinstance(val, str) and key.lower() == val.lower()): 

205 out.append(key) 

206 elif not isinstance(val, str) and isinstance(val, Iterable): 

207 out.append('{}({})'.format(key, ','.join(val))) 

208 else: 

209 out.append(f'{key}({val})') 

210 return out 

211 

212 

213def _format_addsec(addsec): 

214 '''Format addsec string as a list of lines to be added to the gaussian 

215 input file.''' 

216 out = [] 

217 if addsec is not None: 

218 out.append('') 

219 if isinstance(addsec, str): 

220 out.append(addsec) 

221 elif isinstance(addsec, Iterable): 

222 out += list(addsec) 

223 return out 

224 

225 

226def _format_basis_set(basis, basisfile, basis_set): 

227 '''Format either: the basis set filename (basisfile), the basis set file 

228 contents (from reading basisfile), or the basis_set text as a list of 

229 strings to be added to the gaussian input file.''' 

230 out = [] 

231 # if basis='gen', set basisfile. Either give a path to a basisfile, or 

232 # read in the provided file and paste it verbatim 

233 if basisfile is not None: 

234 if basisfile[0] == '@': 

235 out.append(basisfile) 

236 else: 

237 with open(basisfile) as fd: 

238 out.append(fd.read()) 

239 elif basis_set is not None: 

240 out.append(basis_set) 

241 else: 

242 if basis is not None and basis.lower() == 'gen': 

243 raise InputError('Please set basisfile or basis_set') 

244 return out 

245 

246 

247def write_gaussian_in(fd, atoms, properties=['energy'], 

248 method=None, basis=None, fitting_basis=None, 

249 output_type='P', basisfile=None, basis_set=None, 

250 xc=None, charge=None, mult=None, extra=None, 

251 ioplist=None, addsec=None, spinlist=None, 

252 zefflist=None, qmomlist=None, nmagmlist=None, 

253 znuclist=None, radnuclearlist=None, 

254 **params): 

255 ''' 

256 Generates a Gaussian input file 

257 

258 Parameters 

259 ----------- 

260 fd: file-like 

261 where the Gaussian input file will be written 

262 atoms: Atoms 

263 Structure to write to the input file 

264 properties: list 

265 Properties to calculate 

266 method: str 

267 Level of theory to use, e.g. ``hf``, ``ccsd``, ``mp2``, or ``b3lyp``. 

268 Overrides ``xc`` (see below). 

269 xc: str 

270 Level of theory to use. Translates several XC functionals from 

271 their common name (e.g. ``PBE``) to their internal Gaussian name 

272 (e.g. ``PBEPBE``). 

273 basis: str 

274 The basis set to use. If not provided, no basis set will be requested, 

275 which usually results in ``STO-3G``. Maybe omitted if basisfile is set 

276 (see below). 

277 fitting_basis: str 

278 The name of the fitting basis set to use. 

279 output_type: str 

280 Level of output to record in the Gaussian 

281 output file - this may be ``N``- normal or ``P`` - 

282 additional. 

283 basisfile: str 

284 The name of the basis file to use. If a value is provided, basis may 

285 be omitted (it will be automatically set to 'gen') 

286 basis_set: str 

287 The basis set definition to use. This is an alternative 

288 to basisfile, and would be the same as the contents 

289 of such a file. 

290 charge: int 

291 The system charge. If not provided, it will be automatically 

292 determined from the ``Atoms`` object’s initial_charges. 

293 mult: int 

294 The system multiplicity (``spin + 1``). If not provided, it will be 

295 automatically determined from the ``Atoms`` object’s 

296 ``initial_magnetic_moments``. 

297 extra: str 

298 Extra lines to be included in the route section verbatim. 

299 It should not be necessary to use this, but it is included for 

300 backwards compatibility. 

301 ioplist: list 

302 A collection of IOPs definitions to be included in the route line. 

303 addsec: str 

304 Text to be added after the molecular geometry specification, e.g. for 

305 defining masses with ``freq=ReadIso``. 

306 spinlist: list 

307 A list of nuclear spins to be added into the nuclear 

308 propeties section of the molecule specification. 

309 zefflist: list 

310 A list of effective charges to be added into the nuclear 

311 propeties section of the molecule specification. 

312 qmomlist: list 

313 A list of nuclear quadropole moments to be added into 

314 the nuclear propeties section of the molecule 

315 specification. 

316 nmagmlist: list 

317 A list of nuclear magnetic moments to be added into 

318 the nuclear propeties section of the molecule 

319 specification. 

320 znuclist: list 

321 A list of nuclear charges to be added into the nuclear 

322 propeties section of the molecule specification. 

323 radnuclearlist: list 

324 A list of nuclear radii to be added into the nuclear 

325 propeties section of the molecule specification. 

326 params: dict 

327 Contains any extra keywords and values that will be included in either 

328 the link0 section or route section of the gaussian input file. 

329 To be included in the link0 section, the keyword must be one of the 

330 following: ``mem``, ``chk``, ``oldchk``, ``schk``, ``rwf``, 

331 ``oldmatrix``, ``oldrawmatrix``, ``int``, ``d2e``, ``save``, 

332 ``nosave``, ``errorsave``, ``cpu``, ``nprocshared``, ``gpucpu``, 

333 ``lindaworkers``, ``usessh``, ``ssh``, ``debuglinda``. 

334 Any other keywords will be placed (along with their values) in the 

335 route section. 

336 ''' 

337 

338 params = deepcopy(params) 

339 

340 if properties is None: 

341 properties = ['energy'] 

342 

343 output_type = _format_output_type(output_type) 

344 

345 # basis can be omitted if basisfile is provided 

346 if basis is None: 

347 if basisfile is not None or basis_set is not None: 

348 basis = 'gen' 

349 

350 # determine method from xc if it is provided 

351 if method is None: 

352 if xc is not None: 

353 method = _xc_to_method.get(xc.lower(), xc) 

354 

355 # If the user requests a problematic method, rather than raising an error 

356 # or proceeding blindly, give the user a warning that the results parsed 

357 # by ASE may not be meaningful. 

358 if method is not None: 

359 _check_problem_methods(method) 

360 

361 # determine charge from initial charges if not passed explicitly 

362 if charge is None: 

363 charge = atoms.get_initial_charges().sum() 

364 

365 # determine multiplicity from initial magnetic moments 

366 # if not passed explicitly 

367 if mult is None: 

368 mult = atoms.get_initial_magnetic_moments().sum() + 1 

369 

370 # set up link0 arguments 

371 out = [] 

372 params, link0_list = _pop_link0_params(params) 

373 out.extend(link0_list) 

374 

375 # begin route line 

376 # note: unlike in old calculator, each route keyword is put on its own 

377 # line. 

378 out.append(_format_method_basis(output_type, method, basis, fitting_basis)) 

379 

380 # If the calculator's parameter dictionary contains an isolist, we ignore 

381 # this - it is up to the user to attach this info as the atoms' masses 

382 # if they wish for it to be used: 

383 params.pop('isolist', None) 

384 

385 # Any params left will belong in the route section of the file: 

386 out.extend(_format_route_params(params)) 

387 

388 if ioplist is not None: 

389 out.append('IOP(' + ', '.join(ioplist) + ')') 

390 

391 # raw list of explicit keywords for backwards compatibility 

392 if extra is not None: 

393 out.append(extra) 

394 

395 # Add 'force' iff the user requested forces, since Gaussian crashes when 

396 # 'force' is combined with certain other keywords such as opt and irc. 

397 if 'forces' in properties and 'force' not in params: 

398 out.append('force') 

399 

400 # header, charge, and mult 

401 out += ['', 'Gaussian input prepared by ASE', '', 

402 f'{charge:.0f} {mult:.0f}'] 

403 

404 # make dict of nuclear properties: 

405 nuclear_props = {'spin': spinlist, 'zeff': zefflist, 'qmom': qmomlist, 

406 'nmagm': nmagmlist, 'znuc': znuclist, 

407 'radnuclear': radnuclearlist} 

408 nuclear_props = {k: v for k, v in nuclear_props.items() if v is not None} 

409 

410 # atomic positions and nuclear properties: 

411 molecule_spec = _get_molecule_spec(atoms, nuclear_props) 

412 for line in molecule_spec: 

413 out.append(line) 

414 

415 out.extend(_format_basis_set(basis, basisfile, basis_set)) 

416 

417 out.extend(_format_addsec(addsec)) 

418 

419 out += ['', ''] 

420 fd.write('\n'.join(out)) 

421 

422 

423# Regexp for reading an input file: 

424 

425_re_link0 = re.compile(r'^\s*%([^\=\)\(!]+)=?([^\=\)\(!]+)?(!.+)?') 

426# Link0 lines are in the format: 

427# '% keyword = value' or '% keyword' 

428# (with or without whitespaces) 

429 

430_re_output_type = re.compile(r'^\s*#\s*([NPTnpt]?)\s*') 

431# The start of the route section begins with a '#', and then may 

432# be followed by the desired level of output in the output file: P, N or T. 

433 

434_re_method_basis = re.compile( 

435 r"\s*([\w-]+)\s*\/([^/=!]+)([\/]([^!]+))?\s*(!.+)?") 

436# Matches method, basis and optional fitting basis in the format: 

437# method/basis/fitting_basis ! comment 

438# They will appear in this format if the Gaussian file has been generated 

439# by ASE using a calculator with the basis and method keywords set. 

440 

441_re_chgmult = re.compile(r'^(\s*[+-]?\d+(?:,\s*|\s+)[+-]?\d+\s*){1,}$') 

442# This is a bit more complex of a regex than we typically want, but it 

443# can be difficult to determine whether a line contains the charge and 

444# multiplicity, rather than just another route keyword. By making sure 

445# that the line contains exactly an even number of *integers*, separated by 

446# either a comma (and possibly whitespace) or some amount of whitespace, we 

447# can be more confident that we've actually found the charge and multiplicity. 

448# Note that in recent versions of Gaussian, the gjf file can contain fragments, 

449# where you can give a charge and multiplicity to each fragment. This pattern 

450# will allow ASE to read this line in for the charge and multiplicity, however 

451# the _get_charge_mult method will only input the first two integers that 

452# always gives the overall charge and multiplcity for the full chemical system. 

453# The charge and multiplicity of the fragments will be ignored in the 

454# _get_charge_mult method. 

455 

456_re_nuclear_props = re.compile(r'\(([^\)]+)\)') 

457# Matches the nuclear properties, which are contained in parantheses. 

458 

459# The following methods are used in GaussianConfiguration's 

460# parse_gaussian_input method: 

461 

462 

463def _get_link0_param(link0_match): 

464 '''Gets link0 keyword and option from a re.Match and returns them 

465 in a dictionary format''' 

466 value = link0_match.group(2) 

467 if value is not None: 

468 value = value.strip() 

469 else: 

470 value = '' 

471 return {link0_match.group(1).lower().strip(): value.lower()} 

472 

473 

474def _get_all_link0_params(link0_section): 

475 ''' Given a string link0_section which contains the link0 

476 section of a gaussian input file, returns a dictionary of 

477 keywords and values''' 

478 parameters = {} 

479 for line in link0_section: 

480 link0_match = _re_link0.match(line) 

481 link0_param = _get_link0_param(link0_match) 

482 if link0_param is not None: 

483 parameters.update(link0_param) 

484 return parameters 

485 

486 

487def _convert_to_symbol(string): 

488 '''Converts an input string into a format 

489 that can be input to the 'symbol' parameter of an 

490 ASE Atom object (can be a chemical symbol (str) 

491 or an atomic number (int)). 

492 This is achieved by either stripping any 

493 integers from the string, or converting a string 

494 containing an atomic number to integer type''' 

495 symbol = _validate_symbol_string(string) 

496 if symbol.isnumeric(): 

497 atomic_number = int(symbol) 

498 symbol = chemical_symbols[atomic_number] 

499 else: 

500 match = re.match(r'([A-Za-z]+)', symbol) 

501 symbol = match.group(1) 

502 return symbol 

503 

504 

505def _validate_symbol_string(string): 

506 if "-" in string: 

507 raise ParseError("ERROR: Could not read the Gaussian input file, as" 

508 " molecule specifications for molecular mechanics " 

509 "calculations are not supported.") 

510 return string 

511 

512 

513def _get_key_value_pairs(line): 

514 '''Read line of a gaussian input file with keywords and options 

515 separated according to the rules of the route section. 

516 

517 Parameters 

518 ---------- 

519 line (string) 

520 A line of a gaussian input file. 

521 

522 Returns 

523 --------- 

524 params (dict) 

525 Contains the keywords and options found in the line. 

526 ''' 

527 params = {} 

528 line = line.strip(' #') 

529 line = line.split('!')[0] # removes any comments 

530 # First, get the keywords and options sepatated with 

531 # parantheses: 

532 match_iterator = re.finditer(r'\(([^\)]+)\)', line) 

533 index_ranges = [] 

534 for match in match_iterator: 

535 index_range = [match.start(0), match.end(0)] 

536 options = match.group(1) 

537 # keyword is last word in previous substring: 

538 keyword_string = line[:match.start(0)] 

539 keyword_match_iter = [k for k in re.finditer( 

540 r'[^\,/\s]+', keyword_string) if k.group() != '='] 

541 keyword = keyword_match_iter[-1].group().strip(' =') 

542 index_range[0] = keyword_match_iter[-1].start() 

543 params.update({keyword.lower(): options.lower()}) 

544 index_ranges.append(index_range) 

545 

546 # remove from the line the keywords and options that we have saved: 

547 index_ranges.reverse() 

548 for index_range in index_ranges: 

549 start = index_range[0] 

550 stop = index_range[1] 

551 line = line[0: start:] + line[stop + 1::] 

552 

553 # Next, get the keywords and options separated with 

554 # an equals sign, and those without an equals sign 

555 # must be keywords without options: 

556 

557 # remove any whitespaces around '=': 

558 line = re.sub(r'\s*=\s*', '=', line) 

559 line = [x for x in re.split(r'[\s,\/]', line) if x != ''] 

560 

561 for s in line: 

562 if '=' in s: 

563 s = s.split('=') 

564 keyword = s.pop(0) 

565 options = s.pop(0) 

566 params.update({keyword.lower(): options.lower()}) 

567 else: 

568 if len(s) > 0: 

569 params.update({s.lower(): None}) 

570 

571 return params 

572 

573 

574def _get_route_params(line): 

575 '''Reads keywords and values from a line in 

576 a Gaussian input file's route section, 

577 and returns them as a dictionary''' 

578 method_basis_match = _re_method_basis.match(line) 

579 if method_basis_match: 

580 params = {} 

581 ase_gen_comment = '! ASE formatted method and basis' 

582 if method_basis_match.group(5) == ase_gen_comment: 

583 params['method'] = method_basis_match.group(1).strip().lower() 

584 params['basis'] = method_basis_match.group(2).strip().lower() 

585 if method_basis_match.group(4): 

586 params['fitting_basis'] = method_basis_match.group( 

587 4).strip().lower() 

588 return params 

589 

590 return _get_key_value_pairs(line) 

591 

592 

593def _get_all_route_params(route_section): 

594 ''' Given a string: route_section which contains the route 

595 section of a gaussian input file, returns a dictionary of 

596 keywords and values''' 

597 parameters = {} 

598 

599 for line in route_section: 

600 output_type_match = _re_output_type.match(line) 

601 if not parameters.get('output_type') and output_type_match: 

602 line = line.strip(output_type_match.group(0)) 

603 parameters.update( 

604 {'output_type': output_type_match.group(1).lower()}) 

605 # read route section 

606 route_params = _get_route_params(line) 

607 if route_params is not None: 

608 parameters.update(route_params) 

609 return parameters 

610 

611 

612def _get_charge_mult(chgmult_section): 

613 '''return a dict with the charge and multiplicity from 

614 a list chgmult_section that contains the charge and multiplicity 

615 line, read from a gaussian input file''' 

616 chgmult_match = _re_chgmult.match(str(chgmult_section)) 

617 try: 

618 chgmult = chgmult_match.group(0).split() 

619 return {'charge': int(chgmult[0]), 'mult': int(chgmult[1])} 

620 except (IndexError, AttributeError): 

621 raise ParseError("ERROR: Could not read the charge and " 

622 "multiplicity from the Gaussian input " 

623 "file. There must be an even number of " 

624 "integers separated with whitespace or " 

625 "a comma, where the first two integers " 

626 "must be the overall charge and overall " 

627 "multiplicity of the chemical system, " 

628 "respectively.") 

629 

630 

631def _get_nuclear_props(line): 

632 ''' Reads any info in parantheses in the line and returns 

633 a dictionary of the nuclear properties.''' 

634 nuclear_props_match = _re_nuclear_props.search(line) 

635 nuclear_props = {} 

636 if nuclear_props_match: 

637 nuclear_props = _get_key_value_pairs(nuclear_props_match.group(1)) 

638 updated_nuclear_props = {} 

639 for key, value in nuclear_props.items(): 

640 if value.isnumeric(): 

641 value = int(value) 

642 else: 

643 value = float(value) 

644 if key not in _nuclear_prop_names: 

645 if "fragment" in key: 

646 warnings.warn("Fragments are not " 

647 "currently supported.") 

648 warnings.warn("The following nuclear properties " 

649 "could not be saved: {}".format( 

650 {key: value})) 

651 else: 

652 updated_nuclear_props[key] = value 

653 nuclear_props = updated_nuclear_props 

654 

655 for k in _nuclear_prop_names: 

656 if k not in nuclear_props: 

657 nuclear_props[k] = None 

658 

659 return nuclear_props 

660 

661 

662def _get_atoms_info(line): 

663 '''Returns the symbol and position of an atom from a line 

664 in the molecule specification section''' 

665 nuclear_props_match = _re_nuclear_props.search(line) 

666 if nuclear_props_match: 

667 line = line.replace(nuclear_props_match.group(0), '') 

668 tokens = line.split() 

669 symbol = _convert_to_symbol(tokens[0]) 

670 pos = list(tokens[1:]) 

671 

672 return symbol, pos 

673 

674 

675def _get_cartesian_atom_coords(symbol, pos): 

676 '''Returns the coordinates: pos as a list of floats if they 

677 are cartesian, and not in z-matrix format''' 

678 if len(pos) < 3 or (pos[0] == '0' and symbol != 'TV'): 

679 # In this case, we have a z-matrix definition, so 

680 # no cartesian coords. 

681 return None 

682 elif len(pos) > 3: 

683 raise ParseError("ERROR: Gaussian input file could " 

684 "not be read as freeze codes are not" 

685 " supported. If using cartesian " 

686 "coordinates, these must be " 

687 "given as 3 numbers separated " 

688 "by whitespace.") 

689 else: 

690 try: 

691 return list(map(float, pos)) 

692 except ValueError: 

693 raise ParseError( 

694 "ERROR: Molecule specification in" 

695 "Gaussian input file could not be read") 

696 

697 

698def _get_zmatrix_line(line): 

699 ''' Converts line into the format needed for it to 

700 be added to the z-matrix contents ''' 

701 line_list = line.split() 

702 if len(line_list) == 8 and line_list[7] == '1': 

703 raise ParseError( 

704 "ERROR: Could not read the Gaussian input file" 

705 ", as the alternative Z-matrix format using " 

706 "two bond angles instead of a bond angle and " 

707 "a dihedral angle is not supported.") 

708 return line.strip() + '\n' 

709 

710 

711def _read_zmatrix(zmatrix_contents, zmatrix_vars=None): 

712 ''' Reads a z-matrix (zmatrix_contents) using its list of variables 

713 (zmatrix_vars), and returns atom positions and symbols ''' 

714 try: 

715 atoms = parse_zmatrix(zmatrix_contents, defs=zmatrix_vars) 

716 except (ValueError, AssertionError) as e: 

717 raise ParseError("Failed to read Z-matrix from " 

718 "Gaussian input file: ", e) 

719 except KeyError as e: 

720 raise ParseError("Failed to read Z-matrix from " 

721 "Gaussian input file, as symbol: {}" 

722 "could not be recognised. Please make " 

723 "sure you use element symbols, not " 

724 "atomic numbers in the element labels.".format(e)) 

725 positions = atoms.positions 

726 symbols = atoms.get_chemical_symbols() 

727 return positions, symbols 

728 

729 

730def _get_nuclear_props_for_all_atoms(nuclear_props): 

731 ''' Returns the nuclear properties for all atoms as a dictionary, 

732 in the format needed for it to be added to the parameters dictionary.''' 

733 params = {k + 'list': [] for k in _nuclear_prop_names} 

734 for dictionary in nuclear_props: 

735 for key, value in dictionary.items(): 

736 params[key + 'list'].append(value) 

737 

738 for key, array in params.items(): 

739 values_set = False 

740 for value in array: 

741 if value is not None: 

742 values_set = True 

743 if not values_set: 

744 params[key] = None 

745 return params 

746 

747 

748def _get_atoms_from_molspec(molspec_section): 

749 ''' Takes a string: molspec_section which contains the molecule 

750 specification section of a gaussian input file, and returns an atoms 

751 object that represents this.''' 

752 # These will contain info that will be attached to the Atoms object: 

753 symbols = [] 

754 positions = [] 

755 pbc = np.zeros(3, dtype=bool) 

756 cell = np.zeros((3, 3)) 

757 npbc = 0 

758 

759 # Will contain a dictionary of nuclear properties for each atom, 

760 # that will later be saved to the parameters dict: 

761 nuclear_props = [] 

762 

763 # Info relating to the z-matrix definition (if set) 

764 zmatrix_type = False 

765 zmatrix_contents = "" 

766 zmatrix_var_section = False 

767 zmatrix_vars = "" 

768 

769 for line in molspec_section: 

770 # Remove any comments and replace '/' and ',' with whitespace, 

771 # as these are equivalent: 

772 line = line.split('!')[0].replace('/', ' ').replace(',', ' ') 

773 if (line.split()): 

774 if zmatrix_type: 

775 # Save any variables set when defining the z-matrix: 

776 if zmatrix_var_section: 

777 zmatrix_vars += line.strip() + '\n' 

778 continue 

779 elif 'variables' in line.lower(): 

780 zmatrix_var_section = True 

781 continue 

782 elif 'constants' in line.lower(): 

783 zmatrix_var_section = True 

784 warnings.warn("Constants in the optimisation are " 

785 "not currently supported. Instead " 

786 "setting constants as variables.") 

787 continue 

788 

789 symbol, pos = _get_atoms_info(line) 

790 current_nuclear_props = _get_nuclear_props(line) 

791 

792 if not zmatrix_type: 

793 pos = _get_cartesian_atom_coords(symbol, pos) 

794 if pos is None: 

795 zmatrix_type = True 

796 

797 if symbol.upper() == 'TV' and pos is not None: 

798 pbc[npbc] = True 

799 cell[npbc] = pos 

800 npbc += 1 

801 else: 

802 nuclear_props.append(current_nuclear_props) 

803 if not zmatrix_type: 

804 symbols.append(symbol) 

805 positions.append(pos) 

806 

807 if zmatrix_type: 

808 zmatrix_contents += _get_zmatrix_line(line) 

809 

810 # Now that we are past the molecule spec. section, we can read 

811 # the entire z-matrix (if set): 

812 if len(positions) == 0: 

813 if zmatrix_type: 

814 if zmatrix_vars == '': 

815 zmatrix_vars = None 

816 positions, symbols = _read_zmatrix( 

817 zmatrix_contents, zmatrix_vars) 

818 

819 try: 

820 atoms = Atoms(symbols, positions, pbc=pbc, cell=cell) 

821 except (IndexError, ValueError, KeyError) as e: 

822 raise ParseError("ERROR: Could not read the Gaussian input file, " 

823 "due to a problem with the molecule " 

824 "specification: {}".format(e)) 

825 

826 nuclear_props = _get_nuclear_props_for_all_atoms(nuclear_props) 

827 

828 return atoms, nuclear_props 

829 

830 

831def _get_readiso_param(parameters): 

832 ''' Returns a tuple containing the frequency 

833 keyword and its options, if the frequency keyword is 

834 present in parameters and ReadIso is one of its options''' 

835 freq_options = parameters.get('freq', None) 

836 if freq_options: 

837 freq_name = 'freq' 

838 else: 

839 freq_options = parameters.get('frequency', None) 

840 freq_name = 'frequency' 

841 if freq_options is not None: 

842 if ('readiso' or 'readisotopes') in freq_options: 

843 return freq_name, freq_options 

844 return None, None 

845 

846 

847def _get_readiso_info(line, parameters): 

848 '''Reads the temperature, pressure and scale from the first line 

849 of a ReadIso section of a Gaussian input file. Returns these in 

850 a dictionary.''' 

851 readiso_params = {} 

852 if _get_readiso_param(parameters)[0] is not None: 

853 # when count_iso is 0 we are in the line where 

854 # temperature, pressure, [scale] is saved 

855 line = line.replace( 

856 '[', '').replace(']', '') 

857 tokens = line.strip().split() 

858 try: 

859 readiso_params['temperature'] = tokens[0] 

860 readiso_params['pressure'] = tokens[1] 

861 readiso_params['scale'] = tokens[2] 

862 except IndexError: 

863 pass 

864 if readiso_params != {}: 

865 

866 return readiso_params 

867 

868 

869def _delete_readiso_param(parameters): 

870 '''Removes the readiso parameter from the parameters dict''' 

871 parameters = deepcopy(parameters) 

872 freq_name, freq_options = _get_readiso_param(parameters) 

873 if freq_name is not None: 

874 if 'readisotopes' in freq_options: 

875 iso_name = 'readisotopes' 

876 else: 

877 iso_name = 'readiso' 

878 freq_options = [v.group() for v in re.finditer( 

879 r'[^\,/\s]+', freq_options)] 

880 freq_options.remove(iso_name) 

881 new_freq_options = '' 

882 for v in freq_options: 

883 new_freq_options += v + ' ' 

884 if new_freq_options == '': 

885 new_freq_options = None 

886 else: 

887 new_freq_options = new_freq_options.strip() 

888 parameters[freq_name] = new_freq_options 

889 return parameters 

890 

891 

892def _update_readiso_params(parameters, symbols): 

893 ''' Deletes the ReadIso option from the route section as we 

894 write out the masses in the nuclear properties section 

895 instead of the ReadIso section. 

896 Ensures the masses array is the same length as the 

897 symbols array. This is necessary due to the way the 

898 ReadIso section is defined: 

899 The mass of each atom is listed on a separate line, in 

900 order of appearance in the molecule spec. A blank line 

901 indicates not to modify the mass for that atom. 

902 But you do not have to leave blank lines equal to the 

903 remaining atoms after you finsihed setting masses. 

904 E.g. if you had 10 masses and only want to set the mass 

905 for the first atom, you don't have to leave 9 blank lines 

906 after it. 

907 ''' 

908 parameters = _delete_readiso_param(parameters) 

909 if parameters.get('isolist') is not None: 

910 if len(parameters['isolist']) < len(symbols): 

911 for _ in range(len(symbols) - len(parameters['isolist'])): 

912 parameters['isolist'].append(None) 

913 if all(m is None for m in parameters['isolist']): 

914 parameters['isolist'] = None 

915 

916 return parameters 

917 

918 

919def _validate_params(parameters): 

920 '''Checks whether all of the required parameters exist in the 

921 parameters dict and whether it contains any unsupported settings 

922 ''' 

923 # Check for unsupported settings 

924 unsupported_settings = { 

925 "z-matrix", "modredun", "modredundant", "addredundant", "addredun", 

926 "readopt", "rdopt"} 

927 

928 for s in unsupported_settings: 

929 for v in parameters.values(): 

930 if v is not None and s in str(v): 

931 raise ParseError( 

932 "ERROR: Could not read the Gaussian input file" 

933 ", as the option: {} is currently unsupported." 

934 .format(s)) 

935 

936 for k in list(parameters.keys()): 

937 if "popt" in k: 

938 parameters["opt"] = parameters.pop(k) 

939 warnings.warn("The option {} is currently unsupported. " 

940 "This has been replaced with {}." 

941 .format("POpt", "opt")) 

942 return 

943 

944 

945def _get_extra_section_params(extra_section, parameters, atoms): 

946 ''' Takes a list of strings: extra_section, which contains 

947 the 'extra' lines in a gaussian input file. Also takes the parameters 

948 that have been read so far, and the atoms that have been read from the 

949 file. 

950 Returns an updated version of the parameters dict, containing details from 

951 this extra section. This may include the basis set definition or filename, 

952 and/or the readiso section.''' 

953 

954 new_parameters = deepcopy(parameters) 

955 # Will store the basis set definition (if set): 

956 basis_set = "" 

957 # This will indicate whether we have a readiso section: 

958 readiso = _get_readiso_param(new_parameters)[0] 

959 # This will indicate which line of the readiso section we are reading: 

960 count_iso = 0 

961 readiso_masses = [] 

962 

963 for line in extra_section: 

964 if line.split(): 

965 # check that the line isn't just a comment 

966 if line.split()[0] == '!': 

967 continue 

968 line = line.strip().split('!')[0] 

969 

970 if len(line) > 0 and line[0] == '@': 

971 # If the name of a basis file is specified, this line 

972 # begins with a '@' 

973 new_parameters['basisfile'] = line 

974 elif readiso and count_iso < len(atoms.symbols) + 1: 

975 # The ReadIso section has 1 more line than the number of 

976 # symbols 

977 if count_iso == 0 and line != '\n': 

978 # The first line in the readiso section contains the 

979 # temperature, pressure, scale. Here we save this: 

980 readiso_info = _get_readiso_info(line, new_parameters) 

981 if readiso_info is not None: 

982 new_parameters.update(readiso_info) 

983 # If the atom masses were set in the nuclear properties 

984 # section, they will be overwritten by the ReadIso 

985 # section 

986 readiso_masses = [] 

987 count_iso += 1 

988 elif count_iso > 0: 

989 # The remaining lines in the ReadIso section are 

990 # the masses of the atoms 

991 try: 

992 readiso_masses.append(float(line)) 

993 except ValueError: 

994 readiso_masses.append(None) 

995 count_iso += 1 

996 else: 

997 # If the rest of the file is not the ReadIso section, 

998 # then it must be the definition of the basis set. 

999 if new_parameters.get('basis', '') == 'gen' \ 

1000 or 'gen' in new_parameters: 

1001 if line.strip() != '': 

1002 basis_set += line + '\n' 

1003 

1004 if readiso: 

1005 new_parameters['isolist'] = readiso_masses 

1006 new_parameters = _update_readiso_params(new_parameters, atoms.symbols) 

1007 

1008 # Saves the basis set definition to the parameters array if 

1009 # it has been set: 

1010 if basis_set != '': 

1011 new_parameters['basis_set'] = basis_set 

1012 new_parameters['basis'] = 'gen' 

1013 new_parameters.pop('gen', None) 

1014 

1015 return new_parameters 

1016 

1017 

1018def _get_gaussian_in_sections(fd): 

1019 ''' Reads a gaussian input file and returns 

1020 a dictionary of the sections of the file - each dictionary 

1021 value is a list of strings - each string is a line in that 

1022 section. ''' 

1023 # These variables indicate which section of the 

1024 # input file is currently being read: 

1025 route_section = False 

1026 atoms_section = False 

1027 atoms_saved = False 

1028 # Here we will store the sections of the file in a dictionary, 

1029 # as lists of strings 

1030 gaussian_sections = {'link0': [], 'route': [], 

1031 'charge_mult': [], 'mol_spec': [], 'extra': []} 

1032 

1033 for line in fd: 

1034 line = line.strip(' ') 

1035 link0_match = _re_link0.match(line) 

1036 output_type_match = _re_output_type.match(line) 

1037 chgmult_match = _re_chgmult.match(line) 

1038 if link0_match: 

1039 gaussian_sections['link0'].append(line) 

1040 # The first blank line appears at the end of the route section 

1041 # and a blank line appears at the end of the atoms section: 

1042 elif line == '\n' and (route_section or atoms_section): 

1043 route_section = False 

1044 atoms_section = False 

1045 elif output_type_match or route_section: 

1046 route_section = True 

1047 gaussian_sections['route'].append(line) 

1048 elif chgmult_match: 

1049 gaussian_sections['charge_mult'] = line 

1050 # After the charge and multiplicty have been set, the 

1051 # molecule specification section of the input file begins: 

1052 atoms_section = True 

1053 elif atoms_section: 

1054 gaussian_sections['mol_spec'].append(line) 

1055 atoms_saved = True 

1056 elif atoms_saved: 

1057 # Next we read the other sections below the molecule spec. 

1058 # This may include the ReadIso section and the basis set 

1059 # definition or filename 

1060 gaussian_sections['extra'].append(line) 

1061 

1062 return gaussian_sections 

1063 

1064 

1065class GaussianConfiguration: 

1066 

1067 def __init__(self, atoms, parameters): 

1068 self.atoms = atoms.copy() 

1069 self.parameters = deepcopy(parameters) 

1070 

1071 def get_atoms(self): 

1072 return self.atoms 

1073 

1074 def get_parameters(self): 

1075 return self.parameters 

1076 

1077 def get_calculator(self): 

1078 # Explicitly set parameters that must for security reasons not be 

1079 # taken from file: 

1080 calc = Gaussian(atoms=self.atoms, command=None, restart=None, 

1081 ignore_bad_restart_file=Calculator._deprecated, 

1082 label='Gaussian', directory='.', **self.parameters) 

1083 return calc 

1084 

1085 @staticmethod 

1086 def parse_gaussian_input(fd): 

1087 '''Reads a gaussian input file into an atoms object and 

1088 parameters dictionary. 

1089 

1090 Parameters 

1091 ---------- 

1092 fd: file-like 

1093 Contains the contents of a gaussian input file 

1094 

1095 Returns 

1096 --------- 

1097 GaussianConfiguration 

1098 Contains an atoms object created using the structural 

1099 information from the input file. 

1100 Contains a parameters dictionary, which stores any 

1101 keywords and options found in the link-0 and route 

1102 sections of the input file. 

1103 ''' 

1104 # The parameters dict to attach to the calculator 

1105 parameters = {} 

1106 

1107 file_sections = _get_gaussian_in_sections(fd) 

1108 

1109 # Update the parameters dictionary with the keywords and values 

1110 # from each section of the input file: 

1111 parameters.update(_get_all_link0_params(file_sections['link0'])) 

1112 parameters.update(_get_all_route_params(file_sections['route'])) 

1113 parameters.update(_get_charge_mult(file_sections['charge_mult'])) 

1114 atoms, nuclear_props = _get_atoms_from_molspec( 

1115 file_sections['mol_spec']) 

1116 parameters.update(nuclear_props) 

1117 parameters.update(_get_extra_section_params( 

1118 file_sections['extra'], parameters, atoms)) 

1119 

1120 _validate_params(parameters) 

1121 

1122 return GaussianConfiguration(atoms, parameters) 

1123 

1124 

1125def read_gaussian_in(fd, attach_calculator=False): 

1126 ''' 

1127 Reads a gaussian input file and returns an Atoms object. 

1128 

1129 Parameters 

1130 ---------- 

1131 fd: file-like 

1132 Contains the contents of a gaussian input file 

1133 

1134 attach_calculator: bool 

1135 When set to ``True``, a Gaussian calculator will be 

1136 attached to the Atoms object which is returned. 

1137 This will mean that additional information is read 

1138 from the input file (see below for details). 

1139 

1140 Returns 

1141 ---------- 

1142 Atoms 

1143 An Atoms object containing the following information that has been 

1144 read from the input file: symbols, positions, cell. 

1145 

1146 Notes 

1147 ---------- 

1148 Gaussian input files can be read where the atoms' locations are set with 

1149 cartesian coordinates or as a z-matrix. Variables may be used in the 

1150 z-matrix definition, but currently setting constants for constraining 

1151 a geometry optimisation is not supported. Note that the `alternative` 

1152 z-matrix definition where two bond angles are set instead of a bond angle 

1153 and a dihedral angle is not currently supported. 

1154 

1155 If the parameter ``attach_calculator`` is set to ``True``, then the Atoms 

1156 object is returned with a Gaussian calculator attached. 

1157 

1158 This Gaussian calculator will contain a parameters dictionary which will 

1159 contain the Link 0 commands and the options and keywords set in the route 

1160 section of the Gaussian input file, as well as: 

1161 

1162 • The charge and multiplicity 

1163 

1164 • The selected level of output 

1165 

1166 • The method, basis set and (optional) fitting basis set. 

1167 

1168 • Basis file name/definition if set 

1169 

1170 • Nuclear properties 

1171 

1172 • Masses set in the nuclear properties section or in the ReadIsotopes 

1173 section (if ``freq=ReadIso`` is set). These will be saved in an array 

1174 with keyword ``isolist``, in the parameters dictionary. For these to 

1175 actually be used in calculations and/or written out to input files, 

1176 the Atoms object's masses must be manually updated to these values 

1177 (this is not done automatically) 

1178 

1179 If the Gaussian input file has been generated by ASE's 

1180 ``write_gaussian_in`` method, then the basis set, method and fitting 

1181 basis will be saved under the ``basis``, ``method`` and ``fitting_basis`` 

1182 keywords in the parameters dictionary. Otherwise they are saved as 

1183 keywords in the parameters dict. 

1184 

1185 Currently this does not support reading of any other sections which would 

1186 be found below the molecule specification that have not been mentioned 

1187 here (such as the ModRedundant section). 

1188 ''' 

1189 gaussian_input = GaussianConfiguration.parse_gaussian_input(fd) 

1190 atoms = gaussian_input.get_atoms() 

1191 

1192 if attach_calculator: 

1193 atoms.calc = gaussian_input.get_calculator() 

1194 

1195 return atoms 

1196 

1197 

1198# In the interest of using the same RE for both atomic positions and forces, 

1199# we make one of the columns optional. That's because atomic positions have 

1200# 6 columns, while forces only has 5 columns. Otherwise they are very similar. 

1201_re_atom = re.compile( 

1202 r'^\s*\S+\s+(\S+)\s+(?:\S+\s+)?(\S+)\s+(\S+)\s+(\S+)\s*$' 

1203) 

1204_re_forceblock = re.compile(r'^\s*Center\s+Atomic\s+Forces\s+\S+\s*$') 

1205_re_l716 = re.compile(r'^\s*\(Enter .+l716.exe\)$') 

1206 

1207 

1208def _compare_merge_configs(configs, new): 

1209 """Append new to configs if it contains a new geometry or new data. 

1210 

1211 Gaussian sometimes repeats a geometry, for example at the end of an 

1212 optimization, or when a user requests vibrational frequency 

1213 analysis in the same calculation as a geometry optimization. 

1214 

1215 In those cases, rather than repeating the structure in the list of 

1216 returned structures, try to merge results if doing so doesn't change 

1217 any previously calculated values. If that's not possible, then create 

1218 a new "image" with the new results. 

1219 """ 

1220 if not configs: 

1221 configs.append(new) 

1222 return 

1223 

1224 old = configs[-1] 

1225 

1226 if old != new: 

1227 configs.append(new) 

1228 return 

1229 

1230 oldres = old.calc.results 

1231 newres = new.calc.results 

1232 common_keys = set(oldres).intersection(newres) 

1233 

1234 for key in common_keys: 

1235 if np.any(oldres[key] != newres[key]): 

1236 configs.append(new) 

1237 return 

1238 oldres.update(newres) 

1239 

1240 

1241def _read_charges(fd): 

1242 fd.readline() 

1243 qs = [] 

1244 ms = [] 

1245 for line in fd: 

1246 if not line.strip()[0].isdigit(): 

1247 break 

1248 qs.append(float(line.split()[2])) 

1249 if len(line.split()) > 3: 

1250 ms.append(float(line.split()[3])) 

1251 return {'charges': qs, 'magmoms': ms} if ms else {'charges': qs} 

1252 

1253 

1254def read_gaussian_out(fd, index=-1): 

1255 """Reads a gaussian output file and returns an Atoms object.""" 

1256 configs = [] 

1257 atoms = None 

1258 energy = None 

1259 results = {} 

1260 orientation = None # Orientation of the coordinates stored in atoms 

1261 for line in fd: 

1262 line = line.strip() 

1263 if line.startswith(r'1\1\GINC'): 

1264 # We've reached the "archive" block at the bottom, stop parsing 

1265 break 

1266 

1267 if (line == 'Input orientation:' 

1268 or line == 'Z-Matrix orientation:' 

1269 or line == "Standard orientation:"): 

1270 if atoms is not None: 

1271 # Add configuration to the currently-parsed list 

1272 # only after an energy or force has been parsed 

1273 # (we assume these are in the output file) 

1274 if energy is None: 

1275 continue 

1276 # "atoms" should store the first geometry encountered 

1277 # in the input file which is often the input orientation, 

1278 # which is the orientation for forces. 

1279 # If there are forces and the orientation of atoms is not 

1280 # the input coordinate system, warn the user 

1281 if orientation != "Input" and 'forces' in results: 

1282 logger.warning('Configuration geometry is not in the input' 

1283 f'orientation. It is {orientation}') 

1284 calc = SinglePointCalculator(atoms, energy=energy, **results) 

1285 atoms.calc = calc 

1286 _compare_merge_configs(configs, atoms) 

1287 atoms = None 

1288 orientation = line.split()[0] # Store the orientation 

1289 energy = None 

1290 results = {} 

1291 

1292 numbers = [] 

1293 positions = [] 

1294 pbc = np.zeros(3, dtype=bool) 

1295 cell = np.zeros((3, 3)) 

1296 npbc = 0 

1297 # skip 4 irrelevant lines 

1298 for _ in range(4): 

1299 fd.readline() 

1300 while True: 

1301 match = _re_atom.match(fd.readline()) 

1302 if match is None: 

1303 break 

1304 number = int(match.group(1)) 

1305 pos = list(map(float, match.group(2, 3, 4))) 

1306 if number == -2: 

1307 pbc[npbc] = True 

1308 cell[npbc] = pos 

1309 npbc += 1 

1310 else: 

1311 numbers.append(max(number, 0)) 

1312 positions.append(pos) 

1313 atoms = Atoms(numbers, positions, pbc=pbc, cell=cell) 

1314 elif (line.startswith('Energy=') 

1315 or line.startswith('SCF Done:')): 

1316 # Some semi-empirical methods (Huckel, MINDO3, etc.), 

1317 # or SCF methods (HF, DFT, etc.) 

1318 energy = float(line.split('=')[1].split()[0].replace('D', 'e')) 

1319 energy *= Hartree 

1320 elif (line.startswith('E2 =') or line.startswith('E3 =') 

1321 or line.startswith('E4(') or line.startswith('DEMP5 =') 

1322 or line.startswith('E2(')): 

1323 # MP{2,3,4,5} energy 

1324 # also some double hybrid calculations, like B2PLYP 

1325 energy = float(line.split('=')[-1].strip().replace('D', 'e')) 

1326 energy *= Hartree 

1327 elif line.startswith('Wavefunction amplitudes converged. E(Corr)'): 

1328 # "correlated method" energy, e.g. CCSD 

1329 energy = float(line.split('=')[-1].strip().replace('D', 'e')) 

1330 energy *= Hartree 

1331 elif line.startswith('CCSD(T)='): 

1332 # CCSD(T) energy 

1333 energy = float(line.split('=')[-1].strip().replace('D', 'e')) 

1334 energy *= Hartree 

1335 elif ( 

1336 line.startswith('Mulliken charges') 

1337 or line.startswith('Lowdin Atomic Charges') 

1338 or line.startswith('Hirshfeld charges, spin densities,') 

1339 ): 

1340 # Löwdin is printed after Mulliken and overwrites `charges`. 

1341 # Hirshfeld is printed after Mulliken and overwrites `charges`. 

1342 results.update(_read_charges(fd)) 

1343 elif line.startswith('Dipole moment') and energy is not None: 

1344 # dipole moment in `l601.exe`, printed unless `Pop=None` 

1345 # Skipped if energy is not printed in the same section. 

1346 # This happens in the last geometry record when `opt` or `irc` is 

1347 # specified. In this case, the record is compared with the previous 

1348 # one in `_compare_merge_configs`, and there the dipole moment 

1349 # from `l601` conflicts with the previous record from `l716`. 

1350 line = fd.readline().strip() 

1351 dipole = np.array([float(_) for _ in line.split()[1:6:2]]) * Debye 

1352 results['dipole'] = dipole 

1353 elif _re_l716.match(line): 

1354 # Sometimes Gaussian will print "Rotating derivatives to 

1355 # standard orientation" after the matched line (which looks like 

1356 # "(Enter /opt/gaussian/g16/l716.exe)", though the exact path 

1357 # depends on where Gaussian is installed). We *skip* the dipole 

1358 # in this case, because it might be rotated relative to the input 

1359 # orientation (and also it is numerically different even if the 

1360 # standard orientation is the same as the input orientation). 

1361 line = fd.readline().strip() 

1362 if not line.startswith('Dipole'): 

1363 continue 

1364 dip = line.split('=')[1].replace('D', 'e') 

1365 tokens = dip.split() 

1366 dipole = [] 

1367 # dipole elements can run together, depending on what method was 

1368 # used to calculate them. First see if there is a space between 

1369 # values. 

1370 if len(tokens) == 3: 

1371 dipole = list(map(float, tokens)) 

1372 elif len(dip) % 3 == 0: 

1373 # next, check if the number of tokens is divisible by 3 

1374 nchars = len(dip) // 3 

1375 for i in range(3): 

1376 dipole.append(float(dip[nchars * i:nchars * (i + 1)])) 

1377 else: 

1378 # otherwise, just give up on trying to parse it. 

1379 dipole = None 

1380 continue 

1381 # this dipole moment is printed in atomic units, e-Bohr 

1382 # ASE uses e-Angstrom for dipole moments. 

1383 results['dipole'] = np.array(dipole) * Bohr 

1384 elif _re_forceblock.match(line): 

1385 # skip 2 irrelevant lines 

1386 fd.readline() 

1387 fd.readline() 

1388 forces = [] 

1389 while True: 

1390 match = _re_atom.match(fd.readline()) 

1391 if match is None: 

1392 break 

1393 forces.append(list(map(float, match.group(2, 3, 4)))) 

1394 results['forces'] = np.array(forces) * Hartree / Bohr 

1395 if atoms is not None: 

1396 atoms.calc = SinglePointCalculator(atoms, energy=energy, **results) 

1397 _compare_merge_configs(configs, atoms) 

1398 return configs[index]