Coverage for /builds/debichem-team/python-ase/ase/ga/cutandsplicepairing.py: 90.19%

214 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-03-06 04:00 +0000

1"""Implementation of the cut-and-splice paring operator.""" 

2import numpy as np 

3 

4from ase import Atoms 

5from ase.ga.offspring_creator import OffspringCreator 

6from ase.ga.utilities import ( 

7 atoms_too_close, 

8 atoms_too_close_two_sets, 

9 gather_atoms_by_tag, 

10) 

11from ase.geometry import find_mic 

12 

13 

14class Positions: 

15 """Helper object to simplify the pairing process. 

16 

17 Parameters: 

18 

19 scaled_positions: (Nx3) array 

20 Positions in scaled coordinates 

21 cop: (1x3) array 

22 Center-of-positions (also in scaled coordinates) 

23 symbols: str 

24 String with the atomic symbols 

25 distance: float 

26 Signed distance to the cutting plane 

27 origin: int (0 or 1) 

28 Determines at which side of the plane the position should be. 

29 """ 

30 

31 def __init__(self, scaled_positions, cop, symbols, distance, origin): 

32 self.scaled_positions = scaled_positions 

33 self.cop = cop 

34 self.symbols = symbols 

35 self.distance = distance 

36 self.origin = origin 

37 

38 def to_use(self): 

39 """Tells whether this position is at the right side.""" 

40 if self.distance > 0. and self.origin == 0: 

41 return True 

42 elif self.distance < 0. and self.origin == 1: 

43 return True 

44 else: 

45 return False 

46 

47 

48class CutAndSplicePairing(OffspringCreator): 

49 """The Cut and Splice operator by Deaven and Ho. 

50 

51 Creates offspring from two parent structures using 

52 a randomly generated cutting plane. 

53 

54 The parents may have different unit cells, in which 

55 case the offspring unit cell will be a random combination 

56 of the parent cells. 

57 

58 The basic implementation (for fixed unit cells) is 

59 described in: 

60 

61 :doi:`L.B. Vilhelmsen and B. Hammer, PRL, 108, 126101 (2012) 

62 <10.1103/PhysRevLett.108.126101`> 

63 

64 The extension to variable unit cells is similar to: 

65 

66 * :doi:`Glass, Oganov, Hansen, Comp. Phys. Comm. 175 (2006) 713-720 

67 <10.1016/j.cpc.2006.07.020>` 

68 

69 * :doi:`Lonie, Zurek, Comp. Phys. Comm. 182 (2011) 372-387 

70 <10.1016/j.cpc.2010.07.048>` 

71 

72 The operator can furthermore preserve molecular identity 

73 if desired (see the *use_tags* kwarg). Atoms with the same 

74 tag will then be considered as belonging to the same molecule, 

75 and their internal geometry will not be changed by the operator. 

76 

77 If use_tags is enabled, the operator will also conserve the 

78 number of molecules of each kind (in addition to conserving 

79 the overall stoichiometry). Currently, molecules are considered 

80 to be of the same kind if their chemical symbol strings are 

81 identical. In rare cases where this may not be sufficient 

82 (i.e. when desiring to keep the same ratio of isomers), the 

83 different isomers can be differentiated by giving them different 

84 elemental orderings (e.g. 'XY2' and 'Y2X'). 

85 

86 Parameters: 

87 

88 slab: Atoms object 

89 Specifies the cell vectors and periodic boundary conditions 

90 to be applied to the randomly generated structures. 

91 Any included atoms (e.g. representing an underlying slab) 

92 are copied to these new structures. 

93 

94 n_top: int 

95 The number of atoms to optimize 

96 

97 blmin: dict 

98 Dictionary with minimal interatomic distances. 

99 Note: when preserving molecular identity (see use_tags), 

100 the blmin dict will (naturally) only be applied 

101 to intermolecular distances (not the intramolecular 

102 ones). 

103 

104 number_of_variable_cell_vectors: int (default 0) 

105 The number of variable cell vectors (0, 1, 2 or 3). 

106 To keep things simple, it is the 'first' vectors which 

107 will be treated as variable, i.e. the 'a' vector in the 

108 univariate case, the 'a' and 'b' vectors in the bivariate 

109 case, etc. 

110 

111 p1: float or int between 0 and 1 

112 Probability that a parent is shifted over a random 

113 distance along the normal of the cutting plane 

114 (only operative if number_of_variable_cell_vectors > 0). 

115 

116 p2: float or int between 0 and 1 

117 Same as p1, but for shifting along the directions 

118 in the cutting plane (only operative if 

119 number_of_variable_cell_vectors > 0). 

120 

121 minfrac: float between 0 and 1, or None (default) 

122 Minimal fraction of atoms a parent must contribute 

123 to the child. If None, each parent must contribute 

124 at least one atom. 

125 

126 cellbounds: ase.ga.utilities.CellBounds instance 

127 Describing limits on the cell shape, see 

128 :class:`~ase.ga.utilities.CellBounds`. 

129 Note that it only make sense to impose conditions 

130 regarding cell vectors which have been marked as 

131 variable (see number_of_variable_cell_vectors). 

132 

133 use_tags: bool 

134 Whether to use the atomic tags to preserve 

135 molecular identity. 

136 

137 test_dist_to_slab: bool (default True) 

138 Whether to make sure that the distances between 

139 the atoms and the slab satisfy the blmin. 

140 

141 rng: Random number generator 

142 By default numpy.random. 

143 """ 

144 

145 def __init__(self, slab, n_top, blmin, number_of_variable_cell_vectors=0, 

146 p1=1, p2=0.05, minfrac=None, cellbounds=None, 

147 test_dist_to_slab=True, use_tags=False, rng=np.random, 

148 verbose=False): 

149 

150 OffspringCreator.__init__(self, verbose, rng=rng) 

151 self.slab = slab 

152 self.n_top = n_top 

153 self.blmin = blmin 

154 assert number_of_variable_cell_vectors in range(4) 

155 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors 

156 self.p1 = p1 

157 self.p2 = p2 

158 self.minfrac = minfrac 

159 self.cellbounds = cellbounds 

160 self.test_dist_to_slab = test_dist_to_slab 

161 self.use_tags = use_tags 

162 

163 self.scaling_volume = None 

164 self.descriptor = 'CutAndSplicePairing' 

165 self.min_inputs = 2 

166 

167 def update_scaling_volume(self, population, w_adapt=0.5, n_adapt=0): 

168 """Updates the scaling volume that is used in the pairing 

169 

170 w_adapt: weight of the new vs the old scaling volume 

171 n_adapt: number of best candidates in the population that 

172 are used to calculate the new scaling volume 

173 """ 

174 if not n_adapt: 

175 # take best 20% of the population 

176 n_adapt = int(np.ceil(0.2 * len(population))) 

177 v_new = np.mean([a.get_volume() for a in population[:n_adapt]]) 

178 

179 if not self.scaling_volume: 

180 self.scaling_volume = v_new 

181 else: 

182 volumes = [self.scaling_volume, v_new] 

183 weights = [1 - w_adapt, w_adapt] 

184 self.scaling_volume = np.average(volumes, weights=weights) 

185 

186 def get_new_individual(self, parents): 

187 """The method called by the user that 

188 returns the paired structure.""" 

189 f, m = parents 

190 

191 indi = self.cross(f, m) 

192 desc = 'pairing: {} {}'.format(f.info['confid'], 

193 m.info['confid']) 

194 # It is ok for an operator to return None 

195 # It means that it could not make a legal offspring 

196 # within a reasonable amount of time 

197 if indi is None: 

198 return indi, desc 

199 indi = self.initialize_individual(f, indi) 

200 indi.info['data']['parents'] = [f.info['confid'], 

201 m.info['confid']] 

202 

203 return self.finalize_individual(indi), desc 

204 

205 def cross(self, a1, a2): 

206 """Crosses the two atoms objects and returns one""" 

207 

208 if len(a1) != len(self.slab) + self.n_top: 

209 raise ValueError('Wrong size of structure to optimize') 

210 if len(a1) != len(a2): 

211 raise ValueError('The two structures do not have the same length') 

212 

213 N = self.n_top 

214 

215 # Only consider the atoms to optimize 

216 a1 = a1[len(a1) - N: len(a1)] 

217 a2 = a2[len(a2) - N: len(a2)] 

218 

219 if not np.array_equal(a1.numbers, a2.numbers): 

220 err = 'Trying to pair two structures with different stoichiometry' 

221 raise ValueError(err) 

222 

223 if self.use_tags and not np.array_equal(a1.get_tags(), a2.get_tags()): 

224 err = 'Trying to pair two structures with different tags' 

225 raise ValueError(err) 

226 

227 cell1 = a1.get_cell() 

228 cell2 = a2.get_cell() 

229 for i in range(self.number_of_variable_cell_vectors, 3): 

230 err = 'Unit cells are supposed to be identical in direction %d' 

231 assert np.allclose(cell1[i], cell2[i]), (err % i, cell1, cell2) 

232 

233 invalid = True 

234 counter = 0 

235 maxcount = 1000 

236 a1_copy = a1.copy() 

237 a2_copy = a2.copy() 

238 

239 # Run until a valid pairing is made or maxcount pairings are tested. 

240 while invalid and counter < maxcount: 

241 counter += 1 

242 

243 newcell = self.generate_unit_cell(cell1, cell2) 

244 if newcell is None: 

245 # No valid unit cell could be generated. 

246 # This strongly suggests that it is near-impossible 

247 # to generate one from these parent cells and it is 

248 # better to abort now. 

249 break 

250 

251 # Choose direction of cutting plane normal 

252 if self.number_of_variable_cell_vectors == 0: 

253 # Will be generated entirely at random 

254 theta = np.pi * self.rng.random() 

255 phi = 2. * np.pi * self.rng.random() 

256 cut_n = np.array([np.cos(phi) * np.sin(theta), 

257 np.sin(phi) * np.sin(theta), np.cos(theta)]) 

258 else: 

259 # Pick one of the 'variable' cell vectors 

260 cut_n = self.rng.choice(self.number_of_variable_cell_vectors) 

261 

262 # Randomly translate parent structures 

263 for a_copy, a in zip([a1_copy, a2_copy], [a1, a2]): 

264 a_copy.set_positions(a.get_positions()) 

265 

266 cell = a_copy.get_cell() 

267 for i in range(self.number_of_variable_cell_vectors): 

268 r = self.rng.random() 

269 cond1 = i == cut_n and r < self.p1 

270 cond2 = i != cut_n and r < self.p2 

271 if cond1 or cond2: 

272 a_copy.positions += self.rng.random() * cell[i] 

273 

274 if self.use_tags: 

275 # For correct determination of the center- 

276 # of-position of the multi-atom blocks, 

277 # we need to group their constituent atoms 

278 # together 

279 gather_atoms_by_tag(a_copy) 

280 else: 

281 a_copy.wrap() 

282 

283 # Generate the cutting point in scaled coordinates 

284 cosp1 = np.average(a1_copy.get_scaled_positions(), axis=0) 

285 cosp2 = np.average(a2_copy.get_scaled_positions(), axis=0) 

286 cut_p = np.zeros((1, 3)) 

287 for i in range(3): 

288 if i < self.number_of_variable_cell_vectors: 

289 cut_p[0, i] = self.rng.random() 

290 else: 

291 cut_p[0, i] = 0.5 * (cosp1[i] + cosp2[i]) 

292 

293 # Perform the pairing: 

294 child = self._get_pairing(a1_copy, a2_copy, cut_p, cut_n, newcell) 

295 if child is None: 

296 continue 

297 

298 # Verify whether the atoms are too close or not: 

299 if atoms_too_close(child, self.blmin, use_tags=self.use_tags): 

300 continue 

301 

302 if self.test_dist_to_slab and len(self.slab) > 0: 

303 if atoms_too_close_two_sets(self.slab, child, self.blmin): 

304 continue 

305 

306 # Passed all the tests 

307 child = self.slab + child 

308 child.set_cell(newcell, scale_atoms=False) 

309 child.wrap() 

310 return child 

311 

312 return None 

313 

314 def generate_unit_cell(self, cell1, cell2, maxcount=10000): 

315 """Generates a new unit cell by a random linear combination 

316 of the parent cells. The new cell must satisfy the 

317 self.cellbounds constraints. Returns None if no such cell 

318 was generated within a given number of attempts. 

319 

320 Parameters: 

321 

322 maxcount: int 

323 The maximal number of attempts. 

324 """ 

325 # First calculate the scaling volume 

326 if not self.scaling_volume: 

327 v1 = np.abs(np.linalg.det(cell1)) 

328 v2 = np.abs(np.linalg.det(cell2)) 

329 r = self.rng.random() 

330 v_ref = r * v1 + (1 - r) * v2 

331 else: 

332 v_ref = self.scaling_volume 

333 

334 # Now the cell vectors 

335 if self.number_of_variable_cell_vectors == 0: 

336 assert np.allclose(cell1, cell2), 'Parent cells are not the same' 

337 newcell = np.copy(cell1) 

338 else: 

339 count = 0 

340 while count < maxcount: 

341 r = self.rng.random() 

342 newcell = r * cell1 + (1 - r) * cell2 

343 

344 vol = abs(np.linalg.det(newcell)) 

345 scaling = v_ref / vol 

346 scaling **= 1. / self.number_of_variable_cell_vectors 

347 newcell[:self.number_of_variable_cell_vectors] *= scaling 

348 

349 found = True 

350 if self.cellbounds is not None: 

351 found = self.cellbounds.is_within_bounds(newcell) 

352 if found: 

353 break 

354 

355 count += 1 

356 else: 

357 # Did not find acceptable cell 

358 newcell = None 

359 

360 return newcell 

361 

362 def _get_pairing(self, a1, a2, cutting_point, cutting_normal, cell): 

363 """Creates a child from two parents using the given cut. 

364 

365 Returns None if the generated structure does not contain 

366 a large enough fraction of each parent (see self.minfrac). 

367 

368 Does not check whether atoms are too close. 

369 

370 Assumes the 'slab' parts have been removed from the parent 

371 structures and that these have been checked for equal 

372 lengths, stoichiometries, and tags (if self.use_tags). 

373 

374 Parameters: 

375 

376 cutting_normal: int or (1x3) array 

377 

378 cutting_point: (1x3) array 

379 In fractional coordinates 

380 

381 cell: (3x3) array 

382 The unit cell for the child structure 

383 """ 

384 symbols = a1.get_chemical_symbols() 

385 tags = a1.get_tags() if self.use_tags else np.arange(len(a1)) 

386 

387 # Generate list of all atoms / atom groups: 

388 p1, p2, sym = [], [], [] 

389 for i in np.unique(tags): 

390 indices = np.where(tags == i)[0] 

391 s = ''.join([symbols[j] for j in indices]) 

392 sym.append(s) 

393 

394 for i, (a, p) in enumerate(zip([a1, a2], [p1, p2])): 

395 c = a.get_cell() 

396 cop = np.mean(a.positions[indices], axis=0) 

397 cut_p = np.dot(cutting_point, c) 

398 if isinstance(cutting_normal, int): 

399 vecs = [c[j] for j in range(3) if j != cutting_normal] 

400 cut_n = np.cross(vecs[0], vecs[1]) 

401 else: 

402 cut_n = np.dot(cutting_normal, c) 

403 d = np.dot(cop - cut_p, cut_n) 

404 spos = a.get_scaled_positions()[indices] 

405 scop = np.mean(spos, axis=0) 

406 p.append(Positions(spos, scop, s, d, i)) 

407 

408 all_points = p1 + p2 

409 unique_sym = np.unique(sym) 

410 types = {s: sym.count(s) for s in unique_sym} 

411 

412 # Sort these by chemical symbols: 

413 all_points.sort(key=lambda x: x.symbols, reverse=True) 

414 

415 # For each atom type make the pairing 

416 unique_sym.sort() 

417 use_total = {} 

418 for s in unique_sym: 

419 used = [] 

420 not_used = [] 

421 # The list is looked trough in 

422 # reverse order so atoms can be removed 

423 # from the list along the way. 

424 for i in reversed(range(len(all_points))): 

425 # If there are no more atoms of this type 

426 if all_points[i].symbols != s: 

427 break 

428 # Check if the atom should be included 

429 if all_points[i].to_use(): 

430 used.append(all_points.pop(i)) 

431 else: 

432 not_used.append(all_points.pop(i)) 

433 

434 assert len(used) + len(not_used) == types[s] * 2 

435 

436 # While we have too few of the given atom type 

437 while len(used) < types[s]: 

438 index = self.rng.randint(len(not_used)) 

439 used.append(not_used.pop(index)) 

440 

441 # While we have too many of the given atom type 

442 while len(used) > types[s]: 

443 # remove randomly: 

444 index = self.rng.randint(len(used)) 

445 not_used.append(used.pop(index)) 

446 

447 use_total[s] = used 

448 

449 n_tot = sum(len(ll) for ll in use_total.values()) 

450 assert n_tot == len(sym) 

451 

452 # check if the generated structure contains 

453 # atoms from both parents: 

454 count1, count2, N = 0, 0, len(a1) 

455 for x in use_total.values(): 

456 count1 += sum(y.origin == 0 for y in x) 

457 count2 += sum(y.origin == 1 for y in x) 

458 

459 nmin = 1 if self.minfrac is None else int(round(self.minfrac * N)) 

460 if count1 < nmin or count2 < nmin: 

461 return None 

462 

463 # Construct the cartesian positions and reorder the atoms 

464 # to follow the original order 

465 newpos = [] 

466 pbc = a1.get_pbc() 

467 for s in sym: 

468 p = use_total[s].pop() 

469 c = a1.get_cell() if p.origin == 0 else a2.get_cell() 

470 pos = np.dot(p.scaled_positions, c) 

471 cop = np.dot(p.cop, c) 

472 vectors, _lengths = find_mic(pos - cop, c, pbc) 

473 newcop = np.dot(p.cop, cell) 

474 pos = newcop + vectors 

475 for row in pos: 

476 newpos.append(row) 

477 

478 newpos = np.reshape(newpos, (N, 3)) 

479 num = a1.get_atomic_numbers() 

480 child = Atoms(numbers=num, positions=newpos, pbc=pbc, cell=cell, 

481 tags=tags) 

482 child.wrap() 

483 return child