Coverage for /builds/debichem-team/python-ase/ase/ga/cutandsplicepairing.py: 90.19%

1"""Implementation of the cut-and-splice paring operator."""

2import numpy as np

4from ase import Atoms

5from ase.ga.offspring_creator import OffspringCreator

6from ase.ga.utilities import (

7 atoms_too_close,

8 atoms_too_close_two_sets,

9 gather_atoms_by_tag,

10)

11from ase.geometry import find_mic

14class Positions:

15 """Helper object to simplify the pairing process.

17 Parameters:

19 scaled_positions: (Nx3) array

20 Positions in scaled coordinates

21 cop: (1x3) array

22 Center-of-positions (also in scaled coordinates)

23 symbols: str

24 String with the atomic symbols

25 distance: float

26 Signed distance to the cutting plane

27 origin: int (0 or 1)

28 Determines at which side of the plane the position should be.

29 """

31 def __init__(self, scaled_positions, cop, symbols, distance, origin):

32 self.scaled_positions = scaled_positions

33 self.cop = cop

34 self.symbols = symbols

35 self.distance = distance

36 self.origin = origin

38 def to_use(self):

39 """Tells whether this position is at the right side."""

40 if self.distance > 0. and self.origin == 0:

41 return True

42 elif self.distance < 0. and self.origin == 1:

43 return True

44 else:

45 return False

48class CutAndSplicePairing(OffspringCreator):

49 """The Cut and Splice operator by Deaven and Ho.

51 Creates offspring from two parent structures using

52 a randomly generated cutting plane.

54 The parents may have different unit cells, in which

55 case the offspring unit cell will be a random combination

56 of the parent cells.

58 The basic implementation (for fixed unit cells) is

59 described in:

61 :doi:`L.B. Vilhelmsen and B. Hammer, PRL, 108, 126101 (2012)

62 <10.1103/PhysRevLett.108.126101`>

64 The extension to variable unit cells is similar to:

66 * :doi:`Glass, Oganov, Hansen, Comp. Phys. Comm. 175 (2006) 713-720

67 <10.1016/j.cpc.2006.07.020>`

69 * :doi:`Lonie, Zurek, Comp. Phys. Comm. 182 (2011) 372-387

70 <10.1016/j.cpc.2010.07.048>`

72 The operator can furthermore preserve molecular identity

73 if desired (see the *use_tags* kwarg). Atoms with the same

74 tag will then be considered as belonging to the same molecule,

75 and their internal geometry will not be changed by the operator.

77 If use_tags is enabled, the operator will also conserve the

78 number of molecules of each kind (in addition to conserving

79 the overall stoichiometry). Currently, molecules are considered

80 to be of the same kind if their chemical symbol strings are

81 identical. In rare cases where this may not be sufficient

82 (i.e. when desiring to keep the same ratio of isomers), the

83 different isomers can be differentiated by giving them different

84 elemental orderings (e.g. 'XY2' and 'Y2X').

86 Parameters:

88 slab: Atoms object

89 Specifies the cell vectors and periodic boundary conditions

90 to be applied to the randomly generated structures.

91 Any included atoms (e.g. representing an underlying slab)

92 are copied to these new structures.

94 n_top: int

95 The number of atoms to optimize

97 blmin: dict

98 Dictionary with minimal interatomic distances.

99 Note: when preserving molecular identity (see use_tags),

100 the blmin dict will (naturally) only be applied

101 to intermolecular distances (not the intramolecular

102 ones).

103

104 number_of_variable_cell_vectors: int (default 0)

105 The number of variable cell vectors (0, 1, 2 or 3).

106 To keep things simple, it is the 'first' vectors which

107 will be treated as variable, i.e. the 'a' vector in the

108 univariate case, the 'a' and 'b' vectors in the bivariate

109 case, etc.

110

111 p1: float or int between 0 and 1

112 Probability that a parent is shifted over a random

113 distance along the normal of the cutting plane

114 (only operative if number_of_variable_cell_vectors > 0).

115

116 p2: float or int between 0 and 1

117 Same as p1, but for shifting along the directions

118 in the cutting plane (only operative if

119 number_of_variable_cell_vectors > 0).

120

121 minfrac: float between 0 and 1, or None (default)

122 Minimal fraction of atoms a parent must contribute

123 to the child. If None, each parent must contribute

124 at least one atom.

125

126 cellbounds: ase.ga.utilities.CellBounds instance

127 Describing limits on the cell shape, see

128 :class:`~ase.ga.utilities.CellBounds`.

129 Note that it only make sense to impose conditions

130 regarding cell vectors which have been marked as

131 variable (see number_of_variable_cell_vectors).

132

133 use_tags: bool

134 Whether to use the atomic tags to preserve

135 molecular identity.

136

137 test_dist_to_slab: bool (default True)

138 Whether to make sure that the distances between

139 the atoms and the slab satisfy the blmin.

140

141 rng: Random number generator

142 By default numpy.random.

143 """

144

145 def __init__(self, slab, n_top, blmin, number_of_variable_cell_vectors=0,

146 p1=1, p2=0.05, minfrac=None, cellbounds=None,

147 test_dist_to_slab=True, use_tags=False, rng=np.random,

148 verbose=False):

149

150 OffspringCreator.__init__(self, verbose, rng=rng)

151 self.slab = slab

152 self.n_top = n_top

153 self.blmin = blmin

154 assert number_of_variable_cell_vectors in range(4)

155 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors

156 self.p1 = p1

157 self.p2 = p2

158 self.minfrac = minfrac

159 self.cellbounds = cellbounds

160 self.test_dist_to_slab = test_dist_to_slab

161 self.use_tags = use_tags

162

163 self.scaling_volume = None

164 self.descriptor = 'CutAndSplicePairing'

165 self.min_inputs = 2

166

167 def update_scaling_volume(self, population, w_adapt=0.5, n_adapt=0):

168 """Updates the scaling volume that is used in the pairing

169

170 w_adapt: weight of the new vs the old scaling volume

171 n_adapt: number of best candidates in the population that

172 are used to calculate the new scaling volume

173 """

174 if not n_adapt:

175 # take best 20% of the population

176 n_adapt = int(np.ceil(0.2 * len(population)))

177 v_new = np.mean([a.get_volume() for a in population[:n_adapt]])

178

179 if not self.scaling_volume:

180 self.scaling_volume = v_new

181 else:

182 volumes = [self.scaling_volume, v_new]

183 weights = [1 - w_adapt, w_adapt]

184 self.scaling_volume = np.average(volumes, weights=weights)

185

186 def get_new_individual(self, parents):

187 """The method called by the user that

188 returns the paired structure."""

189 f, m = parents

190

191 indi = self.cross(f, m)

192 desc = 'pairing: {} {}'.format(f.info['confid'],

193 m.info['confid'])

194 # It is ok for an operator to return None

195 # It means that it could not make a legal offspring

196 # within a reasonable amount of time

197 if indi is None:

198 return indi, desc

199 indi = self.initialize_individual(f, indi)

200 indi.info['data']['parents'] = [f.info['confid'],

201 m.info['confid']]

202

203 return self.finalize_individual(indi), desc

204

205 def cross(self, a1, a2):

206 """Crosses the two atoms objects and returns one"""

207

208 if len(a1) != len(self.slab) + self.n_top:

209 raise ValueError('Wrong size of structure to optimize')

210 if len(a1) != len(a2):

211 raise ValueError('The two structures do not have the same length')

212

213 N = self.n_top

214

215 # Only consider the atoms to optimize

216 a1 = a1[len(a1) - N: len(a1)]

217 a2 = a2[len(a2) - N: len(a2)]

218

219 if not np.array_equal(a1.numbers, a2.numbers):

220 err = 'Trying to pair two structures with different stoichiometry'

221 raise ValueError(err)

222

223 if self.use_tags and not np.array_equal(a1.get_tags(), a2.get_tags()):

224 err = 'Trying to pair two structures with different tags'

225 raise ValueError(err)

226

227 cell1 = a1.get_cell()

228 cell2 = a2.get_cell()

229 for i in range(self.number_of_variable_cell_vectors, 3):

230 err = 'Unit cells are supposed to be identical in direction %d'

231 assert np.allclose(cell1[i], cell2[i]), (err % i, cell1, cell2)

232

233 invalid = True

234 counter = 0

235 maxcount = 1000

236 a1_copy = a1.copy()

237 a2_copy = a2.copy()

238

239 # Run until a valid pairing is made or maxcount pairings are tested.

240 while invalid and counter < maxcount:

241 counter += 1

242

243 newcell = self.generate_unit_cell(cell1, cell2)

244 if newcell is None:

245 # No valid unit cell could be generated.

246 # This strongly suggests that it is near-impossible

247 # to generate one from these parent cells and it is

248 # better to abort now.

249 break

250

251 # Choose direction of cutting plane normal

252 if self.number_of_variable_cell_vectors == 0:

253 # Will be generated entirely at random

254 theta = np.pi * self.rng.random()

255 phi = 2. * np.pi * self.rng.random()

256 cut_n = np.array([np.cos(phi) * np.sin(theta),

257 np.sin(phi) * np.sin(theta), np.cos(theta)])

258 else:

259 # Pick one of the 'variable' cell vectors

260 cut_n = self.rng.choice(self.number_of_variable_cell_vectors)

261

262 # Randomly translate parent structures

263 for a_copy, a in zip([a1_copy, a2_copy], [a1, a2]):

264 a_copy.set_positions(a.get_positions())

265

266 cell = a_copy.get_cell()

267 for i in range(self.number_of_variable_cell_vectors):

268 r = self.rng.random()

269 cond1 = i == cut_n and r < self.p1

270 cond2 = i != cut_n and r < self.p2

271 if cond1 or cond2:

272 a_copy.positions += self.rng.random() * cell[i]

273

274 if self.use_tags:

275 # For correct determination of the center-

276 # of-position of the multi-atom blocks,

277 # we need to group their constituent atoms

278 # together

279 gather_atoms_by_tag(a_copy)

280 else:

281 a_copy.wrap()

282

283 # Generate the cutting point in scaled coordinates

284 cosp1 = np.average(a1_copy.get_scaled_positions(), axis=0)

285 cosp2 = np.average(a2_copy.get_scaled_positions(), axis=0)

286 cut_p = np.zeros((1, 3))

287 for i in range(3):

288 if i < self.number_of_variable_cell_vectors:

289 cut_p[0, i] = self.rng.random()

290 else:

291 cut_p[0, i] = 0.5 * (cosp1[i] + cosp2[i])

292

293 # Perform the pairing:

294 child = self._get_pairing(a1_copy, a2_copy, cut_p, cut_n, newcell)

295 if child is None:

296 continue

297

298 # Verify whether the atoms are too close or not:

299 if atoms_too_close(child, self.blmin, use_tags=self.use_tags):

300 continue

301

302 if self.test_dist_to_slab and len(self.slab) > 0:

303 if atoms_too_close_two_sets(self.slab, child, self.blmin):

304 continue

305

306 # Passed all the tests

307 child = self.slab + child

308 child.set_cell(newcell, scale_atoms=False)

309 child.wrap()

310 return child

311

312 return None

313

314 def generate_unit_cell(self, cell1, cell2, maxcount=10000):

315 """Generates a new unit cell by a random linear combination

316 of the parent cells. The new cell must satisfy the

317 self.cellbounds constraints. Returns None if no such cell

318 was generated within a given number of attempts.

319

320 Parameters:

321

322 maxcount: int

323 The maximal number of attempts.

324 """

325 # First calculate the scaling volume

326 if not self.scaling_volume:

327 v1 = np.abs(np.linalg.det(cell1))

328 v2 = np.abs(np.linalg.det(cell2))

329 r = self.rng.random()

330 v_ref = r * v1 + (1 - r) * v2

331 else:

332 v_ref = self.scaling_volume

333

334 # Now the cell vectors

335 if self.number_of_variable_cell_vectors == 0:

336 assert np.allclose(cell1, cell2), 'Parent cells are not the same'

337 newcell = np.copy(cell1)

338 else:

339 count = 0

340 while count < maxcount:

341 r = self.rng.random()

342 newcell = r * cell1 + (1 - r) * cell2

343

344 vol = abs(np.linalg.det(newcell))

345 scaling = v_ref / vol

346 scaling **= 1. / self.number_of_variable_cell_vectors

347 newcell[:self.number_of_variable_cell_vectors] *= scaling

348

349 found = True

350 if self.cellbounds is not None:

351 found = self.cellbounds.is_within_bounds(newcell)

352 if found:

353 break

354

355 count += 1

356 else:

357 # Did not find acceptable cell

358 newcell = None

359

360 return newcell

361

362 def _get_pairing(self, a1, a2, cutting_point, cutting_normal, cell):

363 """Creates a child from two parents using the given cut.

364

365 Returns None if the generated structure does not contain

366 a large enough fraction of each parent (see self.minfrac).

367

368 Does not check whether atoms are too close.

369

370 Assumes the 'slab' parts have been removed from the parent

371 structures and that these have been checked for equal

372 lengths, stoichiometries, and tags (if self.use_tags).

373

374 Parameters:

375

376 cutting_normal: int or (1x3) array

377

378 cutting_point: (1x3) array

379 In fractional coordinates

380

381 cell: (3x3) array

382 The unit cell for the child structure

383 """

384 symbols = a1.get_chemical_symbols()

385 tags = a1.get_tags() if self.use_tags else np.arange(len(a1))

386

387 # Generate list of all atoms / atom groups:

388 p1, p2, sym = [], [], []

389 for i in np.unique(tags):

390 indices = np.where(tags == i)[0]

391 s = ''.join([symbols[j] for j in indices])

392 sym.append(s)

393

394 for i, (a, p) in enumerate(zip([a1, a2], [p1, p2])):

395 c = a.get_cell()

396 cop = np.mean(a.positions[indices], axis=0)

397 cut_p = np.dot(cutting_point, c)

398 if isinstance(cutting_normal, int):

399 vecs = [c[j] for j in range(3) if j != cutting_normal]

400 cut_n = np.cross(vecs[0], vecs[1])

401 else:

402 cut_n = np.dot(cutting_normal, c)

403 d = np.dot(cop - cut_p, cut_n)

404 spos = a.get_scaled_positions()[indices]

405 scop = np.mean(spos, axis=0)

406 p.append(Positions(spos, scop, s, d, i))

407

408 all_points = p1 + p2

409 unique_sym = np.unique(sym)

410 types = {s: sym.count(s) for s in unique_sym}

411

412 # Sort these by chemical symbols:

413 all_points.sort(key=lambda x: x.symbols, reverse=True)

414

415 # For each atom type make the pairing

416 unique_sym.sort()

417 use_total = {}

418 for s in unique_sym:

419 used = []

420 not_used = []

421 # The list is looked trough in

422 # reverse order so atoms can be removed

423 # from the list along the way.

424 for i in reversed(range(len(all_points))):

425 # If there are no more atoms of this type

426 if all_points[i].symbols != s:

427 break

428 # Check if the atom should be included

429 if all_points[i].to_use():

430 used.append(all_points.pop(i))

431 else:

432 not_used.append(all_points.pop(i))

433

434 assert len(used) + len(not_used) == types[s] * 2

435

436 # While we have too few of the given atom type

437 while len(used) < types[s]:

438 index = self.rng.randint(len(not_used))

439 used.append(not_used.pop(index))

440

441 # While we have too many of the given atom type

442 while len(used) > types[s]:

443 # remove randomly:

444 index = self.rng.randint(len(used))

445 not_used.append(used.pop(index))

446

447 use_total[s] = used

448

449 n_tot = sum(len(ll) for ll in use_total.values())

450 assert n_tot == len(sym)

451

452 # check if the generated structure contains

453 # atoms from both parents:

454 count1, count2, N = 0, 0, len(a1)

455 for x in use_total.values():

456 count1 += sum(y.origin == 0 for y in x)

457 count2 += sum(y.origin == 1 for y in x)

458

459 nmin = 1 if self.minfrac is None else int(round(self.minfrac * N))

460 if count1 < nmin or count2 < nmin:

461 return None

462

463 # Construct the cartesian positions and reorder the atoms

464 # to follow the original order

465 newpos = []

466 pbc = a1.get_pbc()

467 for s in sym:

468 p = use_total[s].pop()

469 c = a1.get_cell() if p.origin == 0 else a2.get_cell()

470 pos = np.dot(p.scaled_positions, c)

471 cop = np.dot(p.cop, c)

472 vectors, _lengths = find_mic(pos - cop, c, pbc)

473 newcop = np.dot(p.cop, cell)

474 pos = newcop + vectors

475 for row in pos:

476 newpos.append(row)

477

478 newpos = np.reshape(newpos, (N, 3))

479 num = a1.get_atomic_numbers()

480 child = Atoms(numbers=num, positions=newpos, pbc=pbc, cell=cell,

481 tags=tags)

482 child.wrap()

483 return child