Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Tools for generating new random starting candidates.""" 

2import numpy as np 

3from ase import Atoms 

4from ase.data import atomic_numbers 

5from ase.build import molecule 

6from ase.ga.utilities import (closest_distances_generator, atoms_too_close, 

7 atoms_too_close_two_sets) 

8 

9 

10class StartGenerator: 

11 """Class for generating random starting candidates. 

12 

13 Its basic task consists of randomly placing atoms or 

14 molecules within a predescribed box, while respecting 

15 certain minimal interatomic distances. 

16 

17 Depending on the problem at hand, certain box vectors 

18 may not be known or chosen beforehand, and hence also 

19 need to be generated at random. Common cases include 

20 bulk crystals, films and chains, with respectively 

21 3, 2 and 1 unknown cell vectors. 

22 

23 Parameters: 

24 

25 slab: Atoms object 

26 Specifies the cell vectors and periodic boundary conditions 

27 to be applied to the randomly generated structures. 

28 Any included atoms (e.g. representing an underlying slab) 

29 are copied to these new structures. 

30 Variable cell vectors (see number_of_variable_cell_vectors) 

31 will be ignored because these will be generated at random. 

32 

33 blocks: list 

34 List of building units for the structure. Each item can be: 

35 

36 * an integer: representing a single atom by its atomic number, 

37 * a string: for a single atom (a chemical symbol) or a 

38 molecule (name recognized by ase.build.molecule), 

39 * an Atoms object, 

40 * an (A, B) tuple or list where A is any of the above 

41 and B is the number of A units to include. 

42 

43 A few examples: 

44 

45 >>> blocks = ['Ti'] * 4 + ['O'] * 8 

46 >>> blocks = [('Ti', 4), ('O', 8)] 

47 >>> blocks = [('CO2', 3)] # 3 CO2 molecules 

48 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]]) 

49 >>> blocks = [(co, 3)] 

50 

51 Each individual block (single atom or molecule) in the 

52 randomly generated candidates is given a unique integer 

53 tag. These can be used to preserve the molecular identity 

54 of these subunits. 

55 

56 blmin: dict or float 

57 Dictionary with minimal interatomic distances. 

58 If a number is provided instead, the dictionary will 

59 be generated with this ratio of covalent bond radii. 

60 Note: when preserving molecular identity (see use_tags), 

61 the blmin dict will (naturally) only be applied 

62 to intermolecular distances (not the intramolecular 

63 ones). 

64 

65 number_of_variable_cell_vectors: int (default 0) 

66 The number of variable cell vectors (0, 1, 2 or 3). 

67 To keep things simple, it is the 'first' vectors which 

68 will be treated as variable, i.e. the 'a' vector in the 

69 univariate case, the 'a' and 'b' vectors in the bivariate 

70 case, etc. 

71 

72 box_to_place_in: [list, list of lists] (default None) 

73 The box in which the atoms can be placed. 

74 The default (None) means the box is equal to the 

75 entire unit cell of the 'slab' object. 

76 In many cases, however, smaller boxes are desired 

77 (e.g. for adsorbates on a slab surface or for isolated 

78 clusters). Then, box_to_place_in can be set as 

79 [p0, [v1, v2, v3]] with positions being generated as 

80 p0 + r1 * v1 + r2 * v2 + r3 + v3. 

81 In case of one or more variable cell vectors, 

82 the corresponding items in p0/v1/v2/v3 will be ignored. 

83 

84 box_volume: int or float or None (default) 

85 Initial guess for the box volume in cubic Angstrom 

86 (used in generating the variable cell vectors). 

87 Typical values in the solid state are 8-12 A^3 per atom. 

88 If there are no variable cell vectors, the default None 

89 is required (box volume equal to the box_to_place_in 

90 volume). 

91 

92 splits: dict or None 

93 Splitting scheme for increasing the translational symmetry 

94 in the random candidates, based on: 

95 

96 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__ 

97 

98 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007 

99 

100 This should be a dict specifying the relative probabilities 

101 for each split, written as tuples. For example, 

102 

103 >>> splits = {(2,): 3, (1,): 1} 

104 

105 This means that, for each structure, either a splitting 

106 factor of 2 is applied to one randomly chosen axis, 

107 or a splitting factor of 1 is applied (i.e., no splitting). 

108 The probability ratio of the two scenararios will be 3:1, 

109 i.e. 75% chance for the former and 25% chance for the latter 

110 splitting scheme. Only the directions in which the 'slab' 

111 object is periodic are eligible for splitting. 

112 

113 To e.g. always apply splitting factors of 2 and 3 along two 

114 randomly chosen axes: 

115 

116 >>> splits = {(2, 3): 1} 

117 

118 By default, no splitting is applied (splits = None = {(1,): 1}). 

119 

120 cellbounds: ase.ga.utilities.CellBounds instance 

121 Describing limits on the cell shape, see 

122 :class:`~ase.ga.utilities.CellBounds`. 

123 Note that it only make sense to impose conditions 

124 regarding cell vectors which have been marked as 

125 variable (see number_of_variable_cell_vectors). 

126 

127 test_dist_to_slab: bool (default True) 

128 Whether to make sure that the distances between 

129 the atoms and the slab satisfy the blmin. 

130 

131 test_too_far: bool (default True) 

132 Whether to also make sure that there are no isolated 

133 atoms or molecules with nearest-neighbour bond lengths 

134 larger than 2x the value in the blmin dict. 

135 

136 rng: Random number generator 

137 By default numpy.random. 

138 """ 

139 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0, 

140 box_to_place_in=None, box_volume=None, splits=None, 

141 cellbounds=None, test_dist_to_slab=True, test_too_far=True, 

142 rng=np.random): 

143 

144 self.slab = slab 

145 

146 self.blocks = [] 

147 for item in blocks: 

148 if isinstance(item, tuple) or isinstance(item, list): 

149 assert len(item) == 2, 'Item length %d != 2' % len(item) 

150 block, count = item 

151 else: 

152 block, count = item, 1 

153 

154 # Convert block into Atoms object 

155 if isinstance(block, Atoms): 

156 pass 

157 elif block in atomic_numbers: 

158 block = Atoms(block) 

159 elif isinstance(block, str): 

160 block = molecule(block) 

161 elif block in atomic_numbers.values(): 

162 block = Atoms(numbers=[block]) 

163 else: 

164 raise ValueError('Cannot parse this block:', block) 

165 

166 # Add to self.blocks, taking into account that 

167 # we want to group the same blocks together. 

168 # This is important for the cell splitting. 

169 for i, (b, c) in enumerate(self.blocks): 

170 if block == b: 

171 self.blocks[i][1] += count 

172 break 

173 else: 

174 self.blocks.append([block, count]) 

175 

176 if isinstance(blmin, dict): 

177 self.blmin = blmin 

178 else: 

179 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks]) 

180 self.blmin = closest_distances_generator(numbers, 

181 ratio_of_covalent_radii=blmin) 

182 

183 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors 

184 assert self.number_of_variable_cell_vectors in range(4) 

185 if len(self.slab) > 0: 

186 msg = 'Including atoms in the slab only makes sense' 

187 msg += ' if there are no variable unit cell vectors' 

188 assert self.number_of_variable_cell_vectors == 0, msg 

189 for i in range(self.number_of_variable_cell_vectors): 

190 msg = 'Unit cell %s-vector is marked as variable ' % ('abc'[i]) 

191 msg += 'and slab must then also be periodic in this direction' 

192 assert self.slab.pbc[i], msg 

193 

194 if box_to_place_in is None: 

195 p0 = np.array([0., 0., 0.]) 

196 cell = self.slab.get_cell() 

197 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]] 

198 else: 

199 self.box_to_place_in = box_to_place_in 

200 

201 if box_volume is None: 

202 assert self.number_of_variable_cell_vectors == 0 

203 box_volume = abs(np.linalg.det(self.box_to_place_in[1])) 

204 else: 

205 assert self.number_of_variable_cell_vectors > 0 

206 self.box_volume = box_volume 

207 assert self.box_volume > 0 

208 

209 if splits is None: 

210 splits = {(1,): 1} 

211 tot = sum([v for v in splits.values()]) # normalization 

212 self.splits = {k: v * 1. / tot for k, v in splits.items()} 

213 

214 self.cellbounds = cellbounds 

215 self.test_too_far = test_too_far 

216 self.test_dist_to_slab = test_dist_to_slab 

217 self.rng = rng 

218 

219 def get_new_candidate(self, maxiter=None): 

220 """Returns a new candidate. 

221 

222 maxiter: upper bound on the total number of times 

223 the random position generator is called 

224 when generating the new candidate. 

225 

226 By default (maxiter=None) no such bound 

227 is imposed. If the generator takes too 

228 long time to create a new candidate, it 

229 may be suitable to specify a finite value. 

230 When the bound is exceeded, None is returned. 

231 """ 

232 pbc = self.slab.get_pbc() 

233 

234 # Choose cell splitting 

235 r = self.rng.rand() 

236 cumprob = 0 

237 for split, prob in self.splits.items(): 

238 cumprob += prob 

239 if cumprob > r: 

240 break 

241 

242 # Choose direction(s) along which to split 

243 # and by how much 

244 directions = [i for i in range(3) if pbc[i]] 

245 repeat = [1, 1, 1] 

246 if len(directions) > 0: 

247 for number in split: 

248 d = self.rng.choice(directions) 

249 repeat[d] = number 

250 repeat = tuple(repeat) 

251 

252 # Generate the 'full' unit cell 

253 # for the eventual candidates 

254 cell = self.generate_unit_cell(repeat) 

255 if self.number_of_variable_cell_vectors == 0: 

256 assert np.allclose(cell, self.slab.get_cell()) 

257 

258 # Make the smaller 'box' in which we are 

259 # allowed to place the atoms and which will 

260 # then be repeated to fill the 'full' unit cell 

261 box = np.copy(cell) 

262 for i in range(self.number_of_variable_cell_vectors, 3): 

263 box[i] = np.array(self.box_to_place_in[1][i]) 

264 box /= np.array([repeat]).T 

265 

266 # Here we gather the (reduced) number of blocks 

267 # to put in the smaller box, and the 'surplus' 

268 # occurring when the block count is not divisible 

269 # by the number of repetitions. 

270 # E.g. if we have a ('Ti', 4) block and do a 

271 # [2, 3, 1] repetition, we employ a ('Ti', 1) 

272 # block in the smaller box and delete 2 out 6 

273 # Ti atoms afterwards 

274 nrep = int(np.prod(repeat)) 

275 blocks, ids, surplus = [], [], [] 

276 for i, (block, count) in enumerate(self.blocks): 

277 count_part = int(np.ceil(count * 1. / nrep)) 

278 blocks.extend([block] * count_part) 

279 surplus.append(nrep * count_part - count) 

280 ids.extend([i] * count_part) 

281 

282 N_blocks = len(blocks) 

283 

284 # Shuffle the ordering so different blocks 

285 # are added in random order 

286 order = np.arange(N_blocks) 

287 self.rng.shuffle(order) 

288 blocks = [blocks[i] for i in order] 

289 ids = np.array(ids)[order] 

290 

291 # Add blocks one by one until we have found 

292 # a valid candidate 

293 blmin = self.blmin 

294 blmin_too_far = {key: 2 * val for key, val in blmin.items()} 

295 

296 niter = 0 

297 while maxiter is None or niter < maxiter: 

298 cand = Atoms('', cell=box, pbc=pbc) 

299 

300 for i in range(N_blocks): 

301 atoms = blocks[i].copy() 

302 atoms.set_tags(i) 

303 atoms.set_pbc(pbc) 

304 atoms.set_cell(box, scale_atoms=False) 

305 

306 while maxiter is None or niter < maxiter: 

307 niter += 1 

308 cop = atoms.get_positions().mean(axis=0) 

309 pos = np.dot(self.rng.rand(1, 3), box) 

310 atoms.translate(pos - cop) 

311 

312 if len(atoms) > 1: 

313 # Apply a random rotation to multi-atom blocks 

314 phi, theta, psi = 360 * self.rng.rand(3) 

315 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi, 

316 center=pos) 

317 

318 if not atoms_too_close_two_sets(cand, atoms, blmin): 

319 cand += atoms 

320 break 

321 else: 

322 # Reached maximum iteration number 

323 # Break out of the for loop above 

324 cand = None 

325 break 

326 

327 if cand is None: 

328 # Exit the main while loop 

329 break 

330 

331 # Rebuild the candidate after repeating, 

332 # randomly deleting surplus blocks and 

333 # sorting back to the original order 

334 cand_full = cand.repeat(repeat) 

335 

336 tags_full = cand_full.get_tags() 

337 for i in range(nrep): 

338 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks 

339 cand_full.set_tags(tags_full) 

340 

341 cand = Atoms('', cell=cell, pbc=pbc) 

342 ids_full = np.tile(ids, nrep) 

343 

344 tag_counter = 0 

345 if len(self.slab) > 0: 

346 tag_counter = int(max(self.slab.get_tags())) + 1 

347 

348 for i, (block, count) in enumerate(self.blocks): 

349 tags = np.where(ids_full == i)[0] 

350 bad = self.rng.choice(tags, size=surplus[i], replace=False) 

351 for tag in tags: 

352 if tag not in bad: 

353 select = [a.index for a in cand_full if a.tag == tag] 

354 atoms = cand_full[select] # is indeed a copy! 

355 atoms.set_tags(tag_counter) 

356 assert len(atoms) == len(block) 

357 cand += atoms 

358 tag_counter += 1 

359 

360 for i in range(self.number_of_variable_cell_vectors, 3): 

361 cand.positions[:, i] += self.box_to_place_in[0][i] 

362 

363 # By construction, the minimal interatomic distances 

364 # within the structure should already be respected 

365 assert not atoms_too_close(cand, blmin, use_tags=True), \ 

366 'This is not supposed to happen; please report this bug' 

367 

368 if self.test_dist_to_slab and len(self.slab) > 0: 

369 if atoms_too_close_two_sets(self.slab, cand, blmin): 

370 continue 

371 

372 if self.test_too_far: 

373 tags = cand.get_tags() 

374 

375 for tag in np.unique(tags): 

376 too_far = True 

377 indices_i = np.where(tags == tag)[0] 

378 indices_j = np.where(tags != tag)[0] 

379 too_far = not atoms_too_close_two_sets(cand[indices_i], 

380 cand[indices_j], 

381 blmin_too_far) 

382 

383 if too_far and len(self.slab) > 0: 

384 # the block is too far from the rest 

385 # but might still be sufficiently 

386 # close to the slab 

387 too_far = not atoms_too_close_two_sets(cand[indices_i], 

388 self.slab, 

389 blmin_too_far) 

390 if too_far: 

391 break 

392 else: 

393 too_far = False 

394 

395 if too_far: 

396 continue 

397 

398 # Passed all the tests 

399 cand = self.slab + cand 

400 cand.set_cell(cell, scale_atoms=False) 

401 break 

402 else: 

403 # Reached max iteration count in the while loop 

404 return None 

405 

406 return cand 

407 

408 def generate_unit_cell(self, repeat): 

409 """Generates a random unit cell. 

410 

411 For this, we use the vectors in self.slab.cell 

412 in the fixed directions and randomly generate 

413 the variable ones. For such a cell to be valid, 

414 it has to satisfy the self.cellbounds constraints. 

415 

416 The cell will also be such that the volume of the 

417 box in which the atoms can be placed (box limits 

418 described by self.box_to_place_in) is equal to 

419 self.box_volume. 

420 

421 Parameters: 

422 

423 repeat: tuple of 3 integers 

424 Indicates by how much each cell vector 

425 will later be reduced by cell splitting. 

426 

427 This is used to ensure that the original 

428 cell is large enough so that the cell lengths 

429 of the smaller cell exceed the largest 

430 (X,X)-minimal-interatomic-distance in self.blmin. 

431 """ 

432 # Find the minimal cell length 'Lmin' 

433 # that we need in order to ensure that 

434 # an added atom or molecule will never 

435 # be 'too close to itself' 

436 Lmin = 0. 

437 for atoms, count in self.blocks: 

438 dist = atoms.get_all_distances(mic=False, vector=False) 

439 num = atoms.get_atomic_numbers() 

440 

441 for i in range(len(atoms)): 

442 dist[i, i] += self.blmin[(num[i], num[i])] 

443 for j in range(i): 

444 bl = self.blmin[(num[i], num[j])] 

445 dist[i, j] += bl 

446 dist[j, i] += bl 

447 

448 L = np.max(dist) 

449 if L > Lmin: 

450 Lmin = L 

451 

452 # Generate a suitable unit cell 

453 valid = False 

454 while not valid: 

455 cell = np.zeros((3, 3)) 

456 

457 for i in range(self.number_of_variable_cell_vectors): 

458 # on-diagonal values 

459 cell[i, i] = self.rng.rand() * np.cbrt(self.box_volume) 

460 cell[i, i] *= repeat[i] 

461 for j in range(i): 

462 # off-diagonal values 

463 cell[i, j] = (self.rng.rand() - 0.5) * cell[i - 1, i - 1] 

464 

465 # volume scaling 

466 for i in range(self.number_of_variable_cell_vectors, 3): 

467 cell[i] = self.box_to_place_in[1][i] 

468 

469 if self.number_of_variable_cell_vectors > 0: 

470 volume = abs(np.linalg.det(cell)) 

471 scaling = self.box_volume / volume 

472 scaling **= 1. / self.number_of_variable_cell_vectors 

473 cell[:self.number_of_variable_cell_vectors] *= scaling 

474 

475 for i in range(self.number_of_variable_cell_vectors, 3): 

476 cell[i] = self.slab.get_cell()[i] 

477 

478 # bounds checking 

479 valid = True 

480 

481 if self.cellbounds is not None: 

482 if not self.cellbounds.is_within_bounds(cell): 

483 valid = False 

484 

485 if valid: 

486 for i in range(3): 

487 if np.linalg.norm(cell[i]) < repeat[i] * Lmin: 

488 assert self.number_of_variable_cell_vectors > 0 

489 valid = False 

490 

491 return cell