Coverage for /builds/debichem-team/python-ase/ase/ga/startgenerator.py: 95.17%

1"""Tools for generating new random starting candidates."""

2import numpy as np

4from ase import Atoms

5from ase.build import molecule

6from ase.data import atomic_numbers

7from ase.ga.utilities import (

8 atoms_too_close,

9 atoms_too_close_two_sets,

10 closest_distances_generator,

11)

14class StartGenerator:

15 """Class for generating random starting candidates.

17 Its basic task consists of randomly placing atoms or

18 molecules within a predescribed box, while respecting

19 certain minimal interatomic distances.

21 Depending on the problem at hand, certain box vectors

22 may not be known or chosen beforehand, and hence also

23 need to be generated at random. Common cases include

24 bulk crystals, films and chains, with respectively

25 3, 2 and 1 unknown cell vectors.

27 Parameters:

29 slab: Atoms object

30 Specifies the cell vectors and periodic boundary conditions

31 to be applied to the randomly generated structures.

32 Any included atoms (e.g. representing an underlying slab)

33 are copied to these new structures.

34 Variable cell vectors (see number_of_variable_cell_vectors)

35 will be ignored because these will be generated at random.

37 blocks: list

38 List of building units for the structure. Each item can be:

40 * an integer: representing a single atom by its atomic number,

41 * a string: for a single atom (a chemical symbol) or a

42 molecule (name recognized by ase.build.molecule),

43 * an Atoms object,

44 * an (A, B) tuple or list where A is any of the above

45 and B is the number of A units to include.

47 A few examples:

49 >>> blocks = ['Ti'] * 4 + ['O'] * 8

50 >>> blocks = [('Ti', 4), ('O', 8)]

51 >>> blocks = [('CO2', 3)] # 3 CO2 molecules

52 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]])

53 >>> blocks = [(co, 3)]

55 Each individual block (single atom or molecule) in the

56 randomly generated candidates is given a unique integer

57 tag. These can be used to preserve the molecular identity

58 of these subunits.

60 blmin: dict or float

61 Dictionary with minimal interatomic distances.

62 If a number is provided instead, the dictionary will

63 be generated with this ratio of covalent bond radii.

64 Note: when preserving molecular identity (see use_tags),

65 the blmin dict will (naturally) only be applied

66 to intermolecular distances (not the intramolecular

67 ones).

69 number_of_variable_cell_vectors: int (default 0)

70 The number of variable cell vectors (0, 1, 2 or 3).

71 To keep things simple, it is the 'first' vectors which

72 will be treated as variable, i.e. the 'a' vector in the

73 univariate case, the 'a' and 'b' vectors in the bivariate

74 case, etc.

76 box_to_place_in: [list, list of lists] (default None)

77 The box in which the atoms can be placed.

78 The default (None) means the box is equal to the

79 entire unit cell of the 'slab' object.

80 In many cases, however, smaller boxes are desired

81 (e.g. for adsorbates on a slab surface or for isolated

82 clusters). Then, box_to_place_in can be set as

83 [p0, [v1, v2, v3]] with positions being generated as

84 p0 + r1 * v1 + r2 * v2 + r3 + v3.

85 In case of one or more variable cell vectors,

86 the corresponding items in p0/v1/v2/v3 will be ignored.

88 box_volume: int or float or None (default)

89 Initial guess for the box volume in cubic Angstrom

90 (used in generating the variable cell vectors).

91 Typical values in the solid state are 8-12 A^3 per atom.

92 If there are no variable cell vectors, the default None

93 is required (box volume equal to the box_to_place_in

94 volume).

96 splits: dict or None

97 Splitting scheme for increasing the translational symmetry

98 in the random candidates, based on:

100 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__

101

102 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007

103

104 This should be a dict specifying the relative probabilities

105 for each split, written as tuples. For example,

106

107 >>> splits = {(2,): 3, (1,): 1}

108

109 This means that, for each structure, either a splitting

110 factor of 2 is applied to one randomly chosen axis,

111 or a splitting factor of 1 is applied (i.e., no splitting).

112 The probability ratio of the two scenararios will be 3:1,

113 i.e. 75% chance for the former and 25% chance for the latter

114 splitting scheme. Only the directions in which the 'slab'

115 object is periodic are eligible for splitting.

116

117 To e.g. always apply splitting factors of 2 and 3 along two

118 randomly chosen axes:

119

120 >>> splits = {(2, 3): 1}

121

122 By default, no splitting is applied (splits = None = {(1,): 1}).

123

124 cellbounds: ase.ga.utilities.CellBounds instance

125 Describing limits on the cell shape, see

126 :class:`~ase.ga.utilities.CellBounds`.

127 Note that it only make sense to impose conditions

128 regarding cell vectors which have been marked as

129 variable (see number_of_variable_cell_vectors).

130

131 test_dist_to_slab: bool (default True)

132 Whether to make sure that the distances between

133 the atoms and the slab satisfy the blmin.

134

135 test_too_far: bool (default True)

136 Whether to also make sure that there are no isolated

137 atoms or molecules with nearest-neighbour bond lengths

138 larger than 2x the value in the blmin dict.

139

140 rng: Random number generator

141 By default numpy.random.

142 """

143

144 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0,

145 box_to_place_in=None, box_volume=None, splits=None,

146 cellbounds=None, test_dist_to_slab=True, test_too_far=True,

147 rng=np.random):

148

149 self.slab = slab

150

151 self.blocks = []

152 for item in blocks:

153 if isinstance(item, (tuple, list)):

154 assert len(item) == 2, 'Item length %d != 2' % len(item)

155 block, count = item

156 else:

157 block, count = item, 1

158

159 # Convert block into Atoms object

160 if isinstance(block, Atoms):

161 pass

162 elif block in atomic_numbers:

163 block = Atoms(block)

164 elif isinstance(block, str):

165 block = molecule(block)

166 elif block in atomic_numbers.values():

167 block = Atoms(numbers=[block])

168 else:

169 raise ValueError('Cannot parse this block:', block)

170

171 # Add to self.blocks, taking into account that

172 # we want to group the same blocks together.

173 # This is important for the cell splitting.

174 for i, (b, c) in enumerate(self.blocks):

175 if block == b:

176 self.blocks[i][1] += count

177 break

178 else:

179 self.blocks.append([block, count])

180

181 if isinstance(blmin, dict):

182 self.blmin = blmin

183 else:

184 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks])

185 self.blmin = closest_distances_generator(

186 numbers,

187 ratio_of_covalent_radii=blmin)

188

189 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors

190 assert self.number_of_variable_cell_vectors in range(4)

191 if len(self.slab) > 0:

192 msg = 'Including atoms in the slab only makes sense'

193 msg += ' if there are no variable unit cell vectors'

194 assert self.number_of_variable_cell_vectors == 0, msg

195 for i in range(self.number_of_variable_cell_vectors):

196 msg = f'Unit cell {("abc"[i])}-vector is marked as variable '

197 msg += 'and slab must then also be periodic in this direction'

198 assert self.slab.pbc[i], msg

199

200 if box_to_place_in is None:

201 p0 = np.array([0., 0., 0.])

202 cell = self.slab.get_cell()

203 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]]

204 else:

205 self.box_to_place_in = box_to_place_in

206

207 if box_volume is None:

208 assert self.number_of_variable_cell_vectors == 0

209 box_volume = abs(np.linalg.det(self.box_to_place_in[1]))

210 else:

211 assert self.number_of_variable_cell_vectors > 0

212 self.box_volume = box_volume

213 assert self.box_volume > 0

214

215 if splits is None:

216 splits = {(1,): 1}

217 tot = sum(v for v in splits.values())

218 self.splits = {k: v * 1. / tot for k, v in splits.items()}

219

220 self.cellbounds = cellbounds

221 self.test_too_far = test_too_far

222 self.test_dist_to_slab = test_dist_to_slab

223 self.rng = rng

224

225 def get_new_candidate(self, maxiter=None):

226 """Returns a new candidate.

227

228 maxiter: upper bound on the total number of times

229 the random position generator is called

230 when generating the new candidate.

231

232 By default (maxiter=None) no such bound

233 is imposed. If the generator takes too

234 long time to create a new candidate, it

235 may be suitable to specify a finite value.

236 When the bound is exceeded, None is returned.

237 """

238 pbc = self.slab.get_pbc()

239

240 # Choose cell splitting

241 r = self.rng.random()

242 cumprob = 0

243 for split, prob in self.splits.items():

244 cumprob += prob

245 if cumprob > r:

246 break

247

248 # Choose direction(s) along which to split

249 # and by how much

250 directions = [i for i in range(3) if pbc[i]]

251 repeat = [1, 1, 1]

252 if len(directions) > 0:

253 for number in split:

254 d = self.rng.choice(directions)

255 repeat[d] = number

256 repeat = tuple(repeat)

257

258 # Generate the 'full' unit cell

259 # for the eventual candidates

260 cell = self.generate_unit_cell(repeat)

261 if self.number_of_variable_cell_vectors == 0:

262 assert np.allclose(cell, self.slab.get_cell())

263

264 # Make the smaller 'box' in which we are

265 # allowed to place the atoms and which will

266 # then be repeated to fill the 'full' unit cell

267 box = np.copy(cell)

268 for i in range(self.number_of_variable_cell_vectors, 3):

269 box[i] = np.array(self.box_to_place_in[1][i])

270 box /= np.array([repeat]).T

271

272 # Here we gather the (reduced) number of blocks

273 # to put in the smaller box, and the 'surplus'

274 # occurring when the block count is not divisible

275 # by the number of repetitions.

276 # E.g. if we have a ('Ti', 4) block and do a

277 # [2, 3, 1] repetition, we employ a ('Ti', 1)

278 # block in the smaller box and delete 2 out 6

279 # Ti atoms afterwards

280 nrep = int(np.prod(repeat))

281 blocks, ids, surplus = [], [], []

282 for i, (block, count) in enumerate(self.blocks):

283 count_part = int(np.ceil(count * 1. / nrep))

284 blocks.extend([block] * count_part)

285 surplus.append(nrep * count_part - count)

286 ids.extend([i] * count_part)

287

288 N_blocks = len(blocks)

289

290 # Shuffle the ordering so different blocks

291 # are added in random order

292 order = np.arange(N_blocks)

293 self.rng.shuffle(order)

294 blocks = [blocks[i] for i in order]

295 ids = np.array(ids)[order]

296

297 # Add blocks one by one until we have found

298 # a valid candidate

299 blmin = self.blmin

300 blmin_too_far = {key: 2 * val for key, val in blmin.items()}

301

302 niter = 0

303 while maxiter is None or niter < maxiter:

304 cand = Atoms('', cell=box, pbc=pbc)

305

306 for i in range(N_blocks):

307 atoms = blocks[i].copy()

308 atoms.set_tags(i)

309 atoms.set_pbc(pbc)

310 atoms.set_cell(box, scale_atoms=False)

311

312 while maxiter is None or niter < maxiter:

313 niter += 1

314 cop = atoms.get_positions().mean(axis=0)

315 pos = np.dot(self.rng.random((1, 3)), box)

316 atoms.translate(pos - cop)

317

318 if len(atoms) > 1:

319 # Apply a random rotation to multi-atom blocks

320 phi, theta, psi = 360 * self.rng.random(3)

321 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi,

322 center=pos)

323

324 if not atoms_too_close_two_sets(cand, atoms, blmin):

325 cand += atoms

326 break

327 else:

328 # Reached maximum iteration number

329 # Break out of the for loop above

330 cand = None

331 break

332

333 if cand is None:

334 # Exit the main while loop

335 break

336

337 # Rebuild the candidate after repeating,

338 # randomly deleting surplus blocks and

339 # sorting back to the original order

340 cand_full = cand.repeat(repeat)

341

342 tags_full = cand_full.get_tags()

343 for i in range(nrep):

344 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks

345 cand_full.set_tags(tags_full)

346

347 cand = Atoms('', cell=cell, pbc=pbc)

348 ids_full = np.tile(ids, nrep)

349

350 tag_counter = 0

351 if len(self.slab) > 0:

352 tag_counter = int(max(self.slab.get_tags())) + 1

353

354 for i, (block, count) in enumerate(self.blocks):

355 tags = np.where(ids_full == i)[0]

356 bad = self.rng.choice(tags, size=surplus[i], replace=False)

357 for tag in tags:

358 if tag not in bad:

359 select = [a.index for a in cand_full if a.tag == tag]

360 atoms = cand_full[select] # is indeed a copy!

361 atoms.set_tags(tag_counter)

362 assert len(atoms) == len(block)

363 cand += atoms

364 tag_counter += 1

365

366 for i in range(self.number_of_variable_cell_vectors, 3):

367 cand.positions[:, i] += self.box_to_place_in[0][i]

368

369 # By construction, the minimal interatomic distances

370 # within the structure should already be respected

371 assert not atoms_too_close(cand, blmin, use_tags=True), \

372 'This is not supposed to happen; please report this bug'

373

374 if self.test_dist_to_slab and len(self.slab) > 0:

375 if atoms_too_close_two_sets(self.slab, cand, blmin):

376 continue

377

378 if self.test_too_far:

379 tags = cand.get_tags()

380

381 for tag in np.unique(tags):

382 too_far = True

383 indices_i = np.where(tags == tag)[0]

384 indices_j = np.where(tags != tag)[0]

385 too_far = not atoms_too_close_two_sets(cand[indices_i],

386 cand[indices_j],

387 blmin_too_far)

388

389 if too_far and len(self.slab) > 0:

390 # the block is too far from the rest

391 # but might still be sufficiently

392 # close to the slab

393 too_far = not atoms_too_close_two_sets(cand[indices_i],

394 self.slab,

395 blmin_too_far)

396 if too_far:

397 break

398 else:

399 too_far = False

400

401 if too_far:

402 continue

403

404 # Passed all the tests

405 cand = self.slab + cand

406 cand.set_cell(cell, scale_atoms=False)

407 break

408 else:

409 # Reached max iteration count in the while loop

410 return None

411

412 return cand

413

414 def generate_unit_cell(self, repeat):

415 """Generates a random unit cell.

416

417 For this, we use the vectors in self.slab.cell

418 in the fixed directions and randomly generate

419 the variable ones. For such a cell to be valid,

420 it has to satisfy the self.cellbounds constraints.

421

422 The cell will also be such that the volume of the

423 box in which the atoms can be placed (box limits

424 described by self.box_to_place_in) is equal to

425 self.box_volume.

426

427 Parameters:

428

429 repeat: tuple of 3 integers

430 Indicates by how much each cell vector

431 will later be reduced by cell splitting.

432

433 This is used to ensure that the original

434 cell is large enough so that the cell lengths

435 of the smaller cell exceed the largest

436 (X,X)-minimal-interatomic-distance in self.blmin.

437 """

438 # Find the minimal cell length 'Lmin'

439 # that we need in order to ensure that

440 # an added atom or molecule will never

441 # be 'too close to itself'

442 Lmin = 0.

443 for atoms, count in self.blocks:

444 dist = atoms.get_all_distances(mic=False, vector=False)

445 num = atoms.get_atomic_numbers()

446

447 for i in range(len(atoms)):

448 dist[i, i] += self.blmin[(num[i], num[i])]

449 for j in range(i):

450 bl = self.blmin[(num[i], num[j])]

451 dist[i, j] += bl

452 dist[j, i] += bl

453

454 L = np.max(dist)

455 if L > Lmin:

456 Lmin = L

457

458 # Generate a suitable unit cell

459 valid = False

460 while not valid:

461 cell = np.zeros((3, 3))

462

463 for i in range(self.number_of_variable_cell_vectors):

464 # on-diagonal values

465 cell[i, i] = self.rng.random() * np.cbrt(self.box_volume)

466 cell[i, i] *= repeat[i]

467 for j in range(i):

468 # off-diagonal values

469 cell[i, j] = (self.rng.random() - 0.5) * cell[i - 1, i - 1]

470

471 # volume scaling

472 for i in range(self.number_of_variable_cell_vectors, 3):

473 cell[i] = self.box_to_place_in[1][i]

474

475 if self.number_of_variable_cell_vectors > 0:

476 volume = abs(np.linalg.det(cell))

477 scaling = self.box_volume / volume

478 scaling **= 1. / self.number_of_variable_cell_vectors

479 cell[:self.number_of_variable_cell_vectors] *= scaling

480

481 for i in range(self.number_of_variable_cell_vectors, 3):

482 cell[i] = self.slab.get_cell()[i]

483

484 # bounds checking

485 valid = True

486

487 if self.cellbounds is not None:

488 if not self.cellbounds.is_within_bounds(cell):

489 valid = False

490

491 if valid:

492 for i in range(3):

493 if np.linalg.norm(cell[i]) < repeat[i] * Lmin:

494 assert self.number_of_variable_cell_vectors > 0

495 valid = False

496

497 return cell