Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" Implementation of a population for maintaining a GA population and 

2proposing structures to pair. """ 

3from math import tanh, sqrt, exp 

4from operator import itemgetter 

5import numpy as np 

6 

7from ase.db.core import now 

8from ase.ga import get_raw_score 

9 

10 

11def count_looks_like(a, all_cand, comp): 

12 """Utility method for counting occurrences.""" 

13 n = 0 

14 for b in all_cand: 

15 if a.info['confid'] == b.info['confid']: 

16 continue 

17 if comp.looks_like(a, b): 

18 n += 1 

19 return n 

20 

21 

22class Population: 

23 """Population class which maintains the current population 

24 and proposes which candidates to pair together. 

25 

26 Parameters: 

27 

28 data_connection: DataConnection object 

29 Bla bla bla. 

30 

31 population_size: int 

32 The number of candidates in the population. 

33 

34 comparator: Comparator object 

35 this will tell if two configurations are equal. 

36 Default compare atoms objects directly. 

37 

38 logfile: str 

39 Text file that contains information about the population 

40 The format is:: 

41 

42 timestamp: generation(if available): id1,id2,id3... 

43 

44 Using this file greatly speeds up convergence checks. 

45 Default None meaning that no file is written. 

46 

47 use_extinct: boolean 

48 Set this to True if mass extinction and the extinct key 

49 are going to be used. Default is False. 

50 

51 rng: Random number generator 

52 By default numpy.random. 

53 """ 

54 def __init__(self, data_connection, population_size, 

55 comparator=None, logfile=None, use_extinct=False, 

56 rng=np.random): 

57 self.dc = data_connection 

58 self.pop_size = population_size 

59 if comparator is None: 

60 from ase.ga.standard_comparators import AtomsComparator 

61 comparator = AtomsComparator() 

62 self.comparator = comparator 

63 self.logfile = logfile 

64 self.use_extinct = use_extinct 

65 self.rng = rng 

66 self.pop = [] 

67 self.pairs = None 

68 self.all_cand = None 

69 self.__initialize_pop__() 

70 

71 def __initialize_pop__(self): 

72 """ Private method that initializes the population when 

73 the population is created. """ 

74 

75 # Get all relaxed candidates from the database 

76 ue = self.use_extinct 

77 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

78 all_cand.sort(key=lambda x: x.info['key_value_pairs']['raw_score'], 

79 reverse=True) 

80 # all_cand.sort(key=lambda x: x.get_potential_energy()) 

81 

82 # Fill up the population with the self.pop_size most stable 

83 # unique candidates. 

84 i = 0 

85 while i < len(all_cand) and len(self.pop) < self.pop_size: 

86 c = all_cand[i] 

87 i += 1 

88 eq = False 

89 for a in self.pop: 

90 if self.comparator.looks_like(a, c): 

91 eq = True 

92 break 

93 if not eq: 

94 self.pop.append(c) 

95 

96 for a in self.pop: 

97 a.info['looks_like'] = count_looks_like(a, all_cand, 

98 self.comparator) 

99 

100 self.all_cand = all_cand 

101 self.__calc_participation__() 

102 

103 def __calc_participation__(self): 

104 """ Determines, from the database, how many times each 

105 candidate has been used to generate new candidates. """ 

106 (participation, pairs) = self.dc.get_participation_in_pairing() 

107 for a in self.pop: 

108 if a.info['confid'] in participation.keys(): 

109 a.info['n_paired'] = participation[a.info['confid']] 

110 else: 

111 a.info['n_paired'] = 0 

112 self.pairs = pairs 

113 

114 def update(self, new_cand=None): 

115 """ New candidates can be added to the database 

116 after the population object has been created. 

117 This method extracts these new candidates from the 

118 database and includes them in the population. """ 

119 

120 if len(self.pop) == 0: 

121 self.__initialize_pop__() 

122 

123 if new_cand is None: 

124 ue = self.use_extinct 

125 new_cand = self.dc.get_all_relaxed_candidates(only_new=True, 

126 use_extinct=ue) 

127 

128 for a in new_cand: 

129 self.__add_candidate__(a) 

130 self.all_cand.append(a) 

131 self.__calc_participation__() 

132 self._write_log() 

133 

134 def get_current_population(self): 

135 """ Returns a copy of the current population. """ 

136 self.update() 

137 return [a.copy() for a in self.pop] 

138 

139 def get_population_after_generation(self, gen): 

140 """ Returns a copy of the population as it where 

141 after generation gen""" 

142 if self.logfile is not None: 

143 fd = open(self.logfile, 'r') 

144 gens = {} 

145 for l in fd: 

146 _, no, popul = l.split(':') 

147 gens[int(no)] = [int(i) for i in popul.split(',')] 

148 fd.close() 

149 return [c.copy() for c in self.all_cand[::-1] 

150 if c.info['relax_id'] in gens[gen]] 

151 

152 all_candidates = [c for c in self.all_cand 

153 if c.info['key_value_pairs']['generation'] <= gen] 

154 cands = [all_candidates[0]] 

155 for b in all_candidates: 

156 if b not in cands: 

157 for a in cands: 

158 if self.comparator.looks_like(a, b): 

159 break 

160 else: 

161 cands.append(b) 

162 pop = cands[:self.pop_size] 

163 return [a.copy() for a in pop] 

164 

165 def __add_candidate__(self, a): 

166 """ Adds a single candidate to the population. """ 

167 

168 # check if the structure is too low in raw score 

169 raw_score_a = get_raw_score(a) 

170 raw_score_worst = get_raw_score(self.pop[-1]) 

171 if raw_score_a < raw_score_worst \ 

172 and len(self.pop) == self.pop_size: 

173 return 

174 

175 # check if the new candidate should 

176 # replace a similar structure in the population 

177 for (i, b) in enumerate(self.pop): 

178 if self.comparator.looks_like(a, b): 

179 if get_raw_score(b) < raw_score_a: 

180 del self.pop[i] 

181 a.info['looks_like'] = count_looks_like(a, 

182 self.all_cand, 

183 self.comparator) 

184 self.pop.append(a) 

185 self.pop.sort(key=lambda x: get_raw_score(x), 

186 reverse=True) 

187 return 

188 

189 # the new candidate needs to be added, so remove the highest 

190 # energy one 

191 if len(self.pop) == self.pop_size: 

192 del self.pop[-1] 

193 

194 # add the new candidate 

195 a.info['looks_like'] = count_looks_like(a, 

196 self.all_cand, 

197 self.comparator) 

198 self.pop.append(a) 

199 self.pop.sort(key=lambda x: get_raw_score(x), reverse=True) 

200 

201 def __get_fitness__(self, indecies, with_history=True): 

202 """Calculates the fitness using the formula from 

203 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

204 

205 Sign change on the fitness compared to the formulation in the 

206 abovementioned paper due to maximizing raw_score instead of 

207 minimizing energy. (Set raw_score=-energy to optimize the energy) 

208 """ 

209 

210 scores = [get_raw_score(x) for x in self.pop] 

211 min_s = min(scores) 

212 max_s = max(scores) 

213 T = min_s - max_s 

214 if isinstance(indecies, int): 

215 indecies = [indecies] 

216 

217 f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.)) 

218 for i in indecies] 

219 if with_history: 

220 M = [float(self.pop[i].info['n_paired']) for i in indecies] 

221 L = [float(self.pop[i].info['looks_like']) for i in indecies] 

222 f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i]) 

223 for i in range(len(f))] 

224 return f 

225 

226 def get_two_candidates(self, with_history=True): 

227 """ Returns two candidates for pairing employing the 

228 fitness criteria from 

229 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

230 and the roulete wheel selection scheme described in 

231 R.L. Johnston Dalton Transactions, 

232 Vol. 22, No. 22. (2003), pp. 4193-4207 

233 """ 

234 

235 if len(self.pop) < 2: 

236 self.update() 

237 

238 if len(self.pop) < 2: 

239 return None 

240 

241 fit = self.__get_fitness__(range(len(self.pop)), with_history) 

242 fmax = max(fit) 

243 c1 = self.pop[0] 

244 c2 = self.pop[0] 

245 used_before = False 

246 while c1.info['confid'] == c2.info['confid'] and not used_before: 

247 nnf = True 

248 while nnf: 

249 t = self.rng.randint(len(self.pop)) 

250 if fit[t] > self.rng.rand() * fmax: 

251 c1 = self.pop[t] 

252 nnf = False 

253 nnf = True 

254 while nnf: 

255 t = self.rng.randint(len(self.pop)) 

256 if fit[t] > self.rng.rand() * fmax: 

257 c2 = self.pop[t] 

258 nnf = False 

259 

260 c1id = c1.info['confid'] 

261 c2id = c2.info['confid'] 

262 used_before = (min([c1id, c2id]), max([c1id, c2id])) in self.pairs 

263 return (c1.copy(), c2.copy()) 

264 

265 def get_one_candidate(self, with_history=True): 

266 """Returns one candidate for mutation employing the 

267 fitness criteria from 

268 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

269 and the roulete wheel selection scheme described in 

270 R.L. Johnston Dalton Transactions, 

271 Vol. 22, No. 22. (2003), pp. 4193-4207 

272 """ 

273 if len(self.pop) < 1: 

274 self.update() 

275 

276 if len(self.pop) < 1: 

277 return None 

278 

279 fit = self.__get_fitness__(range(len(self.pop)), with_history) 

280 fmax = max(fit) 

281 nnf = True 

282 while nnf: 

283 t = self.rng.randint(len(self.pop)) 

284 if fit[t] > self.rng.rand() * fmax: 

285 c1 = self.pop[t] 

286 nnf = False 

287 

288 return c1.copy() 

289 

290 def _write_log(self): 

291 """Writes the population to a logfile. 

292 

293 The format is:: 

294 

295 timestamp: generation(if available): id1,id2,id3...""" 

296 if self.logfile is not None: 

297 ids = [str(a.info['relax_id']) for a in self.pop] 

298 if ids != []: 

299 try: 

300 gen_nums = [c.info['key_value_pairs']['generation'] 

301 for c in self.all_cand] 

302 max_gen = max(gen_nums) 

303 except KeyError: 

304 max_gen = ' ' 

305 fd = open(self.logfile, 'a') 

306 fd.write('{time}: {gen}: {pop}\n'.format(time=now(), 

307 pop=','.join(ids), 

308 gen=max_gen)) 

309 fd.close() 

310 

311 def is_uniform(self, func, min_std, pop=None): 

312 """Tests whether the current population is uniform or diverse. 

313 Returns True if uniform, False otherwise. 

314 

315 Parameters: 

316 

317 func: function 

318 that takes one argument an atoms object and returns a value that 

319 will be used for testing against the rest of the population. 

320 

321 min_std: int or float 

322 The minimum standard deviation, if the population has a lower 

323 std dev it is uniform. 

324 

325 pop: list, optional 

326 use this list of Atoms objects instead of the current population. 

327 """ 

328 if pop is None: 

329 pop = self.pop 

330 vals = [func(a) for a in pop] 

331 stddev = np.std(vals) 

332 if stddev < min_std: 

333 return True 

334 return False 

335 

336 def mass_extinction(self, ids): 

337 """Kills every candidate in the database with gaid in the 

338 supplied list of ids. Typically used on the main part of the current 

339 population if the diversity is to small. 

340 

341 Parameters: 

342 

343 ids: list 

344 list of ids of candidates to be killed. 

345 

346 """ 

347 for confid in ids: 

348 self.dc.kill_candidate(confid) 

349 self.pop = [] 

350 

351 

352class RandomPopulation(Population): 

353 def __init__(self, data_connection, population_size, 

354 comparator=None, logfile=None, exclude_used_pairs=False, 

355 bad_candidates=0, use_extinct=False): 

356 self.exclude_used_pairs = exclude_used_pairs 

357 self.bad_candidates = bad_candidates 

358 Population.__init__(self, data_connection, population_size, 

359 comparator, logfile, use_extinct) 

360 

361 def __initialize_pop__(self): 

362 """ Private method that initializes the population when 

363 the population is created. """ 

364 

365 # Get all relaxed candidates from the database 

366 ue = self.use_extinct 

367 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

368 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True) 

369 # all_cand.sort(key=lambda x: x.get_potential_energy()) 

370 

371 if len(all_cand) > 0: 

372 # Fill up the population with the self.pop_size most stable 

373 # unique candidates. 

374 ratings = [] 

375 best_raw = get_raw_score(all_cand[0]) 

376 i = 0 

377 while i < len(all_cand): 

378 c = all_cand[i] 

379 i += 1 

380 eq = False 

381 for a in self.pop: 

382 if self.comparator.looks_like(a, c): 

383 eq = True 

384 break 

385 if not eq: 

386 if len(self.pop) < self.pop_size - self.bad_candidates: 

387 self.pop.append(c) 

388 else: 

389 exp_fact = exp(get_raw_score(c) / best_raw) 

390 ratings.append([c, (exp_fact - 1) * self.rng.rand()]) 

391 ratings.sort(key=itemgetter(1), reverse=True) 

392 

393 for i in range(self.bad_candidates): 

394 self.pop.append(ratings[i][0]) 

395 

396 for a in self.pop: 

397 a.info['looks_like'] = count_looks_like(a, all_cand, 

398 self.comparator) 

399 

400 self.all_cand = all_cand 

401 self.__calc_participation__() 

402 

403 def update(self): 

404 """ The update method in Population will add to the end of 

405 the population, that can't be used here since we might have 

406 bad candidates that need to stay in the population, therefore 

407 just recalc the population every time. """ 

408 

409 self.pop = [] 

410 self.__initialize_pop__() 

411 

412 self._write_log() 

413 

414 def get_one_candidate(self): 

415 """Returns one candidates at random.""" 

416 if len(self.pop) < 1: 

417 self.update() 

418 

419 if len(self.pop) < 1: 

420 return None 

421 

422 t = self.rng.randint(len(self.pop)) 

423 c = self.pop[t] 

424 

425 return c.copy() 

426 

427 def get_two_candidates(self): 

428 """Returns two candidates at random.""" 

429 if len(self.pop) < 2: 

430 self.update() 

431 

432 if len(self.pop) < 2: 

433 return None 

434 

435 c1 = self.pop[0] 

436 c2 = self.pop[0] 

437 used_before = False 

438 while c1.info['confid'] == c2.info['confid'] and not used_before: 

439 t = self.rng.randint(len(self.pop)) 

440 c1 = self.pop[t] 

441 t = self.rng.randint(len(self.pop)) 

442 c2 = self.pop[t] 

443 

444 c1id = c1.info['confid'] 

445 c2id = c2.info['confid'] 

446 used_before = (tuple(sorted([c1id, c2id])) in self.pairs and 

447 self.exclude_used_pairs) 

448 return (c1.copy(), c2.copy()) 

449 

450 

451class FitnessSharingPopulation(Population): 

452 """ Fitness sharing population that penalizes structures if they are 

453 too similar. This is determined by a distance measure 

454 

455 Parameters: 

456 

457 comp_key: string 

458 Key where the distance measure can be found in the 

459 atoms.info['key_value_pairs'] dictionary. 

460 

461 threshold: float or int 

462 Value above which no penalization of the fitness takes place 

463 

464 alpha_sh: float or int 

465 Determines the shape of the sharing function. 

466 Default is 1, which gives a linear sharing function. 

467 

468 """ 

469 def __init__(self, data_connection, population_size, 

470 comp_key, threshold, alpha_sh=1., 

471 comparator=None, logfile=None, use_extinct=False): 

472 self.comp_key = comp_key 

473 self.dt = threshold # dissimilarity threshold 

474 self.alpha_sh = alpha_sh 

475 self.fit_scaling = 1. 

476 

477 self.sh_cache = dict() 

478 

479 Population.__init__(self, data_connection, population_size, 

480 comparator, logfile, use_extinct) 

481 

482 def __get_fitness__(self, candidates): 

483 """Input should be sorted according to raw_score.""" 

484 max_s = get_raw_score(candidates[0]) 

485 min_s = get_raw_score(candidates[-1]) 

486 T = min_s - max_s 

487 

488 shared_fit = [] 

489 for c in candidates: 

490 sc = get_raw_score(c) 

491 obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.)) 

492 m = 1. 

493 ck = c.info['key_value_pairs'][self.comp_key] 

494 for other in candidates: 

495 if other != c: 

496 name = tuple(sorted([c.info['confid'], 

497 other.info['confid']])) 

498 if name not in self.sh_cache: 

499 ok = other.info['key_value_pairs'][self.comp_key] 

500 d = abs(ck - ok) 

501 if d < self.dt: 

502 v = 1 - (d / self.dt)**self.alpha_sh 

503 self.sh_cache[name] = v 

504 else: 

505 self.sh_cache[name] = 0 

506 m += self.sh_cache[name] 

507 

508 shf = (obj_fit ** self.fit_scaling) / m 

509 shared_fit.append(shf) 

510 return shared_fit 

511 

512 def update(self): 

513 """ The update method in Population will add to the end of 

514 the population, that can't be used here since the shared fitness 

515 will change for all candidates when new are added, therefore 

516 just recalc the population every time. """ 

517 

518 self.pop = [] 

519 self.__initialize_pop__() 

520 

521 self._write_log() 

522 

523 def __initialize_pop__(self): 

524 # Get all relaxed candidates from the database 

525 ue = self.use_extinct 

526 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

527 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True) 

528 

529 if len(all_cand) > 0: 

530 shared_fit = self.__get_fitness__(all_cand) 

531 all_sorted = list(zip(*sorted(zip(shared_fit, all_cand), 

532 reverse=True)))[1] 

533 

534 # Fill up the population with the self.pop_size most stable 

535 # unique candidates. 

536 i = 0 

537 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

538 c = all_sorted[i] 

539 i += 1 

540 eq = False 

541 for a in self.pop: 

542 if self.comparator.looks_like(a, c): 

543 eq = True 

544 break 

545 if not eq: 

546 self.pop.append(c) 

547 

548 for a in self.pop: 

549 a.info['looks_like'] = count_looks_like(a, all_cand, 

550 self.comparator) 

551 self.all_cand = all_cand 

552 

553 def get_two_candidates(self): 

554 """ Returns two candidates for pairing employing the 

555 fitness criteria from 

556 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

557 and the roulete wheel selection scheme described in 

558 R.L. Johnston Dalton Transactions, 

559 Vol. 22, No. 22. (2003), pp. 4193-4207 

560 """ 

561 

562 if len(self.pop) < 2: 

563 self.update() 

564 

565 if len(self.pop) < 2: 

566 return None 

567 

568 fit = self.__get_fitness__(self.pop) 

569 fmax = max(fit) 

570 c1 = self.pop[0] 

571 c2 = self.pop[0] 

572 while c1.info['confid'] == c2.info['confid']: 

573 nnf = True 

574 while nnf: 

575 t = self.rng.randint(len(self.pop)) 

576 if fit[t] > self.rng.rand() * fmax: 

577 c1 = self.pop[t] 

578 nnf = False 

579 nnf = True 

580 while nnf: 

581 t = self.rng.randint(len(self.pop)) 

582 if fit[t] > self.rng.rand() * fmax: 

583 c2 = self.pop[t] 

584 nnf = False 

585 

586 return (c1.copy(), c2.copy()) 

587 

588 

589class RankFitnessPopulation(Population): 

590 """ Ranks the fitness relative to set variable to flatten the surface 

591 in a certain direction such that mating across variable is equally 

592 likely irrespective of raw_score. 

593 

594 Parameters: 

595 

596 variable_function: function 

597 A function that takes as input an Atoms object and returns 

598 the variable that differentiates the ranks. 

599 

600 exp_function: boolean 

601 If True use an exponential function for ranking the fitness. 

602 If False use the same as in Population. Default True. 

603 

604 exp_prefactor: float 

605 The prefactor used in the exponential fitness scaling function. 

606 Default 0.5 

607 """ 

608 def __init__(self, data_connection, population_size, variable_function, 

609 comparator=None, logfile=None, use_extinct=False, 

610 exp_function=True, exp_prefactor=0.5): 

611 self.exp_function = exp_function 

612 self.exp_prefactor = exp_prefactor 

613 self.vf = variable_function 

614 # The current fitness is set at each update of the population 

615 self.current_fitness = None 

616 

617 Population.__init__(self, data_connection, population_size, 

618 comparator, logfile, use_extinct) 

619 

620 def get_rank(self, rcand, key=None): 

621 # Set the initial order of the candidates, will need to 

622 # be returned in this order at the end of ranking. 

623 ordered = list(zip(range(len(rcand)), rcand)) 

624 

625 # Niche and rank candidates. 

626 rec_nic = [] 

627 rank_fit = [] 

628 for o, c in ordered: 

629 if o not in rec_nic: 

630 ntr = [] 

631 ce1 = self.vf(c) 

632 rec_nic.append(o) 

633 ntr.append([o, c]) 

634 for oother, cother in ordered: 

635 if oother not in rec_nic: 

636 ce2 = self.vf(cother) 

637 if ce1 == ce2: 

638 # put the now processed in oother 

639 # in rec_nic as well 

640 rec_nic.append(oother) 

641 ntr.append([oother, cother]) 

642 # Each niche is sorted according to raw_score and 

643 # assigned a fitness according to the ranking of 

644 # the candidates 

645 ntr.sort(key=lambda x: x[1].info['key_value_pairs'][key], 

646 reverse=True) 

647 start_rank = -1 

648 cor = 0 

649 for on, cn in ntr: 

650 rank = start_rank - cor 

651 rank_fit.append([on, cn, rank]) 

652 cor += 1 

653 # The original order is reformed 

654 rank_fit.sort(key=itemgetter(0), reverse=False) 

655 return np.array(list(zip(*rank_fit))[2]) 

656 

657 def __get_fitness__(self, candidates): 

658 expf = self.exp_function 

659 rfit = self.get_rank(candidates, key='raw_score') 

660 

661 if not expf: 

662 rmax = max(rfit) 

663 rmin = min(rfit) 

664 T = rmin - rmax 

665 # If using obj_rank probability, must have non-zero T val. 

666 # pop_size must be greater than number of permutations. 

667 # We test for this here 

668 msg = "Equal fitness for best and worst candidate in the " 

669 msg += "population! Fitness scaling is impossible! " 

670 msg += "Try with a larger population." 

671 assert T != 0., msg 

672 return 0.5 * (1. - np.tanh(2. * (rfit - rmax) / T - 1.)) 

673 else: 

674 return self.exp_prefactor ** (-rfit - 1) 

675 

676 def update(self): 

677 """ The update method in Population will add to the end of 

678 the population, that can't be used here since the fitness 

679 will potentially change for all candidates when new are added, 

680 therefore just recalc the population every time. """ 

681 

682 self.pop = [] 

683 self.__initialize_pop__() 

684 self.current_fitness = self.__get_fitness__(self.pop) 

685 

686 self._write_log() 

687 

688 def __initialize_pop__(self): 

689 # Get all relaxed candidates from the database 

690 ue = self.use_extinct 

691 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

692 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True) 

693 

694 if len(all_cand) > 0: 

695 fitf = self.__get_fitness__(all_cand) 

696 all_sorted = list(zip(fitf, all_cand)) 

697 all_sorted.sort(key=itemgetter(0), reverse=True) 

698 sort_cand = [] 

699 for _, t2 in all_sorted: 

700 sort_cand.append(t2) 

701 all_sorted = sort_cand 

702 

703 # Fill up the population with the self.pop_size most stable 

704 # unique candidates. 

705 i = 0 

706 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

707 c = all_sorted[i] 

708 c_vf = self.vf(c) 

709 i += 1 

710 eq = False 

711 for a in self.pop: 

712 a_vf = self.vf(a) 

713 # Only run comparator if the variable_function (self.vf) 

714 # returns the same. If it returns something different the 

715 # candidates are inherently different. 

716 # This is done to speed up. 

717 if a_vf == c_vf: 

718 if self.comparator.looks_like(a, c): 

719 eq = True 

720 break 

721 if not eq: 

722 self.pop.append(c) 

723 self.all_cand = all_cand 

724 

725 def get_two_candidates(self): 

726 """ Returns two candidates for pairing employing the 

727 roulete wheel selection scheme described in 

728 R.L. Johnston Dalton Transactions, 

729 Vol. 22, No. 22. (2003), pp. 4193-4207 

730 """ 

731 

732 if len(self.pop) < 2: 

733 self.update() 

734 

735 if len(self.pop) < 2: 

736 return None 

737 

738 # Use saved fitness 

739 fit = self.current_fitness 

740 fmax = max(fit) 

741 c1 = self.pop[0] 

742 c2 = self.pop[0] 

743 while c1.info['confid'] == c2.info['confid']: 

744 nnf = True 

745 while nnf: 

746 t = self.rng.randint(len(self.pop)) 

747 if fit[t] > self.rng.rand() * fmax: 

748 c1 = self.pop[t] 

749 nnf = False 

750 nnf = True 

751 while nnf: 

752 t = self.rng.randint(len(self.pop)) 

753 if fit[t] > self.rng.rand() * fmax: 

754 c2 = self.pop[t] 

755 nnf = False 

756 

757 return (c1.copy(), c2.copy()) 

758 

759 

760class MultiObjectivePopulation(RankFitnessPopulation): 

761 """ Allows for assignment of fitness based on a set of two variables 

762 such that fitness is ranked according to a Pareto-front of 

763 non-dominated candidates. 

764 

765 Parameters 

766 ---------- 

767 abs_data: list 

768 Set of key_value_pairs in atoms object for which fitness should 

769 be assigned based on absolute value. 

770 

771 rank_data: list 

772 Set of key_value_pairs in atoms object for which data should 

773 be ranked in order to ascribe fitness. 

774 

775 variable_function: function 

776 A function that takes as input an Atoms object and returns 

777 the variable that differentiates the ranks. Only use if 

778 data is ranked. 

779 

780 exp_function: boolean 

781 If True use an exponential function for ranking the fitness. 

782 If False use the same as in Population. Default True. 

783 

784 exp_prefactor: float 

785 The prefactor used in the exponential fitness scaling function. 

786 Default 0.5 

787 

788 """ 

789 

790 def __init__(self, data_connection, population_size, 

791 variable_function=None, comparator=None, logfile=None, 

792 use_extinct=False, abs_data=None, rank_data=None, 

793 exp_function=True, exp_prefactor=0.5): 

794 # The current fitness is set at each update of the population 

795 self.current_fitness = None 

796 

797 if rank_data is None: 

798 rank_data = [] 

799 self.rank_data = rank_data 

800 

801 if abs_data is None: 

802 abs_data = [] 

803 self.abs_data = abs_data 

804 

805 RankFitnessPopulation.__init__(self, data_connection, population_size, 

806 variable_function, comparator, logfile, 

807 use_extinct, exp_function, 

808 exp_prefactor) 

809 

810 def get_nonrank(self, nrcand, key=None): 

811 """"Returns a list of fitness values.""" 

812 nrc_list = [] 

813 for nrc in nrcand: 

814 nrc_list.append(nrc.info['key_value_pairs'][key]) 

815 return nrc_list 

816 

817 def __get_fitness__(self, candidates): 

818 # There are no defaults set for the datasets to be 

819 # used in this function, as such we test that the 

820 # user has specified at least two here. 

821 msg = "This is a multi-objective fitness function" 

822 msg += " so there must be at least two datasets" 

823 msg += " stated in the rank_data and abs_data variables" 

824 assert len(self.rank_data) + len(self.abs_data) >= 2, msg 

825 

826 expf = self.exp_function 

827 

828 all_fitnesses = [] 

829 used = set() 

830 for rd in self.rank_data: 

831 used.add(rd) 

832 # Build ranked fitness based on rd 

833 all_fitnesses.append(self.get_rank(candidates, key=rd)) 

834 

835 for d in self.abs_data: 

836 if d not in used: 

837 used.add(d) 

838 # Build fitness based on d 

839 all_fitnesses.append(self.get_nonrank(candidates, key=d)) 

840 

841 # Set the initial order of the ranks, will need to 

842 # be returned in this order at the end. 

843 fordered = list(zip(range(len(all_fitnesses[0])), *all_fitnesses)) 

844 mvf_rank = -1 # Start multi variable rank at -1. 

845 rec_vrc = [] # A record of already ranked candidates. 

846 mvf_list = [] # A list for all candidate ranks. 

847 # Sort by raw_score_1 in case this is different from 

848 # the stored raw_score() variable that all_cands are 

849 # sorted by. 

850 fordered.sort(key=itemgetter(1), reverse=True) 

851 # Niche candidates with equal or better raw_score to 

852 # current candidate. 

853 for a in fordered: 

854 order, rest = a[0], a[1:] 

855 if order not in rec_vrc: 

856 pff = [] 

857 pff2 = [] 

858 rec_vrc.append(order) 

859 pff.append((order, rest)) 

860 for b in fordered: 

861 border, brest = b[0], b[1:] 

862 if border not in rec_vrc: 

863 if np.any(np.array(brest) >= np.array(rest)): 

864 pff.append((border, brest)) 

865 # Remove any candidate from pff list that is dominated 

866 # by another in the list. 

867 for na in pff: 

868 norder, nrest = na[0], na[1:] 

869 dom = False 

870 for nb in pff: 

871 nborder, nbrest = nb[0], nb[1:] 

872 if norder != nborder: 

873 if np.all(np.array(nbrest) > np.array(nrest)): 

874 dom = True 

875 if not dom: 

876 pff2.append((norder, nrest)) 

877 # Assign pareto rank from -1 to -N niches. 

878 for ffa in pff2: 

879 fforder, ffrest = ffa[0], ffa[1:] 

880 rec_vrc.append(fforder) 

881 mvf_list.append((fforder, mvf_rank, ffrest)) 

882 mvf_rank = mvf_rank - 1 

883 # The original order is reformed 

884 mvf_list.sort(key=itemgetter(0), reverse=False) 

885 rfro = np.array(list(zip(*mvf_list))[1]) 

886 

887 if not expf: 

888 rmax = max(rfro) 

889 rmin = min(rfro) 

890 T = rmin - rmax 

891 # If using obj_rank probability, must have non-zero T val. 

892 # pop_size must be greater than number of permutations. 

893 # We test for this here 

894 msg = "Equal fitness for best and worst candidate in the " 

895 msg += "population! Fitness scaling is impossible! " 

896 msg += "Try with a larger population." 

897 assert T != 0., msg 

898 return 0.5 * (1. - np.tanh(2. * (rfro - rmax) / T - 1.)) 

899 else: 

900 return self.exp_prefactor ** (-rfro - 1) 

901 

902 def __initialize_pop__(self): 

903 # Get all relaxed candidates from the database 

904 ue = self.use_extinct 

905 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

906 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True) 

907 

908 if len(all_cand) > 0: 

909 fitf = self.__get_fitness__(all_cand) 

910 all_sorted = list(zip(fitf, all_cand)) 

911 all_sorted.sort(key=itemgetter(0), reverse=True) 

912 sort_cand = [] 

913 for _, t2 in all_sorted: 

914 sort_cand.append(t2) 

915 all_sorted = sort_cand 

916 

917 # Fill up the population with the self.pop_size most stable 

918 # unique candidates. 

919 i = 0 

920 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

921 c = all_sorted[i] 

922 # Use variable_function to decide whether to run comparator 

923 # if the function has been defined by the user. This does not 

924 # need to be dependent on using the rank_data function. 

925 if self.vf is not None: 

926 c_vf = self.vf(c) 

927 i += 1 

928 eq = False 

929 for a in self.pop: 

930 if self.vf is not None: 

931 a_vf = self.vf(a) 

932 # Only run comparator if the variable_function 

933 # (self.vf) returns the same. If it returns something 

934 # different the candidates are inherently different. 

935 # This is done to speed up. 

936 if a_vf == c_vf: 

937 if self.comparator.looks_like(a, c): 

938 eq = True 

939 break 

940 else: 

941 if self.comparator.looks_like(a, c): 

942 eq = True 

943 break 

944 if not eq: 

945 self.pop.append(c) 

946 self.all_cand = all_cand