gofee.py 18.9 KB
Newer Older
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
1
import numpy as np
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
2
3
import pickle
from os.path import isfile
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
4

5
6
7
8
9
10
from surrogate.gpr import GPR
from population import population

from ase import Atoms
from ase.io import read, write, Trajectory
from ase.calculators.singlepoint import SinglePointCalculator
11
from ase.calculators.dftb import Dftb
12
13

from bfgslinesearch_zlim import BFGSLineSearch_zlim
14
from bfgslinesearch_constrained import BFGSLineSearch_constrained
15
16
from ase.ga.relax_attaches import VariansBreak
from parallel_utils import split, parallel_function_eval
17
18
19
20
21

from candidate_operations.candidate_generation import OperationSelector
from candidate_operations.basic_mutations import RattleMutation


22
23
24
25
26
27
28
from mpi4py import MPI
world = MPI.COMM_WORLD

import traceback
import sys


Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
29
def relax(structure, calc, Fmax=0.05, steps_max=200, dmax_cov=None):
30
31
32
33
34
35
36
    a = structure.copy()
    # Set calculator 
    a.set_calculator(calc)
    pos_init = a.get_positions()

    # Catch if linesearch fails
    try:
37
38
39
40
        dyn = BFGSLineSearch_constrained(a,
                                         logfile=None,
                                         pos_init=pos_init,
                                         dmax_cov=dmax_cov)
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
41
        dyn.run(fmax = Fmax, steps = steps_max)
42
43
44
45
46
47
48
49
    except Exception as err:
        print('Error in surrogate-relaxation:', err, flush=True)
        traceback.print_exc()
        traceback.print_exc(file=sys.stderr)
    return a


class GOFEE():
50
51
    """GOFEE global structure search method.
        
52
53
54
55
56
57
58
59
    structures: Atoms-object, list of Atoms-objects or None.
    In initial structures from which to start the sesarch.
    If None, the startgenerator must be supplied.
    If less than Ninit structures is supplied, the remaining
    ones are generated using the startgenerator or by rattling
    the supplied structures, depending on wether the
    startgenerator is supplied.

60
61
62
63
    calc: ASE calculator specifying the energy-expression
    with respect to which the atomic coordinates are
    globally optimized.

64
65
66
    gpr: The Gaussian Process Regression model used as the
    surrogate model for the Potential energy surface.
    
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
    startgenerator: Object used to generate initial random
    structures. Must be supplied if structures if structues=None.
    (This is the recommended way to initialize the search.)

    candidate_generator:

    trajectory: Name of trajectory to which all structures,
    evaluated during the search, is saved.

    kappa: How much to weigh predicted uncertainty in acquition
    function.

    max_steps: Number of search steps.

    Ninit: Number of initial structures. If len(structures) <
    Ninit, the remaining structures are generated using the
    startgenerator (if supplied) or by rattling the supplied
    'structures'.

    dmax_cov: Max distance that an atom is allowed to move during
    surrogate relaxation (in units of covalent distance).

    Ncandidates: Number of new cancidate structures generated and
    surrogate-relaxed in each search iteration.
91

92
    population_size: Size of population.
93

94
95
96
97
98
99
100
101
102
103
    dualpoint: boolean
    Whether to use dualpoint evaluation or not.

    min_certainty: float
    Max predicted uncertainty allowed for structures to be
    considdered for evaluation. (in units of the maximum possible
    uncertainty.)

    restart: str
    Filename for restart file.  Default value is *None*.
104
    """
105
106
107
108
109
    def __init__(self, structures=None,
                 calc=None,
                 gpr=None,
                 startgenerator=None,
                 candidate_generator=None,
110
                 trajectory='structures.traj',
111
                 kappa=2,
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
112
                 max_steps=200,
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
113
                 Ninit=10,
114
115
116
117
                 dmax_cov=3.5,
                 Ncandidates=30,
                 population_size=5,
                 dualpoint=True,
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
118
                 min_certainty=0.7,
119
                 restart=None):
120
        
121
122
123
124
        if structures is None:
            assert startgenerator is not None
            self.structures = None
        else:
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
            if isinstance(structures, Atoms):
                self.structures = [structures]
            elif isinstance(structures, list):
                assert isinstance(structures[0], Atoms)
                self.structures = structures
            elif isinstance(structures, str):
                self.structures = read(structures, index=':')
        
        if calc is not None:
            self.calc = calc
        else:
            assert structures is not None
            calc = structures[0].get_calculator()
            assert calc is not None and not isinstance(calc, SinglePointCalculator)
            print('Using calculator from supplied structure(s)')
            self.calc = calc

        if gpr is not None:
            self.gpr = gpr
        else:
            self.gpr = GPR()

147
        if startgenerator is None:
148
149
            assert structures is not None
            self.startgenerator = None
150
151
        else:
            self.startgenerator = startgenerator
152

153
154
155
156
157
158
159
160
161
        if startgenerator is not None:
            self.n_to_optimize = len(self.startgenerator.stoichiometry)
        else:
            self.n_to_optimize = len(self.structures[0])
            for constraint in self.structures[0].constraints:
                if isinstance(constraint, FixAtoms):
                    indices_fixed = constraint.get_indices()
                    self.n_to_optimize -= len(indices_fixed)
                    break
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
162
        
163
164
165
        if candidate_generator is not None:
            self.candidate_generator = candidate_generator
        else:
166
167
168
169
            rattle = RattleMutation(self.n_to_optimize,
                                    Nrattle=3,
                                    rattle_range=4)
            self.candidate_generator = OperationSelector([1.0],[rattle])
170
171
172
173
174
175
176
177
178

        # Initialize population
        self.population = population(population_size=population_size, gpr=self.gpr, similarity2equal=0.9999)

        # Define parallel communication
        self.comm = world.Dup()  # Important to avoid mpi-problems from call to ase.parallel in BFGS
        self.master = self.comm.rank == 0

        self.kappa = kappa
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
179
        self.max_steps = max_steps
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
180
        self.Ninit = Ninit
181
182
183
184
        self.dmax_cov = dmax_cov
        self.Ncandidates = Ncandidates
        self.dualpoint = dualpoint
        self.min_certainty = min_certainty
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
185
186
187
188
189
190
191
192
193
194
        self.restart = restart
        
        if isinstance(trajectory, str):
            self.trajectory = Trajectory(filename=trajectory, mode='a', master=self.master)
            if self.restart:
                self.traj_name = trajectory
        elif isinstance(trajectory, Trajectory):
            self.trajectory = trajectory
        else:
            assert trajectory is None
195

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
196
197
198
199
200
201
202
203
        if restart is None or not isfile(restart):
            self.initialize()
        else:
            self.read()
            self.comm.barrier()

    def initialize(self):
        self.steps = 0
204
205

    def get_initial_structures(self):
206
207
208
209
210
211
212
213
214
        """Method to prepare the initial structures for the search.
        
        The method makes sure that there are atleast self.Ninit
        initial structures.
        These structures are first of all the potentially supplied
        structures. If more structures are required, these are
        generated using self.startgenerator (if supplied), otherwise
        they are generated by heavily rattling the supplied structures.
        """
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
215
        
216
217
218
        # Collect potentially supplied structures and evaluate
        # energies and forces if not present.
        structures_init = []
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
219
        if self.structures is not None:
220
            for a in self.structures:
221
                a.info = {'origin': 'PreSupplied'}
222
223
224
225
226
227
                calc = a.get_calculator()
                if isinstance(calc, SinglePointCalculator):
                    if 'energy' in calc.results and 'forces' in calc.results:
                        # Add without evaluating.
                        structures_init.append(a)
                        continue
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
228
                a = self.evaluate(a)
229
                structures_init.append(a)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
230
        
231
        Nremaining = self.Ninit - len(structures_init)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
232
        
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
        if Nremaining > 0 and self.startgenerator is None:
            # Initialize rattle-mutation for all atoms.
            rattle = RattleMutation(self.n_to_optimize,
                                    Nrattle=self.n_to_optimize,
                                    rattle_range=2)

        # Generation of remaining initial-structures (up to self.Ninit).
        for i in range(Nremaining):
            if self.startgenerator is not None:
                a = self.startgenerator.get_new_candidate()
            else:
                # Perform two times rattle of all atoms.
                a0 = structures_init[i % len(structures_init)]
                a = rattle.get_new_candidate([a])
                a = rattle.get_new_candidate([a])
            a = self.evaluate(a)
            structures_init.append(a)

        # Potentially do a few relaxation steps.
        ### missing code ###
253
254
255
        
        for a in structures_init:
            self.write(a)
256
257
        self.gpr.memory.save_data(structures_init)
        self.population.add(structures_init)
258
259
                
    def run(self):
260
261
        """ Method to run the search.
        """
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
262
        self.get_initial_structures()
263

Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
264
        while self.steps < self.max_steps:
265
            self.print_master('\n ### steps: {} ###\n'.format(self.steps))
266
267
268
269
270
            self.train_surrogate()
            self.update_population()
            relaxed_candidates = self.get_surrogate_relaxed_candidates()

            kappa = self.kappa
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
271
272
273
            a_add = []
            for _ in range(5):
                try:
274
                    self.print_master('d1 trial', _)
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
275
                    anew = self.select_with_acquisition(relaxed_candidates, kappa)
276
                    self.print_master('d2 trial', _)
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
277
                    anew = self.evaluate(anew)
278
                    self.print_master('d3 trial', _)
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
279
280
281
282
283
                    a_add.append(anew)
                    if self.dualpoint:
                        adp = self.get_dualpoint(anew)
                        adp = self.evaluate(adp)
                        a_add.append(adp)
284
                    self.print_master('d4 trial', _)
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
285
286
287
288
289
290
                    break
                except Exception as err:
                    kappa /=2
                    if self.master:
                        traceback.print_exc(file=sys.stderr)
            self.gpr.memory.save_data(a_add)
291
292

            # Add structure to population
293
294
            index_lowest = np.argmin([a.get_potential_energy() for a in a_add])
            self.population.add([a_add[index_lowest]])
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
295
296
297
298

            # Save search state
            self.dump((self.steps, self.population, np.random.get_state()))
            
299
300
            if self.master:
                print('anew pred:', anew.info['key_value_pairs']['Epred'], anew.info['key_value_pairs']['Epred_std'])
301
                print('E_true:', [a.get_potential_energy() for a in a_add])
302
                print('pop:', [a.get_potential_energy() for a in self.population.pop])
303
304

            self.steps += 1
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
            
    def get_dualpoint(self, a, lmax=0.10, Fmax_flat=5):
        """Returns dual-point structure, i.e. the original structure
        perturbed slightly along the forces.
        
        lmax: The atom with the largest force will be displaced by
        this distance
        
        Fmax_flat: maximum atomic displacement. is increased linearely
        with force until Fmax = Fmax_flat, after which it remains
        constant as lmax.
        """
        F = a.get_forces()
        a_dp = a.copy()

        # Calculate and set new positions
        Fmax = np.sqrt((F**2).sum(axis=1).max())
        pos_displace = lmax * F*min(1/Fmax_flat, 1/Fmax)
        pos_dp = a.positions + pos_displace
        a_dp.set_positions(pos_dp)
        return a_dp

    def print_master(self, *args):
        self.comm.barrier()
        if self.master:
            print(*args, flush=True)

    def get_surrogate_relaxed_candidates(self):
333
334
335
336
337
        """ Method supplying a number of surrogate-relaxed new
        candidates. The method combines the generation of new
        candidates with subsequent surrogate relaxation.
        The tasks are parrlelized over all avaliable cores.
        """
338
339
340
341
342
343
344
345
346
347
348
349
350
        Njobs = self.Ncandidates
        task_split = split(Njobs, self.comm.size)
        def func1():
            return [self.generate_candidate() for i in task_split[self.comm.rank]]
        candidates = parallel_function_eval(self.comm, func1)
        
        def func2():
            return [self.surrogate_relaxation(candidates[i], Fmax=0.1, steps=200, kappa=self.kappa)
                    for i in task_split[self.comm.rank]]
        relaxed_candidates = parallel_function_eval(self.comm, func2)
        relaxed_candidates = self.certainty_filter(relaxed_candidates)

        #if (self.NsearchIter % self.Nuse_pop_as_candidates) == 0:
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
351
        if self.steps % 3 == 0:
352
353
354
355
356
357
358
359
            relaxed_candidates = self.population.pop_MLrelaxed + relaxed_candidates
        
        if self.master:
            Epred = np.array([a.info['key_value_pairs']['Epred'] for a in relaxed_candidates])
            Epred_std = np.array([a.info['key_value_pairs']['Epred_std'] for a in relaxed_candidates])
            fitness = Epred - self.kappa*Epred_std
            print(np.c_[Epred, Epred_std, fitness])
        return relaxed_candidates
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383

    def generate_candidate(self):
        """ Method to generate new candidate.
        """
        parents = self.population.get_structure_pair()
        a_mutated = self.candidate_generator.get_new_candidate(parents)
        return a_mutated

    def surrogate_relaxation(self, a, Fmax=0.1, steps=200, kappa=None):
        """ Method to carry out relaxations of new candidates in the
        surrogate potential.
        """
        calc = self.gpr.get_calculator(kappa)
        a_relaxed = relax(a, calc, dmax_cov=self.dmax_cov, Fmax=Fmax, steps_max=steps)

        # Evaluate uncertainty
        E, Estd = self.gpr.predict_energy(a_relaxed, eval_std=True)

        # Save prediction in info-dict
        a_relaxed.info['key_value_pairs']['Epred'] = E
        a_relaxed.info['key_value_pairs']['Epred_std'] = Estd
        a_relaxed.info['key_value_pairs']['kappa'] = self.kappa
        
        return a_relaxed
384
385
        
    def certainty_filter(self, structures):
386
387
388
389
        """ Method to filter away the most uncertain surrogate-relaxed
        candidates, which might otherewise get picked for first-principles
        evaluation, based on the very high uncertainty alone.
        """
390
391
392
393
394
395
396
397
398
399
400
401
402
        certainty = np.array([a.info['key_value_pairs']['Epred_std']
                              for a in structures]) / np.sqrt(self.gpr.K0)
        min_certainty = self.min_certainty
        for _ in range(5):
            filt = certainty < min_certainty
            if np.sum(filt.astype(int)) > 0:
                structures = [structures[i] for i in range(len(filt)) if filt[i]]
                break
            else:
                min_certainty = min_certainty + (1-min_certainty)/2
        return structures

    def update_population(self):
403
404
405
        """ Method to update the population with the new pirst-principles
        evaluated structures.
        """
406
407
408
409
410
411
412
413
414
415
416
        Njobs = len(self.population.pop)
        task_split = split(Njobs, self.comm.size)
        func = lambda: [self.surrogate_relaxation(self.population.pop[i],
                                                  Fmax=0.001, steps=200, kappa=None)
                        for i in task_split[self.comm.rank]]
        self.population.pop_MLrelaxed = parallel_function_eval(self.comm, func)
        if self.master:
            print('ML-relaxed pop forces:\n',
                  [(a.get_forces()**2).sum(axis=1).max()**0.5 for a in self.population.pop_MLrelaxed])

    def train_surrogate(self):
417
418
419
420
421
422
        """ Method to train the surrogate model.
        The method only performs hyperparameter optimization every 
        ten training instance, as carrying out the hyperparameter
        optimization is significantly more expensive than the basic
        training.
        """
423
        # Train
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
424
        if self.steps < 50 or (self.steps % 10) == 0:
425
426
427
428
429
430
431
432
            self.gpr.optimize_hyperparameters(comm=self.comm)
        else:
            self.gpr.train()
        if self.master:
            print('kernel:', list(np.exp(self.gpr.kernel.theta)))
            print('lml:', self.gpr.lml)

    def select_with_acquisition(self, structures, kappa):
433
434
435
436
        """ Method to select single most "promizing" candidate 
        for first-principles evaluation according to the acquisition
        function min(E-kappa*std(E)).
        """
437
438
439
440
441
442
443
444
445
        Epred = np.array([a.info['key_value_pairs']['Epred']
                          for a in structures])
        Epred_std = np.array([a.info['key_value_pairs']['Epred_std']
                              for a in structures])
        acquisition = Epred - kappa*Epred_std
        index_select = np.argmin(acquisition)
        return structures[index_select]

    def evaluate(self, a):
446
447
448
        """ Method to evaluate the energy and forces of the selacted
        candidate.
        """
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
449
        a = self.comm.bcast(a, root=0)
450
451
452
        a.wrap()

        if isinstance(self.calc, Dftb):
453
            if self.master:
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
                try:
                    a.set_calculator(self.calc)
                    E = a.get_potential_energy()
                    F = a.get_forces()
                    results = {'energy': E, 'forces': F}
                    calc_sp = SinglePointCalculator(a, **results)
                    a.set_calculator(calc_sp)
                    success = True
                except:
                    print('dftb failed on master', flush=True)
                    success = False
            else:
                success = None
            success = self.comm.bcast(success, root=0)
            if success == False:
                write('fail.traj', a)
                print('Raising error rank', self.comm.rank, flush=True)
                raise RuntimeError('DFTB evaluation failed')
472
473
474
475
476
477
478
479
            a = self.comm.bcast(a, root=0)
        else:
            a.set_calculator(self.calc)
            E = a.get_potential_energy()
            F = a.get_forces()
            results = {'energy': E, 'forces': F}
            calc_sp = SinglePointCalculator(a, **results)
            a.set_calculator(calc_sp)
480
481
482
483
484
485

        self.write(a)

        return a

    def write(self, a):
486
487
        """ Method for writing new evaluated structures to file.
        """
488
489
490
        if self.trajectory is not None:
            self.trajectory.write(a)

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
491
    def dump(self, data):
492
493
494
        """ Method to save restart-file used if the search is
        restarted from some point in the search. 
        """
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
495
496
        if self.comm.rank == 0 and self.restart is not None:
            pickle.dump(data, open(self.restart, "wb"), protocol=2)
497

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
498
    def read(self):
499
500
501
        """ Method to restart a search from the restart-file and the
        trajectory-file containing all structures evaluated so far.
        """
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
502
503
504
505
        self.steps, self.population, random_state = pickle.load(open(self.restart, "rb"))
        np.random.set_state(random_state)
        training_structures = read(self.traj_name, index=':')
        self.gpr.memory.save_data(training_structures)