import numpy as np

from scipy.linalg import cholesky, cho_solve, solve_triangular
from scipy.optimize import minimize
from scipy.optimize import fmin_l_bfgs_b

from surrogate.kernel import gauss_kernel, double_gauss_kernel
from surrogate.descriptor.fingerprint import Fingerprint
from surrogate.prior.prior import repulsive_prior
from surrogate.gpr_calculator import gpr_calculator

import warnings

class gpr_memory():
    """Training-data storage for GPR.

    Accumulates, for every structure seen so far, its potential energy,
    its descriptor feature vector and its prior energy, appending new
    data as it arrives.
    """
    def __init__(self, descriptor, prior):
        self.descriptor = descriptor
        self.prior = prior

        self.initialize_data()

    def initialize_data(self):
        # Wipe all stored training data.
        self.energies = None
        self.features = None
        self.prior_values = None

    def get_data(self):
        # Return (energies, features, prior_values) as currently stored.
        return self.energies, self.features, self.prior_values

    def save_data(self, atoms_list, add_data=True):
        """Store energies, features and prior values for the structures
        in atoms_list. With add_data=False the memory is wiped first."""
        if not add_data:
            self.initialize_data()

        self.save_energies(atoms_list)
        self.save_features(atoms_list)
        self.save_prior_values(atoms_list)

    def save_energies(self, atoms_list):
        # Append the potential energies of the new structures.
        new_energies = np.array([atoms.get_potential_energy()
                                 for atoms in atoms_list])
        self.energies = (new_energies if self.energies is None
                         else np.r_[self.energies, new_energies])

    def save_features(self, atoms_list):
        # Append the descriptor feature matrix of the new structures.
        new_features = self.descriptor.get_featureMat(atoms_list)
        self.features = (new_features if self.features is None
                         else np.r_[self.features, new_features])

    def save_prior_values(self, atoms_list):
        if self.prior is None:
            # No prior in use; a scalar zero broadcasts correctly when
            # subtracted from the energy array.
            self.prior_values = 0
            return
        new_values = np.array([self.prior.energy(atoms)
                               for atoms in atoms_list])
        self.prior_values = (new_values if self.prior_values is None
                             else np.r_[self.prior_values, new_values])

60
class GPR():
    """Gaussian Process Regression.

    Predicts energies and forces of atomic structures from training data
    accumulated in a gpr_memory instance.

    Parameters:

    descriptor: converts a structure into a feature vector; must supply
        get_feature() and get_featureGradient(). Defaults to Fingerprint().

    kernel: covariance function. The strings 'single' and 'double' select
        gauss_kernel() and double_gauss_kernel() respectively; any other
        value is used directly as the kernel object.

    prior: baseline model whose energy/forces are added back onto the GP
        prediction. Defaults to repulsive_prior().

    n_restarts_optimizer: number of restarts used when optimizing the
        kernel hyperparameters; 0 means train without optimizing.
    """
    def __init__(self, descriptor=None, kernel='double', prior=None, n_restarts_optimizer=1):
        if descriptor is None:
            self.descriptor = Fingerprint()
        else:
            self.descriptor = descriptor

        # Compare by value ('=='), not identity ('is'): 'is' on string
        # literals only worked by accident of CPython interning and is a
        # SyntaxWarning on modern Python.
        if kernel == 'single':
            self.kernel = gauss_kernel()
        elif kernel == 'double':
            self.kernel = double_gauss_kernel()
        else:
            self.kernel = kernel

        if prior is None:
            self.prior = repulsive_prior()
        else:
            self.prior = prior

        self.n_restarts_optimizer = n_restarts_optimizer

        self.memory = gpr_memory(self.descriptor, self.prior)

    def predict_energy(self, a, eval_std=False):
        """Predict the energy of structure a.

        With eval_std=True also return the predictive standard
        deviation. Requires train() to have been called first.
        """
        x = self.descriptor.get_feature(a)
        k = self.kernel.kernel_vector(x, self.X)

        E = np.dot(k, self.alpha) + self.bias + self.prior.energy(a)

        if eval_std:
            # Lines 5 and 6 in GPML (Rasmussen & Williams, Algorithm 2.1).
            vk = np.dot(self.K_inv, k)
            # self.K0 = k(x0, x0) was precomputed in train(). NOTE: the
            # argument of sqrt can go slightly negative numerically, in
            # which case E_std is nan.
            E_std = np.sqrt(self.K0 - np.dot(k, vk))
            return E, E_std
        else:
            return E

    def predict_forces(self, a, eval_std=False):
        """Predict the forces on structure a via the chain rule through
        the descriptor; with eval_std=True also return the gradient of
        the predictive standard deviation, reshaped to (Natoms, 3)."""
        # Calculate descriptor and its gradient.
        x = self.descriptor.get_feature(a)
        x_ddr = self.descriptor.get_featureGradient(a).T

        # Calculate kernel and its derivative.
        k_ddx = self.kernel.kernel_jacobian(x, self.X)
        k_ddr = np.dot(k_ddx, x_ddr)

        F = -np.dot(k_ddr.T, self.alpha) + self.prior.forces(a)

        if eval_std:
            k = self.kernel.kernel_vector(x, self.X)
            vk = np.dot(self.K_inv, k)
            # Predictive variance at x; must be non-negative.
            g = self.K0 - np.dot(k.T, vk)
            assert g >= 0

            F_std = 1/np.sqrt(g) * np.dot(k_ddr.T, vk)
            return F.reshape((-1, 3)), F_std.reshape(-1, 3)
        else:
            return F.reshape(-1, 3)

    def update_bias(self):
        # The GP is trained on prior- and bias-subtracted energies; the
        # bias is the mean residual of the prior on the training data.
        self.bias = np.mean(self.memory.energies - self.memory.prior_values)

    def train(self, atoms_list=None):
        """Train the GP on all stored data (plus atoms_list, if given)."""
        if atoms_list is not None:
            self.memory.save_data(atoms_list)

        self.update_bias()
        self.E, self.X, self.prior_values = self.memory.get_data()
        self.Y = self.E - self.prior_values - self.bias

        K = self.kernel(self.X)
        L = cholesky(K, lower=True)

        self.alpha = cho_solve((L, True), self.Y)
        self.K_inv = cho_solve((L, True), np.eye(K.shape[0]))
        # Kernel self-covariance, used for the predictive variances.
        self.K0 = self.kernel.kernel_value(self.X[0], self.X[0])

    def optimize_hyperparameters(self, atoms_list=None, comm=None):
        """Optimize the kernel hyperparameters by maximizing the log
        marginal likelihood with n_restarts_optimizer random restarts,
        then retrain. With an MPI-style communicator comm, the best
        result across all ranks is selected and broadcast."""
        if self.n_restarts_optimizer == 0:
            # Nothing to optimize; just train. Return here so that the
            # data is not saved twice and np.argmin below is not handed
            # an empty result list (previously this fell through).
            self.train(atoms_list)
            return

        if atoms_list is not None:
            self.memory.save_data(atoms_list)

        self.update_bias()
        self.E, self.X, self.prior_values = self.memory.get_data()
        self.Y = self.E - self.prior_values - self.bias

        results = []
        for i in range(self.n_restarts_optimizer):
            theta_initial = np.random.uniform(self.kernel.theta_bounds[:, 0],
                                              self.kernel.theta_bounds[:, 1])
            if i == 0:
                # Make sure that the currently chosen hyperparameters
                # are always tried as initial values.
                if comm is not None:
                    # But only on a single rank, if multiple are present.
                    if comm.rank == 0:
                        theta_initial = self.kernel.theta
                else:
                    theta_initial = self.kernel.theta

            res = self.constrained_optimization(theta_initial)
            results.append(res)
        index_min = np.argmin(np.array([r[1] for r in results]))
        result_min = results[index_min]

        if comm is not None:
            # Find best hyperparameters among all ranks and broadcast.
            results_all = comm.gather(result_min, root=0)
            if comm.rank == 0:
                index_all_min = np.argmin(np.array([r[1] for r in results_all]))
                result_min = results_all[index_all_min]
            else:
                result_min = None
            result_min = comm.bcast(result_min, root=0)

        self.kernel.theta = result_min[0]
        self.lml = -result_min[1]

        self.train()

    def neg_log_marginal_likelihood(self, theta=None, eval_gradient=True):
        """Return the negative log marginal likelihood of the training
        data — and, if eval_gradient=True, its gradient with respect to
        the kernel hyperparameters — evaluated at theta if given, else
        at the kernel's current hyperparameters."""
        if theta is not None:
            self.kernel.theta = theta

        if eval_gradient:
            K, K_gradient = self.kernel(self.X, eval_gradient)
        else:
            K = self.kernel(self.X)

        L = cholesky(K, lower=True)
        alpha = cho_solve((L, True), self.Y)

        # Log marginal likelihood, Eq. (2.30) in GPML.
        lml = -0.5 * np.dot(self.Y, alpha)
        lml -= np.sum(np.log(np.diag(L)))
        lml -= K.shape[0]/2 * np.log(2*np.pi)

        if eval_gradient:
            # Equation (5.9) in GPML.
            K_inv = cho_solve((L, True), np.eye(K.shape[0]))
            tmp = np.einsum("i,j->ij", alpha, alpha) - K_inv

            lml_gradient = 0.5*np.einsum("ij,kij->k", tmp, K_gradient)
            return -lml, -lml_gradient
        else:
            return -lml

    def constrained_optimization(self, theta_initial):
        """Minimize the negative log marginal likelihood within the
        kernel's theta_bounds, starting from theta_initial.

        Returns (theta_opt, func_min). Relies on
        neg_log_marginal_likelihood returning (value, gradient) by
        default, which fmin_l_bfgs_b uses directly."""
        theta_opt, func_min, convergence_dict = \
            fmin_l_bfgs_b(self.neg_log_marginal_likelihood,
                          theta_initial,
                          bounds=self.kernel.theta_bounds)
        if convergence_dict["warnflag"] != 0:
            warnings.warn("fmin_l_bfgs_b terminated abnormally with the "
                          " state: %s" % convergence_dict)
        return theta_opt, func_min

    def numerical_neg_lml(self, dx=1e-4):
        """Central finite-difference gradient of the negative log
        marginal likelihood; for verifying the analytic gradient."""
        theta = np.copy(self.kernel.theta)
        N_hyper = len(theta)
        lml_ddTheta = np.zeros((N_hyper))
        for i in range(N_hyper):
            theta_up = np.copy(theta)
            theta_down = np.copy(theta)
            theta_up[i] += 0.5*dx
            theta_down[i] -= 0.5*dx

            lml_up = self.neg_log_marginal_likelihood(theta_up, eval_gradient=False)
            lml_down = self.neg_log_marginal_likelihood(theta_down, eval_gradient=False)
            lml_ddTheta[i] = (lml_up - lml_down)/dx
        return lml_ddTheta

    def numerical_forces(self, a, dx=1e-4, eval_std=False):
        """Central finite-difference forces — and, with eval_std=True,
        the finite-difference gradient of the predictive standard
        deviation — for verifying predict_forces()."""
        Na, Nd = a.positions.shape
        F = np.zeros((Na, Nd))
        if not eval_std:
            for ia in range(Na):
                for idim in range(Nd):
                    a_up = a.copy()
                    a_down = a.copy()
                    a_up.positions[ia, idim] += 0.5*dx
                    a_down.positions[ia, idim] -= 0.5*dx

                    E_up = self.predict_energy(a_up)
                    E_down = self.predict_energy(a_down)
                    F[ia, idim] = -(E_up - E_down)/dx
            return F
        else:
            Fstd = np.zeros((Na, Nd))
            for ia in range(Na):
                for idim in range(Nd):
                    a_up = a.copy()
                    a_down = a.copy()
                    a_up.positions[ia, idim] += 0.5*dx
                    a_down.positions[ia, idim] -= 0.5*dx

                    E_up, Estd_up = self.predict_energy(a_up, eval_std=True)
                    E_down, Estd_down = self.predict_energy(a_down, eval_std=True)
                    F[ia, idim] = -(E_up - E_down)/dx
                    Fstd[ia, idim] = -(Estd_up - Estd_down)/dx
            return F, Fstd

    def get_calculator(self, kappa):
        # ASE-style calculator wrapping this model; kappa weights the
        # uncertainty contribution in the calculator's energy.
        return gpr_calculator(self, kappa)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
304