super_clas_sortem.py 6.96 KB
Newer Older
Jonathan Juhl's avatar
Jonathan Juhl committed
1
2
3
4
5

from os.path import join,isdir,getsize,isfile
from os import mkdir,listdir
import tensorflow as tf
import numpy as np
Jonathan Juhl's avatar
all    
Jonathan Juhl committed
6
7
8
import csv
from mrc_loader_sortem import mrc_loader 
import ast
Jonathan Juhl's avatar
Jonathan Juhl committed
9
10
11
class super_class:


Jonathan Juhl's avatar
all    
Jonathan Juhl committed
12
    def __init__(self,work_dir,or_star=None,proj_star=None,bytes_list=None):
        """Load run settings from ``parameters.csv`` and prepare the output folders.

        The first row of the tab-separated file ``<work_dir>/parameters.csv`` is
        read and every positional field is converted into an attribute of this
        instance. Afterwards the sub-directories used by the run are created
        inside ``work_dir`` when they do not exist yet.

        Args:
            work_dir: Working directory; created if missing. ``parameters.csv``
                is opened for reading inside it.
            or_star: Stored unchanged as ``self.or_star``.
            proj_star: Stored unchanged as ``self.proj_star``.
            bytes_list: Accepted but not stored (see the note at the
                ``self.bytes_list`` assignment below).

        Raises:
            FileNotFoundError: If ``parameters.csv`` is missing from ``work_dir``.
            IndexError: If the file is empty or its first row is too short.
            ValueError: If a numeric field cannot be converted.
        """
        if not isdir(work_dir):
            mkdir(work_dir)

        with open(join(work_dir,'parameters.csv'), 'r', newline='') as file:
            reader = csv.reader(file, delimiter = '\t')
            parameters = list(reader)[0]  # only the first row is used

        def flag(index):
            # Booleans are stored as text; only the exact string 'True' enables a flag.
            return str(parameters[index]) == 'True'

        self.or_star = or_star
        self.proj_star = proj_star
        self.work_dir = work_dir
        # NOTE(review): the ``bytes_list`` argument is not stored; the attribute
        # always starts as None. Behaviour kept as in the original - confirm intent.
        self.bytes_list = None

        # Locations inside the working directory.
        self.refined = join(work_dir,'refined')
        self.unrefined = join(work_dir,'unrefined')
        self.particle_stack_dir = join(work_dir,'particle_stack_dir')
        self.models = join(work_dir,'models')
        self.star_files = join(work_dir,'output_star_files')
        self.prediction_data = join(work_dir,'prediction_data')
        self.results = join(work_dir,'gui_display')

        self.batch_size = int(parameters[0])  # batch size for gpu
        self.predict_batch_size = int(parameters[1])
        self.num_parts = int(parameters[2])  # number of sub images of the protein to compute
        self.num_cpus = int(parameters[3])  # number of cpus to assign for file reading and preprocessing
        self.num_gpus = int(parameters[4])  # number of graphic cards to predict and train on images
        self.validate_interval = int(parameters[5])
        self.gpu_list = str(parameters[6]).split()  # whitespace-separated list of gpu devices to use

        # Whether to compute in float16; also selects the tensor dtype.
        self.half_precision = flag(7)
        if self.half_precision:
            self.precision = tf.float16
        else:
            self.precision = tf.float32

        self.verbose = flag(8)
        self.epochs = int(parameters[9])

        if self.half_precision:
            # Switch Keras to mixed float16 computation. The original called the
            # bare name ``mixed_precision``, which is never imported in this
            # file and raised NameError; the function is reached through ``tf``.
            policy = tf.keras.mixed_precision.Policy('mixed_float16')
            tf.keras.mixed_precision.set_global_policy(policy)

        self.max_particles = int(parameters[10])
        self.star = str(parameters[11]).split()
        self.lr = float(parameters[12])
        self.angels = flag(13)
        self.ctf = flag(14)
        self.noise = flag(15)

        self.batch_size_mean = int(parameters[16])
        self.interpolation_num_samples = int(parameters[17])
        self.batch_size_angels = int(parameters[18])

        self.angular_cluster = int(parameters[19])
        self.feature_batch = int(parameters[20])

        # NOTE(review): field 21 of the row is not read here.
        self.bytes_pr_record = int(parameters[22])  # the number of bytes the image
        self.depth = int(parameters[23])
        self.width = int(parameters[24])  # particle image size

        # Create the output folders. ``prediction_data`` is only recorded as a
        # path above; it is not created here.
        for folder in (self.particle_stack_dir, self.unrefined, self.refined,
                       self.star_files, self.results, self.models):
            if not isdir(folder):
                mkdir(folder)

Jonathan Juhl's avatar
Jonathan Juhl committed
107
108
109
    def generator(self,path,byte,batch_size,scale):
        """Create an ``mrc_loader`` for ``path`` and wrap it in a distribution strategy.

        Args:
            path: Forwarded to ``mrc_loader`` as its first argument.
            byte: Forwarded to ``mrc_loader`` as its third argument.
            batch_size: Forwarded to ``mrc_loader`` as ``batch_size``.
            scale: Not used by this method.

        Returns:
            The ``(strategy, dist_dataset)`` pair produced by ``make_strategy``.
        """
        loader = mrc_loader(
            path,
            self.particle_stack_dir,
            byte,
            self.width,
            self.precision,
            self.num_cpus,
            large_rescale=128,
            verbose=self.verbose,
            batch_size=batch_size,
        )

        # Pick the single- or multi-GPU strategy and distribute the loader's output.
        strategy, dist_dataset = self.make_strategy(loader)
        return strategy, dist_dataset
Jonathan Juhl's avatar
Jonathan Juhl committed
115
116
117
118
119
120
121
122
123
124
125
126
            
    def check_dir(self,folder,substring):
        """Report whether ``substring`` is absent from the entry names of ``folder``.

        The names returned by ``listdir(folder)`` are joined with tab characters
        into a single string, and ``substring`` is searched for in that string.

        Args:
            folder: Directory whose entries are listed.
            substring: Text to look for in the joined entry names.

        Returns:
            True when ``substring`` does not occur in the joined names,
            False when it does.
        """
        joined_names = '\t'.join(listdir(folder))
        return substring not in joined_names

Jonathan Juhl's avatar
all    
Jonathan Juhl committed
127
128
129
130
131
132
133
134
135
136
    def apply_grad(self,loss,variables,tape):

        if self.half_precision: 
            loss = self.opt.get_scaled_loss(loss)
        
        gradients = tape.gradient(loss,variables)
        if  self.half_precision:
            gradients = self.opt.get_unscaled_gradients(gradients)
        self.opt.apply_gradients(zip(gradients, variables))
    
Jonathan Juhl's avatar
Jonathan Juhl committed
137
    def make_strategy(self,gen_prot):
        """Build the distribution strategy and the distributed dataset.

        If no devices were listed in the configuration, ``self.gpu_list`` is
        filled with ``/device:GPU:0`` ... ``/device:GPU:<num_gpus - 1>``.
        With more than one GPU a ``MirroredStrategy`` over ``self.gpu_list`` is
        used; otherwise a ``OneDeviceStrategy`` on the first listed device.

        Args:
            gen_prot: Loader object whose ``generate()`` result is distributed.

        Returns:
            A ``(strategy, dist_dataset)`` tuple.
        """
        if not self.gpu_list:
            # The original multi-GPU branch appended the defaults to
            # ``self.gpu_list`` and then replaced it with an empty local list,
            # so MirroredStrategy received no devices. Build the list once.
            self.gpu_list = ["/device:GPU:%s" % i for i in range(self.num_gpus)]

        gen = gen_prot.generate()  # get mrc files, and generate images or permuted images

        if self.num_gpus > 1:
            strategy = tf.distribute.MirroredStrategy(self.gpu_list)
        else:
            # With no listed device and num_gpus < 1 the list is still empty;
            # fall back to the CPU instead of failing with IndexError.
            device = self.gpu_list[0] if self.gpu_list else "/device:CPU:0"
            strategy = tf.distribute.OneDeviceStrategy(device)

        dist_dataset = strategy.experimental_distribute_dataset(gen)
        return strategy,dist_dataset
    def optimizer(self,total_steps): # use adam optimizer
        """Create the Adam optimizer, wrapped for loss scaling in half precision.

        The learning rate is ``self.lr * (1 + cos(pi * step / total_steps))``,
        where ``step`` is the TF1-compat global step read when this method is
        called; at step 0 this equals ``2 * self.lr``.

        NOTE(review): the expression is evaluated here, at construction time.
        Under eager execution this presumably yields a fixed rate until the
        method is called again - confirm that this matches the intended decay.

        Args:
            total_steps: Step count used as the denominator of the cosine term.

        Returns:
            A ``tf.keras.optimizers.Adam`` instance, wrapped in a
            ``LossScaleOptimizer`` with a fixed loss scale of 123 when
            ``self.half_precision`` is set.
        """
        gs = tf.cast(tf.compat.v1.train.get_or_create_global_step(),tf.float32) # current global step as float
        lr = (self.lr)* (1 + tf.cos(gs / total_steps * np.pi)) # cosine-shaped learning rate

        opt = tf.keras.optimizers.Adam(lr,beta_1=0.5,beta_2=0.999) # the optimizer

        if self.half_precision:
            # Use the non-experimental mixed-precision API, consistent with the
            # Policy / set_global_policy calls in __init__. dynamic=False with
            # initial_scale=123 keeps the fixed loss scale that the deprecated
            # experimental wrapper received through loss_scale=123.
            opt = tf.keras.mixed_precision.LossScaleOptimizer(opt,dynamic=False,initial_scale=123)
        return opt