Commit eb6e00c2 authored by Jonathan Juhl

all

parent 843de6f4
@@ -4,41 +4,24 @@ import numpy as np
from super_clas_sortem import super_class
from os.path import join,getsize,isdir,isfile,dirname,basename
from os import listdir,rename
from fac_sortem import DynAE
import csv
#from fac_sortem import DynAE
from os import listdir,mkdir
import glob
from os.path import isabs
from fac_sortem import DynAE
class control_flow:
def __init__(self,num_gpus,gpu_list,num_cpus,num_clusters,star,deep_NN_batch_size,workdir,half_precision,max_particles,epochs,transfer_learning,verbose):
self.workdir = workdir
self.verbose = verbose
if not isdir(workdir): # check if all dirs are there
mkdir(workdir)
if not isdir(join(workdir,'gui_display')):
mkdir(join(workdir,'gui_display'))
if not isdir(join(workdir,'particle_stack_dir')):
mkdir(join(workdir,'particle_stack_dir'))
np.save(join(join(workdir,'gui_display'),'parameters.npy'),np.asarray([0.0,0.0,"preprocessing",0.0,0.0,0,0,'poul_nissen.jpg']))
def __init__(self,parameter_file_path):
self.epochs = epochs
if not isinstance(star, list) and isdir(star):
star_files = glob.glob(star)
if star_files == []:
print("no star files in directory. You must point to atleast one star file to run Sortinator.")
exit()
star_files = [join(star,i) for i in star_files]
else:
star_files = star
def get_star_file_parameters(star_files,path):
super_class.__init__(self
,parameter_file_path
)
def get_star_file_parameters(star_files):
counter = []
f = 0
@@ -46,7 +29,9 @@ class control_flow:
star_files = np.unique(star_files)
labels_list = []
count = 0
for z in star_files:
#print(z)
c = z[::-1].split('/',1)[1][::-1]
#c = '/emcc/au587640/cryosparc2_data/P139/'
@@ -78,8 +63,15 @@ class control_flow:
take_this = i
header = take_this
name = header.index('_rlnImageName')
voltage = header.index('_rlnVoltage')
defocusU = header.index('_rlnDefocusU')
defocusV = header.index('_rlnDefocusV')
defocusAngle = header.index('_rlnDefocusAngle')
abberation = header.index('_rlnSphericalAberration')
amp_contrast = header.index('_rlnAmplitudeContrast')
phase_shift = header.index('_rlnPhaseShift')
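# The block above maps RELION-style header labels (_rln*) to their column
# positions in the STAR file, so per-particle values can be pulled out of each
# data row by index below.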
ctf_params = []
if self.verbose:
try:
class_num = header.index('_rlnClassNumber')
@@ -94,44 +86,79 @@ class control_flow:
if len(header)== len(row.split()):
f+=1
current_name = row.split()[name].split('@')[1]
if len(names) != 0:
if names[-1] != current_name:
names.append(join(c,current_name))
else:
names.append(join(c,current_name))
if self.verbose:
labels_list.append(int(row.split()[class_num]))
labels_list.append(int(row.split()[class_num]))
V = float(row.split()[voltage])
electron_volts = (1.23*10**3)/np.sqrt(V*(V*10**(-7)*1.96+1))
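# Hedged note: this expression appears to approximate the relativistically
# corrected de Broglie electron wavelength from the acceleration voltage read
# out of _rlnVoltage; the variable keeps the name 'electron_volts' because it
# is saved to electron_volts.npy below.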
abberation_d = float(row.split()[abberation])
amp_contrast_d = float(row.split()[amp_contrast])
ctf_params.append([float(row.split()[phase_shift]),float(row.split()[defocusU]),float(row.split()[defocusV]),float(row.split()[defocusAngle])])
current_id = row.split()[name].split('@')[0]
names.append(join(c,current_name))
np.save(join(join(self.workdir, 'particle_stack_dir'),'labels'),np.asarray(labels_list))
np.save(join(self.particle_stack_dir,'depth.npy'),f)
np.save(join(self.particle_stack_dir,'names.npy'),names)
np.save(join(self.particle_stack_dir,'electron_volts.npy'),electron_volts)
np.save(join(self.particle_stack_dir,'spherical_abberation.npy'),abberation_d)
np.save(join(self.particle_stack_dir,'amplitude_contrast.npy'),amp_contrast_d)
np.save(join(self.particle_stack_dir,'ctf_params.npy'),np.asarray(ctf_params))
np.save(join(self.particle_stack_dir,'labels.npy'),np.asarray(labels_list))
return f,np.unique(names)
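# Hedged sketch of the _rlnImageName convention the parser above assumes
# (the path below is purely illustrative):
#   000001@Extract/job012/particles.mrcs
# split('@')[0] -> index of the particle inside the stack (current_id)
# split('@')[1] -> relative path of the .mrcs stack (current_name), which is
#                  joined with the star file's parent directory c.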
if not isinstance(self.star, list) and isdir(self.star):
star_files = glob.glob(self.star)
if star_files == []:
print("no star files in directory. You must point to atleast one star file to run Sortinator.")
exit()
star_files = [join(self.star,i) for i in star_files]
else:
star_files = self.star
if not isfile(join(self.particle_stack_dir,'depth.npy')):
depth,mrc_paths = get_star_file_parameters(star_files)
else:
depth = np.load(join(self.particle_stack_dir,'depth.npy'))
mrc_paths = np.load(join(self.particle_stack_dir,'names.npy'))
length,bytes_pr_record = self.get_parameters(mrc_paths[0])
depth,mrc_paths = get_star_file_parameters(star_files,star)
length,bytes_pr_record = self.get_parameters(mrc_paths)
self.add_params(parameter_file_path,'current_image.png',bytes_pr_record,depth,length)
DynAE( mrc_paths,
workdir,
length,
bytes_pr_record,
num_cpus,
num_gpus,
depth,
num_clusters,
gpu_list,
half_precision,
deep_NN_batch_size,
transfer_learning,
max_particles,
epochs,
verbose)
DynAE(parameter_file_path,mrc_paths)
self.write_star_file(star_files,labels)
final_labels = np.load(join(self.refined,'final_labels.npy'))
self.write_star_file(star_files,final_labels)
def add_params(self,parameter_file_path,current_image,binary,num_particles,width):
with open(join(parameter_file_path,'parameters.csv'), 'r', newline='') as file:
reader = csv.reader(file, delimiter = '\t')
parameters = list(reader)[0]
parameters[-1] = width
parameters[-2] = num_particles
parameters[-3] = binary
parameters[-4] = current_image
with open(join(parameter_file_path,'parameters.csv'), 'w', newline='') as file:
writer = csv.writer(file, delimiter = '\t')
writer.writerow(parameters)
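# Hedged note: the four slots rewritten above are the tail of the row that the
# entry script writes to parameters.csv, i.e. [..., 'current_image', binary,
# num_particles, width], so the column order must stay in sync with that list.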
def write_star_file(self,star_files,labels):
names = []
@@ -150,7 +177,7 @@ class control_flow:
names.append(row.split())
for index,i in enumerate(np.unique(labels)):
f = open(join(join(self.workdir,'output_star_files'),'cluster_%s.star' %index), 'w')
f = open(join(self.star_files,'cluster_%s.star' %index), 'w')
with f:
@@ -185,7 +212,7 @@ class control_flow:
def get_parameters(self,paths):
width,length,depth,record_size_new = self.read(paths[0])
width,length,depth,record_size_new = self.read(paths)
return length,record_size_new
This diff is collapsed.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import LogNorm
from scipy.stats import gaussian_kde
a = np.random.multivariate_normal([0,0], [[ 1, 0], [0, 0.01]], check_valid='warn', tol=1e-8,size=[10000])
b = np.random.multivariate_normal([0,0], [[ 1, 0], [0, 0.01]], check_valid='warn', tol=1e-8,size=[10000])
c = np.random.multivariate_normal([0,0], [[ 1, 0], [0, 0.01]], check_valid='warn', tol=1e-8,size=[10000])
s = np.concatenate([a,b,c],axis=0)
x = s[:,0]
y = s[:,1]
H, xedges, yedges = np.histogram2d(x, y, bins=100)
# histogram2d puts x along the first axis, so transpose H for pcolormesh
plt.pcolormesh(xedges, yedges, H.T, alpha=0.9)
plt.savefig('density_check.png') # assumed output name so the plot is actually written to disk
import matplotlib
matplotlib.use('Agg')
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, UpSampling2D,Activation,GlobalAveragePooling2D,PReLU,Conv2DTranspose,BatchNormalization,Flatten
import matplotlib.pyplot as plt
plt.switch_backend('agg')
from mrc_loader_sortem import mrc_loader
import numpy as np
from os.path import join
from os.path import isfile
from super_clas_sortem import super_class
from models import Unet
class Denoise(super_class):
def __init__(self,parameter_file_path,not_projected_star_files,projected_star_files,bytes_pr_record,validate=None,val_bytes=None):
super_class.__init__(self ,parameter_file_path)
self.not_projected_star_files = not_projected_star_files
self.validate = validate
self.val_bytes = val_bytes
self.projected_star_files = projected_star_files
self.paths = [self.not_projected_star_files,self.projected_star_files]
self.bytes_pr_record = bytes_pr_record
num_steps = (self.max_particles/(self.batch_size*self.num_gpus))*self.epochs
self.steps = num_steps
self.opt = self.optimizer(self.steps)
self.unet = Unet()
self.train()
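# Constructing Denoise immediately runs training; a hedged usage sketch with
# purely illustrative arguments:
#   Denoise(parameter_file_path='./results',
#           not_projected_star_files=raw_stacks,
#           projected_star_files=projected_stacks,
#           bytes_pr_record=bpr)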
@tf.function
def mask(self,image):
image = tf.squeeze(image)
x = tf.range(64)
s = tf.random.shuffle(x)
_,y = tf.meshgrid(x,x)
rs = tf.reduce_sum(tf.cast(tf.equal(y,tf.roll(s,int(64/2),axis=0)),self.precision)*image,axis=2)
selected_pixels = tf.cast(tf.equal(y,s),self.precision)
not_selected_pixels = tf.cast(tf.not_equal(y,s),self.precision)
m_image = tf.transpose(tf.transpose(tf.stack([selected_pixels]*self.batch_size,axis=0),perm=[1,0,2])*rs,perm=[1,0,2])
return m_image+not_selected_pixels*image,selected_pixels
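# Hedged reading of mask(): one pixel per column (at a shuffled row) is
# replaced with an intensity gathered from a different, rolled position in the
# same image, in the spirit of Noise2Void-style blind-spot masking; the binary
# selection map is returned so the loss can be restricted to those pixels.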
@tf.function
def loss(self,image,estimate,mask):
a = Flatten()(image)
b = Flatten()(estimate)
c = tf.reshape(mask,[-1])
mean = tf.reduce_mean(((a-b)*c)**2)
return mean
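# i.e. a mean squared error in which the mask zeroes out every pixel that was
# not swapped by mask(), so only the blind-spot pixels contribute.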
def plotlib(self,image,raw_image):
s = np.concatenate(np.split(image,image.shape[0]),axis=1)
t = np.concatenate(np.split(raw_image,raw_image.shape[0]),axis=1)
plt.imshow(np.squeeze(np.concatenate([s,t],axis=2)),cmap='gray')
plt.savefig(join(self.results,'image_signal.png'),)
@tf.function
def predict_net_low(self,raw_data):
stage3_img = self.unet(raw_data)
return stage3_img
@tf.function
def train_net_L(self,raw_data_image):
raw_data_image = tf.cast(raw_data_image,self.precision)
swaped_pixels,mask = self.mask(raw_data_image)
#plt.imshow(swaped_pixels[0])
#plt.savefig('test.png');exit()
swaped_pixels = tf.expand_dims(swaped_pixels,axis=-1)
with tf.GradientTape() as tape:
estimate= self.unet(swaped_pixels)
loss= self.loss(raw_data_image,estimate,mask)
variables = self.unet.trainable_weights
self.apply_grad(loss,variables,tape)
return loss
def train(self):
strategy,distribute = self.generator('contrastive',self.validate,self.val_bytes,self.batch_size)
dis = iter(distribute)
if self.validate is not None:
strategy_validate,distribute_val = self.generator('predict',self.validate,self.val_bytes,self.predict_batch_size)
dis_val = iter(distribute_val)
pred_data = next(dis_val)
if self.verbose:
pred_data,y = pred_data
if not isfile(join(self.models,'unet.index')):
ite = 1
while True:
raw_data_image,perm = next(dis)
loss = strategy.run(self.train_net_L,args=(perm,))
if ite % self.validate_interval == 0 and self.validate:
validation_images = strategy_validate.run(self.predict_net_low,args=(pred_data,))
self.plotlib(validation_images,pred_data)
self.unet.save_weights(join(self.models,'unet'))
print("step:%i of %i" %(ite,self.steps),loss.numpy())
ite +=1
if ite > self.steps: # stop once the configured number of training steps has been run
break
\ No newline at end of file
@@ -2,6 +2,10 @@
import argparse
from execute_sortem import control_flow
import csv
from os import mkdir
from os.path import isdir,join
def main():
parser = argparse.ArgumentParser(description='Run sortinator.')
@@ -13,7 +17,10 @@ def main():
parser.add_argument('--star', type=str, nargs='+',
help='list of path to the star files, wild cards are accepted. The star file must refer to the .mrc files')
parser.add_argument('--ab', type=int,default=200,
help='deep learning model training batch size')
parser.add_argument('--pb', type=int,default=200,
help='deep learning model prediction batch size')
parser.add_argument('--o', type=str,default='./results',
help='output directory')
parser.add_argument('--f16', type=str,default="False",
@@ -26,14 +33,40 @@
parser.add_argument('--log', type=str,default="False",help='log all possible values to file (loss, pca_components, NMI, recall, false positives, false negatives).')
parser.add_argument('--num_classes',type=int,default=4,help='the number of classes used in the refinement process.')
parser.add_argument('--num_parts',type=int,default=2,help='the number of subparts in the refinement process. This improves accuracy for highly similar proteins.')
parser.add_argument('--resize',type=int,default=256,help='Image size to resize to for training and inference. If the image is smaller than 256, consider downsizing. The minimum size is 128.')
parser.add_argument('--part_resize',type=int,default=128,help='When refining the components, the image size is usually 128.')
parser.add_argument('--early_stopping',type=float,default=0.001,help='Convergence threshold; training stops before the maximum number of iterations once it is reached.')
parser.add_argument('--dbscan',type=int,default=50000,help='The number of samples to use for DBSCAN. This strongly affects memory use. Default is 50000.')
parser.add_argument('--lr',type=float,default=0.002,help='The learning rate of the model')
parser.add_argument('--vi',type=int,default=40,help='The validation interval in training steps')
parser.add_argument('--eps',type=float,default=0.01,help='The epsilon of the data.')
parser.add_argument('--nb',type=int,default=10,help='Nearest neighbours')
parser.add_argument('--pr',type=str,default='False',help='pretrain the signal model')
parser.add_argument('--mb',type=int,default=1,help='pretrain the signal model')
parser.add_argument('--ss',type=int,default=20,help='pretrain the signal model')
parser.add_argument('--rs',type=int,default=32,help='pretrain the signal model')
parser.add_argument('--val', type=str, nargs='+',help='Use a validation set which is different from the training set.')
parser.add_argument('--rab', type=int, default=5,help='The Denoiser to Train for the image denoising.')
parser.add_argument('--lam',type=float,default=0.2,help='The lambda weight for the pretraining loss of the autoencoder')
args = parser.parse_args()
num_gpus = args.num_gpus
if args.gpu_list == None:
@@ -42,52 +75,52 @@ def main():
if len(args.gpu_list) != args.num_gpus:
print("the list of gpus to use is not the same length as the number of gpus specified. The number of gpus will be changed to match list length")
num_gpus = len(args.gpu_list)
gpu_list = args.gpu_list
gpu_list = []
for s in range(num_gpus):
gpu_list.append('/GPU:%i' %s)
else:
gpu_list = []
num_gpus = len(args.gpu_list)
for s in range(num_gpus):
gpu_list.append('/GPU:%i' %s)
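# The generated names follow TensorFlow's '/GPU:<index>' device string
# convention; they are space-joined into the parameters.csv row written below.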
batch_size = args.ab
pred_batch_size = args.pb
num_cpus = args.num_cpus
star = args.star
train_light = args.tr
deep_NN_batch_size = args.ab
epochs = args.epochs
output = args.o
half_precision = args.f16
verbose = args.log
max_particles = args.mp
num_clusters = args.num_classes
if train_light =='False':
train_light = False
else:
train_light = True
num_clusters = args.num_parts
binary = 0
num_particles = 0
width = 0
s1 = ' '.join(gpu_list)
l = [args.ab,args.pb,args.num_parts,args.num_cpus,args.num_gpus,args.vi,' '.join(gpu_list),args.f16,args.tr,args.epochs,args.log,args.mp,args.eps,args.nb,' '.join(args.star),args.dbscan,args.lam,args.lr,args.val,args.mb,args.ss,args.rs,args.pr,'current_image',binary,num_particles,width]
if not isdir(args.o):
mkdir(args.o)
with open(join(args.o,'parameters.csv'), 'w', newline='') as file:
writer = csv.writer(file, delimiter = '\t')
writer.writerow(l)
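# Hedged note: this single tab-separated row is the whole run configuration;
# control_flow.add_params later rewrites its last four slots
# ('current_image', binary, num_particles, width) once the stack has been read.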
if verbose == 'True':
verbose = True
else:
verbose = False
if half_precision == 'False':
half_precision = False
else:
half_precision = True
control_flow(num_gpus,
gpu_list,
num_cpus,
num_clusters,
star,
deep_NN_batch_size,
output,
half_precision,
max_particles,
epochs,
train_light,
verbose)
control_flow(args.o)
exit()
#if __name__ == "__main__":
from super_clas_sortem import MaxBlurPooling2D,super_class
from tensorflow.keras import Model
import tensorflow as tf
from tensorflow.keras.layers import Layer, Flatten, Lambda, InputSpec, Input,LeakyReLU,Conv2DTranspose, Dense,Conv2D,GlobalAveragePooling2D,BatchNormalization,Activation,UpSampling2D,Conv3D,Conv3DTranspose,LeakyReLU,Dense,UpSampling3D,MaxPool3D,MaxPool2D
from utils_sortem import Spectral_norm,Instance_norm,transform_3D
def AdaIn_3D(inputs,s1,b1):
b= tf.shape(inputs)[0]
w= tf.shape(inputs)[1]
h= tf.shape(inputs)[2]
d= tf.shape(inputs)[3]
c= tf.shape(inputs)[4]
tmp = Flatten()(inputs)
mean = tf.reduce_mean(tmp,axis=1)
std = tf.math.reduce_std(tmp,axis=1)
out = s1*((inputs - mean)/std) + b1
out = tf.reshape(out,[b,w,h,d,c])
return out
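# Both AdaIn_* helpers implement adaptive instance normalisation,
# out = s1 * (x - mean(x)) / std(x) + b1, with mean/std taken per sample over
# all remaining dimensions and (s1, b1) supplied externally (see Double_Dense).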
def AdaIn_2D(inputs,s1,b1):
b= tf.shape(inputs)[0]
w= tf.shape(inputs)[1]
h= tf.shape(inputs)[2]
c= tf.shape(inputs)[3]
inputs = Flatten()(inputs)
mean = tf.reduce_mean(inputs,axis=1)
std = tf.math.reduce_std(inputs,axis=1)
out = s1*((inputs - mean)/std) + b1
out = tf.reshape(out,[b,w,h,c])
return out
class Double_Dense(Model):
def __init__(self,channels):
super(Double_Dense,self).__init__()
self.dense_1 = Dense(channels)
self.dense_2 = Dense(channels)
def call(self,x):
x = Flatten()(x)
s = self.dense_1(x)
c = self.dense_2(x)
return s,c
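# Hedged note: Double_Dense flattens its input and maps it to two vectors; in
# the generator below each z_map_* instance presumably supplies the (scale,
# bias) pair consumed by the matching AdaIn_2D step.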
class Generator_AdaIN_Noise(Model):
def __init__(self,gf_dim=32):
self.gf_dim = gf_dim
super(Generator_AdaIN_Noise,self).__init__()
self.zmap_0 = Double_Dense( self.gf_dim * 32)
self.h0 = AdaIn_2D
self.h0_a = LeakyReLU()
self.h1= Conv2DTranspose( self.gf_dim * 32,3,strides=2)
self.z_map_1= Double_Dense(self.gf_dim * 32)
self.h1_aI = AdaIn_2D
self.h1_a = LeakyReLU() # activation kept under its own name (assumed intent) so the Conv2DTranspose assigned to self.h1 above is not overwritten
self.h2 = Conv2DTranspose(self.gf_dim * 16,3,strides=2)
self.h2_a = LeakyReLU()
self.z_map_2= Double_Dense(self.gf_dim * 16)
self.h2_aI = AdaIn_2D
self.h2 = LeakyReLU()
self.h5 = Conv2DTranspose(self.gf_dim*4,4,strides=2)
self.z_map_4 = Double_Dense(self.gf_dim*4)
self.h6 = AdaIn_2D
self.h6_a = LeakyReLU()
self.h7 = Conv2DTranspose(self.gf_dim*2,4,strides=2)
self.z_map_5 = Double_Dense(self.gf_dim*2)
self.h7_in = AdaIn_2D
self.h7_a = LeakyReLU()
self.h8 = Conv2DTranspose(self.gf_dim,4,strides=2)
self.z_map_6 =