Commit 8820cbc1 authored by Christian Marius Lillelund

added fall long case, script for plain xgb model

parent 5760e7af
@@ -2,12 +2,8 @@
import numpy as np
import pandas as pd
import config as cfg
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import RobustScaler, MaxAbsScaler, QuantileTransformer
from sklearn.metrics import roc_auc_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tools import classifiers, data_loader
from pathlib import Path
from sklearn.base import BaseEstimator, TransformerMixin
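# The commit message adds a "script for plain xgb model"; its body is collapsed
# in this hunk. A minimal sketch of the flow the imports above support, not the
# actual hidden code (X is a feature matrix and y the binary labels, assumed to
# come from data_loader):
def _sketch_plain_xgb(X, y):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0, stratify=y)
    model = xgb.XGBClassifier(n_estimators=200)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]
    return {'accuracy': accuracy_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred),
            'recall': recall_score(y_test, y_pred),
            'auc': roc_auc_score(y_test, y_proba)}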
@@ -30,25 +30,28 @@ CLUSTERS_DIR = Path.joinpath(ROOT_DIR, 'models/clusters')
COMPLETE_DIR = Path.joinpath(ROOT_DIR, 'models/complete')
COMPLETE_TF_DIR = Path.joinpath(ROOT_DIR, 'models/complete/tensorflow')
COMPLETE_XGB_DIR = Path.joinpath(ROOT_DIR, 'models/complete/xgboost')
COMPLETE_CAT_DIR = Path.joinpath(ROOT_DIR, 'models/complete/catboost')
COMPLETE_RF_DIR = Path.joinpath(ROOT_DIR, 'models/complete/random_forest')
COMPLETE_EMB_DIR = Path.joinpath(ROOT_DIR, 'models/complete/embeddings')
COMPLETE_EMB_FULL_DIR = Path.joinpath(ROOT_DIR, 'models/complete/embeddings_full')
FALL_DIR = Path.joinpath(ROOT_DIR, 'models/fall')
FALL_TF_DIR = Path.joinpath(ROOT_DIR, 'models/fall/tensorflow')
FALL_XGB_DIR = Path.joinpath(ROOT_DIR, 'models/fall/xgboost')
FALL_CAT_DIR = Path.joinpath(ROOT_DIR, 'models/fall/catboost')
FALL_RF_DIR = Path.joinpath(ROOT_DIR, 'models/fall/random_forest')
FALL_EMB_DIR = Path.joinpath(ROOT_DIR, 'models/fall/embeddings')
SUCCESS_DIR = Path.joinpath(ROOT_DIR, 'models/success')
SUCCESS_TF_DIR = Path.joinpath(ROOT_DIR, 'models/success/tensorflow')
SUCCESS_XGB_DIR = Path.joinpath(ROOT_DIR, 'models/success/xgboost')
SUCCESS_CAT_DIR = Path.joinpath(ROOT_DIR, 'models/success/catboost')
SUCCESS_RF_DIR = Path.joinpath(ROOT_DIR, 'models/success/random_forest')
SUCCESS_EMB_DIR = Path.joinpath(ROOT_DIR, 'models/success/embeddings')
COMPLIANCE_DIR = Path.joinpath(ROOT_DIR, 'models/compliance')
COMPLIANCE_TF_DIR = Path.joinpath(ROOT_DIR, 'models/compliance/tensorflow')
COMPLIANCE_XGB_DIR = Path.joinpath(ROOT_DIR, 'models/compliance/xgboost')
COMPLIANCE_RF_DIR = Path.joinpath(ROOT_DIR, 'models/compliance/random_forest')
COMPLIANCE_EMB_DIR = Path.joinpath(ROOT_DIR, 'models/compliance/embeddings')
FALL_SHORT_DIR = Path.joinpath(ROOT_DIR, 'models/fall_short')
FALL_SHORT_TF_DIR = Path.joinpath(ROOT_DIR, 'models/fall_short/tensorflow')
FALL_SHORT_XGB_DIR = Path.joinpath(ROOT_DIR, 'models/fall_short/xgboost')
FALL_SHORT_RF_DIR = Path.joinpath(ROOT_DIR, 'models/fall_short/random_forest')
FALL_SHORT_EMB_DIR = Path.joinpath(ROOT_DIR, 'models/fall_short/embeddings')
FALL_LONG_DIR = Path.joinpath(ROOT_DIR, 'models/fall_long')
FALL_LONG_TF_DIR = Path.joinpath(ROOT_DIR, 'models/fall_long/tensorflow')
FALL_LONG_XGB_DIR = Path.joinpath(ROOT_DIR, 'models/fall_long/xgboost')
FALL_LONG_RF_DIR = Path.joinpath(ROOT_DIR, 'models/fall_long/random_forest')
FALL_LONG_EMB_DIR = Path.joinpath(ROOT_DIR, 'models/fall_long/embeddings')
GENERAL_FEATURES = ['Gender', 'Age', 'Cluster']
ATS_RESOLUTION = 50
EX_RESOLUTION = 9
ATS_DELIMITER = 6
THRESHOLD_WEEKS = 8
THRESHOLD_TRAINING = 10
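# The fall_long directories are new in this commit; saving model artifacts fails
# if they are missing on a fresh checkout. A small setup sketch (an assumption,
# not code from this diff) that creates them eagerly:
for _model_dir in (FALL_LONG_DIR, FALL_LONG_TF_DIR, FALL_LONG_XGB_DIR,
                   FALL_LONG_RF_DIR, FALL_LONG_EMB_DIR):
    _model_dir.mkdir(parents=True, exist_ok=True)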
#!/usr/bin/env python
import config as cfg
from tools import file_reader, file_writer, feature_maker
from tools import preprocessor, neural_embedder
from utility import embedder
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.decomposition import PCA
CASES = ["Complete", "Success", "Fall"]
USE_ATS_NAMES = True
def main():
cl = file_reader.read_csv(cfg.INTERIM_DATA_DIR, 'cl.csv',
converters={'CitizenId': str, 'Cluster': int})
df = file_reader.read_csv(cfg.INTERIM_DATA_DIR, 'screenings.csv',
converters={'CitizenId': str})
fd = file_reader.read_pickle(cfg.INTERIM_DATA_DIR, 'fd.pkl')
longterm_fall_df = make_longterm_fall(df, fd)
    file_writer.write_csv(longterm_fall_df, cfg.PROCESSED_DATA_DIR, 'longterm_fall.csv')
for case in CASES:
df_full = make_dataset_full(cl, df, case)
file_writer.write_csv(df_full, cfg.PROCESSED_DATA_DIR, f'{case.lower()}.csv')
df_count = make_dataset_count(case)
file_writer.write_csv(df_count, cfg.PROCESSED_DATA_DIR, f'{case.lower()}_count.csv')
df_emb = make_dataset_emb(case)
file_writer.write_csv(df_emb, cfg.PROCESSED_DATA_DIR, f'{case.lower()}_emb.csv')
def make_longterm_fall(df, fd):
fd = fd.drop_duplicates(["CitizenId", "Date"])
df = preprocessor.split_cat_columns(df, col='Ats', tag='Ats',
resolution=cfg.ATS_RESOLUTION)
df = feature_maker.make_longterm_fall_feature(df, fd)
return df
def make_dataset_full(cl: pd.DataFrame, df: pd.DataFrame, case: str):
df['Cluster'] = cl['Cluster']
df = preprocessor.split_cat_columns(df, col='Ats', tag='Ats',
resolution=cfg.ATS_RESOLUTION)
if case == "Complete":
df = feature_maker.make_complete_feature(df)
elif case == "Success":
df = feature_maker.make_success_feature(df)
else:
df = feature_maker.make_fall_feature(df)
ats_cols = df.filter(regex='Ats', axis=1)
general_cols = df[['Gender', 'BirthYear', 'Cluster', 'LoanPeriod']]
df = pd.concat([general_cols, ats_cols, df[[case]]], axis=1)
if USE_ATS_NAMES:
df = preprocessor.replace_ats_values(df)
return df
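# replace_ats_values (tools.preprocessor) is assumed to swap raw assistive-aid
# codes in the Ats columns for readable names. A minimal sketch of that kind of
# lookup (hypothetical mapping, not the repo's real table):
def _sketch_replace_ats_values(df: pd.DataFrame, mapping: dict) -> pd.DataFrame:
    ats_cols = df.filter(regex=r'\d+Ats', axis=1).columns
    df[ats_cols] = df[ats_cols].replace(mapping)
    return df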
def make_dataset_count(case: str):
ats = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR,
f'{case.lower()}.csv',
converters=ats)
if USE_ATS_NAMES:
cols_ats = [str(i)+'Ats' for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = [df[f'{i}Ats'].unique() for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = list(set(np.concatenate(unique_ats)))
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, '')
df = df.drop(cols_ats, axis=1)
df = pd.concat([df.drop(case, axis=1), df_ats, df[[case]]], axis=1)
        df = df.drop('0', axis=1)  # drop the '0' category (presumably padding for short Ats sequences)
else:
num_cols = embedder.get_numerical_cols(df, case)
cols_ats = [str(i)+'Ats' for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = [df[f'{i}Ats'].unique() for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = list(set(np.concatenate(unique_ats)))
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, 'Ats_')
df = pd.concat([df, df_ats], axis=1)
ats_columns = ['Ats_' + ats for ats in unique_ats]
df = df[num_cols + ats_columns + [case]]
df = df.drop(['Ats_0'], axis=1)
return df
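# extract_cat_count (tools.preprocessor) is assumed to turn the positional
# 1Ats..NAts columns into one count column per unique category. A small,
# self-contained sketch of that idea (hypothetical helper, not the repo's code):
def _sketch_cat_count(df: pd.DataFrame, unique_cats, cols, prefix) -> pd.DataFrame:
    counts = {prefix + cat: df[cols].eq(cat).sum(axis=1) for cat in unique_cats}
    return pd.DataFrame(counts, index=df.index)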
def make_dataset_emb(case: str):
ats = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR,
f'{case.lower()}.csv',
converters=ats)
    emb_cols = df.filter(regex=r'\d+Ats', axis=1)
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
df_to_enc = df.iloc[:,n_numerical_cols:]
target_name = case
train_ratio = 0.9
X_train, X_val, y_train, y_val, labels = preprocessor.prepare_data_for_embedder(df_to_enc,
target_name,
train_ratio)
if case == "Complete":
artifacts_path = cfg.COMPLETE_EMB_DIR
epochs = 5
elif case == "Success":
artifacts_path = cfg.SUCCESS_EMB_DIR
epochs = 5
else:
artifacts_path = cfg.FALL_EMB_DIR
epochs = 20
params = {"df": df_to_enc,
"target_name": target_name,
"train_ratio": train_ratio,
"network_layers": ([128]),
"epochs": epochs,
"batch_size": 128,
"verbose": False,
"artifacts_path": artifacts_path}
network = neural_embedder.NeuralEmbedder(**params)
network.fit(X_train, y_train, X_val, y_val)
network.save_model()
embedded_weights = network.get_embedded_weights()
network.save_weights(embedded_weights)
network.save_labels(labels)
network.make_visualizations_from_network(extension='png')
    emb_cols = df.filter(regex=r'\d+Ats', axis=1)
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
embedded_df = df.iloc[:, n_numerical_cols:df.shape[1]-1]
for index in range(embedded_df.shape[1]):
column = embedded_df.columns[index]
labels_column = labels[index]
embeddings_column = embedded_weights[index]
pca = PCA(n_components=1)
Y = pca.fit_transform(embeddings_column)
y_array = np.concatenate(Y)
mapping = dict(zip(labels_column.classes_, y_array))
file_writer.write_mapping(mapping,
Path.joinpath(cfg.PROCESSED_DATA_DIR, 'embeddings'),
f'{case.lower()}_{column}.csv')
df[column] = df[column].replace(to_replace=mapping)
return df
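# The loop above compresses each column's learned embedding matrix to its first
# principal component, so every category receives one scalar and categories the
# network embeds similarly land close together. A tiny sketch with made-up values:
def _sketch_embedding_to_scalar():
    embeddings = np.array([[0.1, 0.9], [0.2, 0.8], [0.9, 0.1]])  # 3 categories, 2-d
    classes = ['018', '043', '120']  # hypothetical Ats codes
    scalars = PCA(n_components=1).fit_transform(embeddings).ravel()
    return dict(zip(classes, scalars))  # category -> scalar used in df.replace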
if __name__ == "__main__":
main()
\ No newline at end of file
#!/usr/bin/env python
import config as cfg
from tools import file_reader, file_writer
from tools import preprocessor
from utility import embedder
import pandas as pd
import numpy as np
USE_CAT_NAMES = True
def main():
make_complete_count()
make_compliance_count()
make_fall_short_count()
make_fall_long_count()
def make_complete_count():
case = 'Complete'
ats = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
    df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, 'complete.csv', converters=ats)
cols_ats = [str(i)+'Ats' for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = [df[f'{i}Ats'].unique() for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = list(set(np.concatenate(unique_ats)))
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, '')
if USE_CAT_NAMES:
df = df.drop(cols_ats, axis=1)
df = pd.concat([df.drop(case, axis=1), df_ats, df[[case]]], axis=1)
df = df.drop('0', axis=1)
    else:
        num_cols = embedder.get_numerical_cols(df, case)
        # Rebuild the counts with the 'Ats_' prefix so the selection below matches.
        df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, 'Ats_')
        df = pd.concat([df, df_ats], axis=1)
        ats_columns = ['Ats_' + ats for ats in unique_ats]
        df = df[num_cols + ats_columns + [case]]
        df = df.drop(['Ats_0'], axis=1)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'complete_count.csv')
def make_compliance_count():
case = 'Compliance'
ats = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
    df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, 'compliance.csv', converters=ats)
cols_ats = [str(i)+'Ats' for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = [df[f'{i}Ats'].unique() for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = list(set(np.concatenate(unique_ats)))
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, '')
if USE_CAT_NAMES:
df = df.drop(cols_ats, axis=1)
df = pd.concat([df.drop(case, axis=1), df_ats, df[[case]]], axis=1)
df = df.drop('0', axis=1)
    else:
        num_cols = embedder.get_numerical_cols(df, case)
        # Rebuild the counts with the 'Ats_' prefix so the selection below matches.
        df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, 'Ats_')
        df = pd.concat([df, df_ats], axis=1)
        ats_columns = ['Ats_' + ats for ats in unique_ats]
        df = df[num_cols + ats_columns + [case]]
        df = df.drop(['Ats_0'], axis=1)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'compliance_count.csv')
def make_fall_short_count():
case = 'FallShort'
ats = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
    df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, 'fall_short.csv', converters=ats)
cols_ats = [str(i)+'Ats' for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = [df[f'{i}Ats'].unique() for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = list(set(np.concatenate(unique_ats)))
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, '')
if USE_CAT_NAMES:
df = df.drop(cols_ats, axis=1)
df = pd.concat([df.drop(case, axis=1), df_ats, df[[case]]], axis=1)
df = df.drop('0', axis=1)
    else:
        num_cols = embedder.get_numerical_cols(df, case)
        # Rebuild the counts with the 'Ats_' prefix so the selection below matches.
        df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, 'Ats_')
        df = pd.concat([df, df_ats], axis=1)
        ats_columns = ['Ats_' + ats for ats in unique_ats]
        df = df[num_cols + ats_columns + [case]]
        df = df.drop(['Ats_0'], axis=1)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'fall_short_count.csv')
def make_fall_long_count():
case = 'FallLong'
ex = {str(i)+'Ex':str for i in range(1, cfg.EX_RESOLUTION+1)}
ats = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
converters = {**ex, **ats}
    df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, 'fall_long.csv', converters=converters)
num_cols = embedder.get_numerical_cols(df, case)
# Extract exercises
cols_ex = [str(i)+'Ex' for i in range(1, cfg.EX_RESOLUTION+1)]
unique_ex = [df[f'{i}Ex'].unique() for i in range(1, cfg.EX_RESOLUTION+1)]
unique_ex = list(set(np.concatenate(unique_ex)))
df_ex = preprocessor.extract_cat_count(df, unique_ex, cols_ex, 'Ex_')
# Extract ats
cols_ats = [str(i)+'Ats' for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = [df[f'{i}Ats'].unique() for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = list(set(np.concatenate(unique_ats)))
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, 'Ats_')
# Merge dataframes
df = pd.concat([df, df_ex, df_ats], axis=1)
ex_columns = ['Ex_' + ex for ex in unique_ex]
ats_columns = ['Ats_' + ats for ats in unique_ats]
df = df[num_cols + ex_columns + ats_columns + [case]]
df = df.drop(['Ex_0', 'Ats_0'], axis=1)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'fall_long_count.csv')
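# make_complete_count, make_compliance_count and make_fall_short_count are
# identical up to the case name and input file; a parameterized builder would
# remove the triplication. A sketch (assumed equivalent, not tested here):
def _sketch_make_count(case: str, filename: str):
    ats = {f'{i}Ats': str for i in range(1, cfg.ATS_RESOLUTION + 1)}
    df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, filename, converters=ats)
    cols_ats = list(ats)
    unique_ats = list(set(np.concatenate([df[c].unique() for c in cols_ats])))
    df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, '')
    rest = df.drop(cols_ats + [case], axis=1)
    out = pd.concat([rest, df_ats, df[[case]]], axis=1).drop('0', axis=1)
    file_writer.write_csv(out, cfg.PROCESSED_DATA_DIR,
                          filename.replace('.csv', '_count.csv'))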
if __name__ == "__main__":
main()
\ No newline at end of file
#!/usr/bin/env python
import config as cfg
from tools import file_reader, file_writer, feature_maker
from tools import preprocessor, neural_embedder
from utility import embedder
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.decomposition import PCA
def main():
make_complete_emb()
make_compliance_emb()
make_fall_short_emb()
make_fall_long_emb()
def make_complete_emb():
case = 'Complete'
ats = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR,
'complete.csv',
converters=ats)
    emb_cols = df.filter(regex=r'\d+Ats', axis=1)
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
df_to_enc = df.iloc[:,n_numerical_cols:]
target_name = case
train_ratio = 0.9
X_train, X_val, y_train, y_val, labels = preprocessor.prepare_data_for_embedder(df_to_enc,
target_name,
train_ratio)
artifacts_path = cfg.COMPLETE_EMB_DIR
epochs = 5
params = {"df": df_to_enc,
"target_name": target_name,
"train_ratio": train_ratio,
"network_layers": ([128]),
"epochs": epochs,
"batch_size": 128,
"verbose": False,
"artifacts_path": artifacts_path}
network = neural_embedder.NeuralEmbedder(**params)
network.fit(X_train, y_train, X_val, y_val)
network.save_model()
embedded_weights = network.get_embedded_weights()
network.save_weights(embedded_weights)
network.save_labels(labels)
network.make_visualizations_from_network(extension='png')
    emb_cols = df.filter(regex=r'\d+Ats', axis=1)
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
embedded_df = df.iloc[:, n_numerical_cols:df.shape[1]-1]
for index in range(embedded_df.shape[1]):
column = embedded_df.columns[index]
labels_column = labels[index]
embeddings_column = embedded_weights[index]
pca = PCA(n_components=1)
Y = pca.fit_transform(embeddings_column)
y_array = np.concatenate(Y)
mapping = dict(zip(labels_column.classes_, y_array))
file_writer.write_mapping(mapping,
Path.joinpath(cfg.PROCESSED_DATA_DIR, 'embeddings'),
f'complete_{column}.csv')
df[column] = df[column].replace(to_replace=mapping)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'complete_emb.csv')
def make_compliance_emb():
case = 'Compliance'
ats = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
    df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, 'compliance.csv', converters=ats)
    emb_cols = df.filter(regex=r'\d+Ats', axis=1)
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
df_to_enc = df.iloc[:,n_numerical_cols:]
target_name = case
train_ratio = 0.9
X_train, X_val, y_train, y_val, labels = preprocessor.prepare_data_for_embedder(df_to_enc,
target_name,
train_ratio)
artifacts_path = cfg.COMPLIANCE_EMB_DIR
epochs = 5
params = {"df": df_to_enc,
"target_name": target_name,
"train_ratio": train_ratio,
"network_layers": ([128]),
"epochs": epochs,
"batch_size": 128,
"verbose": False,
"artifacts_path": artifacts_path}
network = neural_embedder.NeuralEmbedder(**params)
network.fit(X_train, y_train, X_val, y_val)
network.save_model()
embedded_weights = network.get_embedded_weights()
network.save_weights(embedded_weights)
network.save_labels(labels)
network.make_visualizations_from_network(extension='png')
    emb_cols = df.filter(regex=r'\d+Ats', axis=1)
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
embedded_df = df.iloc[:, n_numerical_cols:df.shape[1]-1]
for index in range(embedded_df.shape[1]):
column = embedded_df.columns[index]
labels_column = labels[index]
embeddings_column = embedded_weights[index]
pca = PCA(n_components=1)
Y = pca.fit_transform(embeddings_column)
y_array = np.concatenate(Y)
mapping = dict(zip(labels_column.classes_, y_array))
file_writer.write_mapping(mapping,
Path.joinpath(cfg.PROCESSED_DATA_DIR, 'embeddings'),
f'compliance_{column}.csv')
df[column] = df[column].replace(to_replace=mapping)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'compliance_emb.csv')
def make_fall_short_emb():
case = 'FallShort'
ats = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
    df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, 'fall_short.csv', converters=ats)
    emb_cols = df.filter(regex=r'\d+Ats', axis=1)
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
df_to_enc = df.iloc[:,n_numerical_cols:]
target_name = case
train_ratio = 0.9
X_train, X_val, y_train, y_val, labels = preprocessor.prepare_data_for_embedder(df_to_enc,
target_name,
train_ratio)
artifacts_path = cfg.FALL_SHORT_EMB_DIR
epochs = 5
params = {"df": df_to_enc,
"target_name": target_name,
"train_ratio": train_ratio,
"network_layers": ([128]),
"epochs": epochs,
"batch_size": 128,
"verbose": False,
"artifacts_path": artifacts_path}
network = neural_embedder.NeuralEmbedder(**params)
network.fit(X_train, y_train, X_val, y_val)
network.save_model()
embedded_weights = network.get_embedded_weights()
network.save_weights(embedded_weights)
network.save_labels(labels)
network.make_visualizations_from_network(extension='png')
    emb_cols = df.filter(regex=r'\d+Ats', axis=1)
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
embedded_df = df.iloc[:, n_numerical_cols:df.shape[1]-1]
for index in range(embedded_df.shape[1]):
column = embedded_df.columns[index]
labels_column = labels[index]
embeddings_column = embedded_weights[index]
pca = PCA(n_components=1)
Y = pca.fit_transform(embeddings_column)
y_array = np.concatenate(Y)
mapping = dict(zip(labels_column.classes_, y_array))
file_writer.write_mapping(mapping,
Path.joinpath(cfg.PROCESSED_DATA_DIR, 'embeddings'),
f'fall_short_{column}.csv')
df[column] = df[column].replace(to_replace=mapping)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'fall_short_emb.csv')
def make_fall_long_emb():
case = 'FallLong'
ex = {str(i)+'Ex':str for i in range(1, cfg.EX_RESOLUTION+1)}
ats = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
converters = {**ex, **ats}
    df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, 'fall_long.csv', converters=converters)
    emb_cols = df.filter(regex=r'\d+(Ats|Ex)', axis=1)
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
df_to_enc = df.iloc[:,n_numerical_cols:]
target_name = case
train_ratio = 0.9
X_train, X_val, y_train, y_val, labels = preprocessor.prepare_data_for_embedder(df_to_enc,
target_name,
train_ratio)
artifacts_path = cfg.FALL_LONG_EMB_DIR
epochs = 5
params = {"df": df_to_enc,
"target_name": target_name,
"train_ratio": train_ratio,
"network_layers": ([128]),
"epochs": epochs,
"batch_size": 128,
"verbose": False,
"artifacts_path": artifacts_path}
network = neural_embedder.NeuralEmbedder(**params)
network.fit(X_train, y_train, X_val, y_val)
network.save_model()
embedded_weights = network.get_embedded_weights()
network.save_weights(embedded_weights)
network.save_labels(labels)
network.make_visualizations_from_network(extension='png')
    emb_cols = df.filter(regex=r'\d+(Ats|Ex)', axis=1)
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
embedded_df = df.iloc[:, n_numerical_cols:df.shape[1]-1]
for index in range(embedded_df.shape[1]):
column = embedded_df.columns[index]
labels_column = labels[index]
embeddings_column = embedded_weights[index]
pca = PCA(n_components=1)
Y = pca.fit_transform(embeddings_column)
y_array = np.concatenate(Y)
mapping = dict(zip(labels_column.classes_, y_array))
file_writer.write_mapping(mapping,
Path.joinpath(cfg.PROCESSED_DATA_DIR, 'embeddings'),
f'fall_long_{column}.csv')
df[column] = df[column].replace(to_replace=mapping)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'fall_long_emb.csv')
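# The four make_*_emb functions differ only in case name, file stem, artifacts
# directory, epoch budget and column regex, so a table-driven main would collapse
# them. A sketch of the parameterization (values read off the functions above):
EMB_CASES = [
    ('Complete',   'complete',   cfg.COMPLETE_EMB_DIR,   5, r'\d+Ats'),
    ('Compliance', 'compliance', cfg.COMPLIANCE_EMB_DIR, 5, r'\d+Ats'),
    ('FallShort',  'fall_short', cfg.FALL_SHORT_EMB_DIR, 5, r'\d+Ats'),
    ('FallLong',   'fall_long',  cfg.FALL_LONG_EMB_DIR,  5, r'\d+(Ats|Ex)'),
]
# A hypothetical shared body would then be invoked as:
#     for case, stem, path, epochs, regex in EMB_CASES:
#         _make_emb(case, stem, path, epochs, regex)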