#!/usr/bin/env python
import numpy as np
import pandas as pd
import config as cfg
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import RobustScaler, MaxAbsScaler, QuantileTransformer
from tools import classifiers, data_loader
from pathlib import Path
from sklearn.base import BaseEstimator, TransformerMixin

# The three screening outcomes evaluated; each selects a dedicated data
# loader and embedded-feature CSV in main().
CASES = ["Complete", "Success", "Fall"]
COMPLETE_FILENAME = "complete_emb.csv"
SUCCESS_FILENAME = "success_emb.csv"
FALL_FILENAME = "fall_emb.csv"

class DummyScaler(BaseEstimator, TransformerMixin):
    """No-op scaler so the "no scaling" case fits the sklearn transformer API.

    The original overrode only ``fit_transform``, so the class was not a
    valid transformer: ``fit`` and ``transform`` were missing, which breaks
    pipeline use and the sklearn contract. Implementing them lets
    ``TransformerMixin`` derive ``fit_transform(X, y=None)``, which remains
    backward compatible with existing ``fit_transform(X)`` callers.
    """

    def fit(self, X, y=None):
        # Nothing to learn for a pass-through transform.
        return self

    def transform(self, X):
        # Mirror the other scalers, which return a plain ndarray.
        return np.array(X)

def main():
    """Evaluate every classifier/scaler combination for each case.

    For each case in ``CASES`` the embedded dataset is loaded, the leading
    non-embedding columns are rescaled with each candidate scaler, and the
    cross-validated accuracy/precision/recall/ROC-AUC of each classifier is
    appended to a per-case report file in ``cfg.REPORTS_DIR``.
    """
    clf_names = ["MLP", "SVM", "RF", "XGB", "LR", "KNN"]
    clfs = [classifiers.train_mlp_cv, classifiers.train_svm_cv,
            classifiers.train_rf_cv, classifiers.train_xgb_cv,
            classifiers.train_lr_cv, classifiers.train_knn_cv]
    # NOTE: the lists below must stay in lockstep. The original had 9 scalers
    # but only 8 names, so zip() silently dropped the last scaler and paired
    # "QuantileTransformer"/"QuantileTransformerNorm" with the wrong objects.
    scaler_names = ["None", "Standard", "MinMax", "MinMaxRange", "Robust",
                    "MaxAbs", "QuantileTransformer", "QuantileTransformerNorm"]
    scalers = [DummyScaler(), StandardScaler(), MinMaxScaler(),
               MinMaxScaler((-1, 1)), RobustScaler(), MaxAbsScaler(),
               QuantileTransformer(random_state=0),
               QuantileTransformer(output_distribution='normal',
                                   random_state=0)]
    assert len(scaler_names) == len(scalers)

    for case in CASES:
        if case == "Complete":
            dl = data_loader.CompleteDataLoader(COMPLETE_FILENAME).load_data()
        elif case == "Success":
            dl = data_loader.SuccessDataLoader(SUCCESS_FILENAME).load_data()
        else:
            dl = data_loader.FallDataLoader(FALL_FILENAME).load_data()
        X, y = dl.get_data()

        # Embedding columns (names like "3Ats...") are detected by regex and
        # excluded from scaling; only the leading n_scale_cols are rescaled.
        # Raw string avoids the invalid-escape warning of '\d'/'\w'.
        df = pd.concat([X, y], axis=1)
        emb_cols = df.filter(regex=r'((\d+)[Ats])\w+', axis=1)
        n_scale_cols = X.shape[1] - emb_cols.shape[1]

        output_path = Path.joinpath(cfg.REPORTS_DIR,
                                    f"{case} scaling results.txt")
        # "w" (not "w+") — the file is only written, never read back.
        with open(output_path, "w") as text_file:
            text_file.write(f"{case} case using {len(clfs)} clfs "
                            f"and {len(scalers)} scalers\n\n")

        for clf_name, clf in zip(clf_names, clfs):
            for scaler_name, scaler in zip(scaler_names, scalers):
                # Label is written before training so a crash/long run is
                # attributable to a specific combination.
                with open(output_path, "a") as text_file:
                    text_file.write(f"Results for {clf_name}, {scaler_name}:\n")
                # Preserve the row index so concat aligns on rows instead of
                # falling back to a fresh RangeIndex.
                X_sc = pd.DataFrame(
                    scaler.fit_transform(X.iloc[:, :n_scale_cols]),
                    index=X.index)
                X_new = pd.concat([X_sc, X.iloc[:, n_scale_cols:]], axis=1)
                _, result_acc, result_pre, result_recall, result_rocauc, _ \
                    = clf(X_new, y)
                with open(output_path, "a") as text_file:
                    text_file.write(f"Accuracy: {round(np.mean(result_acc), 3)}\n")
                    text_file.write(f"Precision: {round(np.mean(result_pre), 3)}\n")
                    text_file.write(f"Recall: {round(np.mean(result_recall), 3)}\n")
                    text_file.write(f"ROCAUC: {round(np.mean(result_rocauc), 3)}")
                    text_file.write("\n\n")

if __name__ == '__main__':
    main()