Commit 6acfde7e authored by Christian Marius Lillelund
Browse files

fixed error in ats cleaning

parent 6684e69d
Pipeline #39268 failed in 2 minutes and 16 seconds
......@@ -3,7 +3,7 @@
import numpy as np
import pandas as pd
import config as cfg
from tools import file_reader, file_writer, feature_maker, preprocessor
from tools import file_reader, file_writer, feature_maker
from utility import data_dto, dataset
def main():
......
......@@ -124,7 +124,7 @@ class Cleaner2019(BaseCleaner):
ats = remove_rows_with_old_dates(ats, cfg.RETURN_DATE)
ats = remove_deprecated_device_data(ats)
ats = remove_tainted_histories(ats)
ats = drop_invalid_devices(ats, iso_classes)
ats = drop_invalid_devices(ats, ic)
return ats
def clean_fall_data(self, fd):
......
import pandas as pd
import config as cfg
from tools import file_writer
from lime.lime_tabular import LimeTabularExplainer
def train_and_save_explainer(model_data_and_results, device_features, X_devices,
......
......@@ -6,9 +6,7 @@ import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from ast import literal_eval
from sklearn.preprocessing import MinMaxScaler
import datetime
import numpy as np
import config as cfg
from tools import feature_maker
def get_ats_list(ats):
df = pd.DataFrame(ats.groupby(['CitizenId'])['DevISOClass'].apply(lambda x: ",".join(x))).reset_index()
......@@ -29,7 +27,7 @@ def split_categorial_columns(df: pd.DataFrame, col: str, tag: str, resolution: i
return df
def create_improve_split(features, threshold):
X = mf.make_improve_feature_absolute(features, threshold)
X = feature_maker.make_improve_feature_absolute(features, threshold)
y = X["Improve"]
X_train, X_test, y_train, y_test = train_test_split(X, y,
......
......@@ -18,6 +18,7 @@ def make_cross_val(clf, X, y, n_splits=5, shuffle=True, random_state=0):
model_results_roc = list()
model_results_acc = list()
model_results_auc = list()
models = list()
total_confusion_matrix = np.zeros(shape=(2, 2))
cv = StratifiedKFold(n_splits, shuffle, random_state)
......@@ -48,6 +49,9 @@ def make_cross_val(clf, X, y, n_splits=5, shuffle=True, random_state=0):
# Get normalized CM and add to total
cm = metrics.get_confusion_matrix(y_test, y_pred)
total_confusion_matrix = np.add(total_confusion_matrix, cm)
# Save model
models.append(model)
# Compute collective results
mean_model_auc = mean(model_results_auc)
......@@ -55,7 +59,7 @@ def make_cross_val(clf, X, y, n_splits=5, shuffle=True, random_state=0):
mean_model_acc = mean(model_results_acc)
total_confusion_matrix = total_confusion_matrix / n_splits
return mean_model_auc, std_model_auc, mean_model_acc, total_confusion_matrix, model
return mean_model_auc, std_model_auc, mean_model_acc, total_confusion_matrix, models[len(models)]
# From https://stackoverflow.com/questions/56781373/how-to-calculate-auc-for-random-forest-model-in-sklearn
def train_and_predict(clf, X_train, X_test, y_train):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment