Commit 7a57387f authored by Christian Marius Lillelund's avatar Christian Marius Lillelund
Browse files

Added code to track number of completes

parent 7a47d581
Pipeline #25663 failed with stage
in 2 minutes and 19 seconds
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import datetime as dt
import src.models.feature_maker as fm
# Reset every display.* option so earlier notebook runs don't affect output.
pd.reset_option('^display.', silent=True)
# Interim screening time series; NumberCompleted is derived by the call below.
df = pd.read_csv('../data/interim/timeseries.csv')
df = fm.assign_number_completed(df)
```
%% Cell type:code id: tags:
``` python
# Show one row per citizen that reached 7 completed programmes.
df.loc[df['NumberCompleted'] == 7].drop_duplicates(subset='CitizenId')
```
%% Output
CitizenId PatientId Sex Age NumberScreening StartDate \\n1562 4236583454361 40703 1 84 14 2018-11-06 \n\n EndDate LastStatusDate NumberWeeks MeanEvaluation ... \\n1562 2018-12-07 2018-12-07 4.43 4.2 ... \n\n PhysicsStartReason PhysicsEnd PhysicsDifference PhysicsReason \\n1562 Ingen 48.0 20.0 Ingen \n\n PhysicsIndicator RehabIndicator \\n1562 1 2.07 \n\n Exercises NumberExercises \\n1562 ['303451', '303453', '303452', '303454', '3034... 8 \n\n LastStatus NumberCompleted \n1562 Significantprogress 7.0 \n\n[1 rows x 54 columns]
%% Cell type:code id: tags:
``` python
# Full screening history for the citizen found in the previous cell.
df.loc[df['CitizenId'] == 4236583454361]
```
%% Output
CitizenId PatientId Sex Age NumberScreening StartDate \\n1549 4236583454361 40703 1 82 1 2016-11-23 \n1550 4236583454361 40703 1 82 2 2016-11-23 \n1551 4236583454361 40703 1 83 3 2017-01-04 \n1552 4236583454361 40703 1 83 4 2017-09-28 \n1553 4236583454361 40703 1 83 5 2017-10-23 \n1554 4236583454361 40703 1 83 6 2017-12-14 \n1555 4236583454361 40703 1 84 7 2018-02-01 \n1556 4236583454361 40703 1 84 8 2018-03-05 \n1557 4236583454361 40703 1 84 9 2018-04-09 \n1558 4236583454361 40703 1 84 10 2018-05-31 \n1559 4236583454361 40703 1 84 11 2018-07-02 \n1560 4236583454361 40703 1 84 12 2018-08-06 \n1561 4236583454361 40703 1 84 13 2018-10-01 \n1562 4236583454361 40703 1 84 14 2018-11-06 \n\n EndDate LastStatusDate NumberWeeks MeanEvaluation ... \\n1549 2016-11-23 2020-08-14 0.00 2.0 ... \n1550 2017-01-04 2020-08-14 6.00 2.4 ... \n1551 2017-09-25 2017-09-25 37.71 2.0 ... \n1552 2017-10-23 2017-10-23 3.57 3.6 ... \n1553 2017-12-14 2017-10-23 7.43 3.6 ... \n1554 2018-02-01 2018-02-01 7.00 3.3 ... \n1555 2018-03-05 2018-02-01 4.57 3.1 ... \n1556 2018-04-09 2020-08-14 5.00 4.0 ... \n1557 2018-05-31 2018-05-29 7.43 3.5 ... \n1558 2018-07-02 2020-08-14 4.57 3.9 ... \n1559 2018-08-06 2018-08-06 5.00 3.6 ... \n1560 2018-10-01 2018-08-06 8.00 4.0 ... \n1561 2018-11-06 2020-08-14 5.14 3.7 ... \n1562 2018-12-07 2018-12-07 4.43 4.2 ... 
\n\n PhysicsStartReason PhysicsEnd PhysicsDifference \\n1549 Ingen 17.0 0.0 \n1550 Ingen 13.0 -4.0 \n1551 Ingen 16.0 3.0 \n1552 Ingen 42.0 26.0 \n1553 Ingen 29.0 -13.0 \n1554 Andet 29.0 0.0 \n1555 Ingen 27.0 -2.0 \n1556 Manglende motivation 27.0 0.0 \n1557 Ingen 29.0 2.0 \n1558 Ingen 25.0 -4.0 \n1559 Ingen forklaring 31.0 6.0 \n1560 Ingen 25.0 -6.0 \n1561 Ingen forklaring 28.0 3.0 \n1562 Ingen 48.0 20.0 \n\n PhysicsReason PhysicsIndicator RehabIndicator \\n1549 Ingen 0 1.76 \n1550 Ingen 0 1.76 \n1551 Ingen 1 3.08 \n1552 Ingen 1 4.31 \n1553 Andet 0 1.33 \n1554 Ingen 0 1.90 \n1555 Manglende motivation 0 1.28 \n1556 Ingen 0 1.74 \n1557 Ingen 1 1.30 \n1558 Ingen forklaring 0 1.03 \n1559 Ingen 1 3.52 \n1560 Ingen forklaring 0 1.16 \n1561 Ingen 1 2.00 \n1562 Ingen 1 2.07 \n\n Exercises NumberExercises \\n1549 ['257883', '257884', '257885', '257886'] 4 \n1550 ['257883', '257884', '257885', '257886'] 4 \n1551 ['260933', '260934', '260935', '260936'] 4 \n1552 ['274332', '274333', '274336', '274334', '2743... 7 \n1553 ['275394', '275396', '275398', '275395', '2753... 8 \n1554 ['278311', '278312', '278313', '278314', '2783... 7 \n1555 ['281129', '281130', '281131', '281132', '2811... 7 \n1556 ['283143', '283144', '283145', '283146', '2831... 7 \n1557 ['285287', '285288', '285289', '285290', '2852... 7 \n1558 ['289757', '289758', '289759', '289760', '2897... 7 \n1559 ['291949', '291950', '291951', '291952', '2919... 9 \n1560 ['293890', '293891', '293893', '293896', '2938... 7 \n1561 ['299590', '299591', '299592', '299593', '2995... 9 \n1562 ['303451', '303453', '303452', '303454', '3034... 
8 \n\n LastStatus NumberCompleted \n1549 None 0.0 \n1550 None 1.0 \n1551 Active 1.0 \n1552 SignificantProgress 2.0 \n1553 SignificantProgress 2.0 \n1554 SignificantProgress 3.0 \n1555 SignificantProgress 3.0 \n1556 None 4.0 \n1557 ReActivated 4.0 \n1558 None 5.0 \n1559 SignificantProgress 5.0 \n1560 SignificantProgress 6.0 \n1561 None 6.0 \n1562 Significantprogress 7.0 \n\n[14 rows x 54 columns]
%% Cell type:code id: tags:
```
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 1000)
df = pd.read_csv('../data/interim/timeseries.csv')
# Per-citizen running totals of weeks and training sessions.
df['NumberWeeksSum'] = df.groupby('CitizenId')['NumberWeeks'].transform(pd.Series.cumsum)
df['NumberTrainingSum'] = df.groupby('CitizenId')['NumberTraining'].transform(pd.Series.cumsum)
df['NeedsStartBaseline'] = df.groupby('CitizenId')["NeedsStart"].transform('first')
# Drop citizens with no baseline needs score.
df = df[df['NeedsStartBaseline'] != 0]
#df = df.loc[df.CitizenId == 659524639842]
# NOTE(review): the triple-quoted block below is dead code kept as a string
# literal — earlier vectorised attempts at the completes computation.
'''
conditions = "(x.NumberWeeksSum >= 8) & (x.NumberTrainingSum >= 7)"
filter = df.groupby('CitizenId').apply(lambda x: pd.eval(conditions))
filter.index = filter.index.droplevel(0)
#dex = df.merge(df[filter.values], left_on=['CitizenId'],
# right_on=['CitizenId'], how='left').dropna().index
df['HasCompleted'] = (filter).astype(int)
df[['CitizenId', 'NumberWeeksSum', 'NumberTrainingSum', 'HasCompleted']].iloc[:50]
#df.groupby('CitizenId')['NumberWeeksSum', 'NumberTrainingSum'].apply(set)
#df_completed = df.loc[(df['NumberWeeksSum'] >= 8) & (df['NumberTrainingSum'] >= 7)] \
# .drop_duplicates(subset='CitizenId').reset_index(drop=True)
df['G'] = df.groupby('CitizenId').NumberWeeksSum.apply(lambda x : (x.diff().ge(8)))
df['res'] = df.groupby([df.CitizenId, df.G.cumsum()]).G.apply(lambda x : (~x).cumsum())
df[['CitizenId', 'NumberWeeksSum', 'G', 'res']]
'''
# Count completed programmes: a programme completes once >= 8 accumulated
# weeks AND >= 7 accumulated training sessions are reached, then the
# accumulators reset and counting continues.
threshold_weeks = 8
threshold_training = 7
cumsum_weeks = 0
cumsum_training = 0
group = 0
for group_name, df_group in df.groupby('CitizenId'):
    # NOTE(review): only `group` is reset here — cumsum_weeks and
    # cumsum_training leak across citizens; likely a bug (see the same
    # pattern in feature_maker.assign_number_completed).
    group = 0
    # NOTE(review): re-filtering df per group is redundant (df_group is the
    # same rows), and Series.iteritems() is deprecated (use .items()).
    items_weeks = df.loc[df.CitizenId == group_name].NumberWeeks.iteritems()
    items_training = df.loc[df.CitizenId == group_name].NumberTraining.iteritems()
    for (row_week, row_train) in zip(items_weeks, items_training):
        # row_week / row_train are (index, value) pairs.
        cumsum_weeks += row_week[1]
        cumsum_training += row_train[1]
        if cumsum_weeks >= threshold_weeks and cumsum_training >= threshold_training:
            cumsum_weeks = 0
            cumsum_training = 0
            group += 1
        df.loc[row_week[0], 'NumberCompleted'] = group
```
%% Cell type:code id: tags:
```
df[['CitizenId', 'NumberWeeksSum', 'NumberTrainingSum', 'HasCompleted']].iloc[:50]
df = df.loc[df.CitizenId == 659524639842]
df[['CitizenId', 'NumberWeeks', 'NumberCompleted']]
```
%% Output
CitizenId NumberWeeksSum NumberTrainingSum HasCompleted\n0 979449323793 0.00 1 0\n1 979449323793 5.00 4 0\n2 984026211384 0.00 0 0\n3 984026211384 2.86 5 0\n4 3582595558917 0.00 0 0\n5 3582595558917 3.00 5 0\n6 3582595558917 5.43 10 0\n7 1633195440927 0.00 0 0\n8 1633195440927 7.00 15 0\n9 1633195440927 11.00 22 1\n10 1633195440927 16.00 31 1\n11 2934953410074 0.00 0 0\n12 2934953410074 4.00 7 0\n13 1300365886341 0.00 0 0\n14 1300365886341 6.71 0 0\n15 3085778845518 0.00 1 0\n16 3085778845518 4.71 4 0\n17 3085778845518 6.71 8 0\n18 3085778845518 10.71 14 1\n19 3085778845518 14.85 15 1\n20 3085778845518 18.99 19 1\n21 3085778845518 22.42 26 1\n22 3085778845518 25.99 32 1\n23 3085778845518 28.42 33 1\n24 3085778845518 31.13 37 1\n25 3085778845518 31.13 38 1\n26 3085778845518 35.13 45 1\n27 3908688551748 0.00 1 0\n28 3908688551748 5.43 6 0\n29 4074234519288 0.00 0 0\n30 4074234519288 6.00 6 0\n31 4074234519288 9.29 7 1\n32 4074234519288 73.15 19 1\n33 4074234519288 79.15 23 1\n34 4074234519288 85.15 27 1\n35 4710921810897 0.00 0 0\n36 4710921810897 3.29 5 0\n37 4710921810897 3.29 6 0\n38 4710921810897 3.29 7 0\n39 4710921810897 3.29 8 0\n40 4710921810897 3.29 9 0\n41 4710921810897 3.29 10 0\n42 4071037059972 0.00 0 0\n43 4071037059972 3.00 6 0\n44 4071037059972 8.00 11 1\n45 3097545850329 0.00 0 0\n46 3097545850329 3.57 6 0\n47 3097545850329 8.14 12 1\n48 2938150487985 0.00 0 0\n49 2938150487985 2.57 5 0
CitizenId NumberWeeks NumberCompleted\n10 659524639842 0.00 0.0\n11 659524639842 1.57 0.0\n12 659524639842 5.00 0.0\n13 659524639842 1.00 0.0\n14 659524639842 3.29 1.0\n15 659524639842 8.00 2.0\n16 659524639842 0.00 2.0\n17 659524639842 5.57 2.0\n18 659524639842 7.43 3.0\n19 659524639842 26.00 4.0
%% Cell type:code id: tags:
```
# NOTE(review): orphaned fragment — `cumsum`, `value`, `idx` and `threshold`
# are undefined in this cell, and both branches assign the identical value,
# making the if/else redundant. Kept as-is pending removal.
cumsum += value
if cumsum >= threshold:
    cumsum = 0
    df.loc[idx, 'CitizenWeekSum'] = cumsum
else:
    df.loc[idx, 'CitizenWeekSum'] = cumsum
```
......
......@@ -31,7 +31,7 @@ DEVICE_FEATURES = ['HasRollator',
COMPLETES_FEATURES = ['NumberWeeksSum', 'NumberTrainingSum']
CUMULATIVE_FEATURES = ['NeedsStartBaseline',
TRAINING_FEATURES = ['NeedsStartBaseline',
'MeanEvaluationMean',
'StdEvaluationMean',
'NumberTrainingWeekMean',
......@@ -39,19 +39,20 @@ CUMULATIVE_FEATURES = ['NeedsStartBaseline',
'NumberCancelsSum',
'MeanTimeBetweenCancelsMean',
'MeanNumberCancelsWeekMean',
'NumberAtsMean',
'NeedsDifferenceMean',
'PhysicsDifferenceMean',
'NumberExercisesMean']
ATS_FEATURES = ['NumberAtsMean',
'HadRollator',
'HadShowerStool',
'HadRaisedToiletSeat',
'HadEmergencySystem',
'HadSeatCushion',
'HadWheelchair',
'HadBedWithEngine',
'NeedsDifferenceMean',
'PhysicsDifferenceMean',
'NumberExercisesMean']
'HadBedWithEngine']
TRAINING_FEATURES = ['NumberWeeks',
TRAINING_FEATURES_SCR = ['NumberWeeks',
'MeanEvaluation',
'StdEvaluation',
'MinEvaluation',
......
#!/usr/bin/env python
"""
Authors: Cecilie Moriat, Tenna Rasmussen, Christian Fischer Pedersen
Date: 20th March, 2020
"""
# Imports
# Internal
import data.file_reader as file_reader
import data.file_writer as file_writer
import data.parser as parser
import data.cleaner as cleaner
import models.feature_maker as fm
import models.preprocessor as pp
import models.explainer as explainer
import models.plot as plot
import models.predictors as predictors
import models.classifiers as clfs
import models.cross_validator as cv
import config as cfg
import utility.helper_func as hf
import utility.data_dto as data_dto
# External
import numpy as np
import pandas as pd
def run():
    """Train a completes classifier on ATS features.

    Reads the interim time series, builds ATS and completes features,
    shuffles the rows, saves the design matrix and labels to disk and
    cross-validates a random forest.
    """
    df = file_reader.read_features_csv(cfg.INTERIM_DATA_DIR, 'timeseries.csv')
    df = fm.make_citizen_ats(df)
    df = fm.make_completes_feature(df)
    # Deterministic shuffle so CV folds are reproducible.
    df = df.sample(frac=1, random_state=0)
    y = df["Completes"]
    X = df.drop(['Completes'], axis=1)
    X = X[cfg.GENERAL_FEATURES + cfg.ATS_FEATURES]
    X = pp.replace_numerical_inf(X)
    # Plain literals: the former f-strings contained no placeholders.
    save_model(X, cfg.PROCESSED_DATA_DIR, 'X_completes_ats.csv')
    save_model(y, cfg.PROCESSED_DATA_DIR, 'y_completes_ats.csv')
    train_rf(X, y)
def train_rf(X, y):
    """Cross-validate a random forest on (X, y) and print AUC/ACC metrics
    plus the confusion matrix."""
    classifier = clfs.get_classifier("Random Forest")
    mean_auc, std_auc, mean_acc, cm, model = cv.make_cross_val(classifier, X, y)
    metrics = (("Mean AUC", mean_auc), ("Std AUC", std_auc), ("Mean ACC", mean_acc))
    for label, value in metrics:
        print(f"{label}: {np.round(value, 3)}")
    print(str(cm))
def save_model(df, dir, filename):
    """Persist a dataframe (design matrix or label series) as CSV via the
    project's file writer."""
    file_writer.write_csv(df, dir, filename)
if __name__ == "__main__":
    run()
\ No newline at end of file
......@@ -29,7 +29,8 @@ import pandas as pd
def run():
df = file_reader.read_features_csv(cfg.INTERIM_DATA_DIR, 'timeseries.csv')
df = fm.make_citizen_features(df)
df = fm.make_citizen_training(df)
df = fm.make_citizen_ats(df)
df = fm.make_completes_feature(df)
df = pp.drop_rows_with_no_needs_start_bl(df)
......@@ -43,7 +44,7 @@ def run():
y = df["Completes"]
X = df.drop(['Completes'], axis=1)
X = X[cfg.GENERAL_FEATURES + cfg.CUMULATIVE_FEATURES]
X = X[cfg.GENERAL_FEATURES + cfg.TRAINING_FEATURES + cfg.ATS_FEATURES]
X = pp.replace_numerical_inf(X)
save_model(X, cfg.PROCESSED_DATA_DIR, f'X_completes.csv')
......
......@@ -39,7 +39,7 @@ def run():
y = df["Needs"]
X = df.drop(['Needs'], axis=1)
X = X[cfg.GENERAL_FEATURES + cfg.DEVICE_FEATURES + cfg.TRAINING_FEATURES]
X = X[cfg.GENERAL_FEATURES + cfg.DEVICE_FEATURES + cfg.TRAINING_FEATURES_SCR]
X = pp.encode_vector_dummy(X, cfg.LIST_COLS)
X = pp.replace_numerical_inf(X)
......@@ -68,7 +68,7 @@ def run_features_absolute():
y = ft_threshold["Needs"]
ft_threshold.drop(['Needs'], axis=1, inplace=True)
X = ft_threshold[cfg.GENERAL_FEATURES + cfg.DEVICE_FEATURES
+ cfg.DATE_COLS + cfg.TRAINING_FEATURES]
+ cfg.DATE_COLS + cfg.TRAINING_FEATURES_SCR]
X = pp.convert_dates(X, cfg.DATE_COLS)
X = pp.encode_vector_dummy(X, cfg.LIST_COLS)
X = pp.replace_numerical_inf(X)
......
......@@ -34,6 +34,26 @@ def make_completes_feature(df):
return df_all
def assign_number_completed(df):
    """Add a running count of completed training programmes per citizen.

    A programme counts as completed once the citizen has accumulated at
    least 8 weeks AND at least 7 training sessions since the previous
    completion; the accumulators then reset and counting continues. The
    running total is written to a 'NumberCompleted' column, row by row.

    :param df: screening rows with 'CitizenId', 'NumberWeeks' and
               'NumberTraining' columns.
    :return: the same frame with 'NumberCompleted' added (modified in place).
    """
    threshold_weeks = 8
    threshold_training = 7
    for _, citizen_df in df.groupby('CitizenId'):
        # Reset ALL accumulators per citizen. The previous version reset
        # only the completion counter, so accumulated weeks/trainings
        # leaked from one citizen into the next.
        completed = 0
        cumsum_weeks = 0
        cumsum_training = 0
        # Iterate the group directly instead of re-filtering df (O(n^2));
        # Series.iteritems() is deprecated, so zip the columns instead.
        for idx, weeks, training in zip(citizen_df.index,
                                        citizen_df['NumberWeeks'],
                                        citizen_df['NumberTraining']):
            cumsum_weeks += weeks
            cumsum_training += training
            if (cumsum_weeks >= threshold_weeks
                    and cumsum_training >= threshold_training):
                cumsum_weeks = 0
                cumsum_training = 0
                completed += 1
            df.loc[idx, 'NumberCompleted'] = completed
    return df
def make_needs_feature_absolute(df, threshold):
df['Needs'] = df.apply(lambda x: 1 if x['NeedsEnd']
<= (x['NeedsStart'] - threshold) else 0, axis=1)
......@@ -50,8 +70,7 @@ def get_needs_diff(row):
else:
raise ValueError('NeedsStartBaseline was zero, cannot compute needs')
def make_citizen_features(df):
citizen_grp = df.groupby('CitizenId')
def make_citizen_training(df):
df['NumberWeeksSum'] = get_col_cumsum(df, 'NumberWeeks')
df['NumberTrainingSum'] = get_col_cumsum(df, 'NumberTraining')
df['NeedsStartBaseline'] = get_col_first(df, 'NeedsStart')
......@@ -62,6 +81,14 @@ def make_citizen_features(df):
df['NumberCancelsSum'] = get_col_cumsum(df, 'NumberCancels')
df['MeanTimeBetweenCancelsMean'] = get_col_mean(df, 'MeanTimeBetweenCancels')
df['MeanNumberCancelsWeekMean'] = get_col_mean(df, 'MeanNumberCancelsWeek')
df['NeedsDifferenceMean'] = get_col_mean(df, 'NeedsDifference')
df['PhysicsDifferenceMean'] = get_col_mean(df, 'PhysicsDifference')
df['NumberExercisesMean'] = get_col_mean(df, 'NumberExercises')
return df
def make_citizen_ats(df):
df['NumberWeeksSum'] = get_col_cumsum(df, 'NumberWeeks')
df['NumberTrainingSum'] = get_col_cumsum(df, 'NumberTraining')
df['NumberAtsMean'] = get_col_mean(df, 'NumberAts')
df['HadRollator'] = get_col_max(df, 'HasRollator')
df['HadShowerStool'] = get_col_max(df, 'HasShowerStool')
......@@ -70,9 +97,6 @@ def make_citizen_features(df):
df['HadSeatCushion'] = get_col_max(df, 'HasSeatCushion')
df['HadWheelchair'] = get_col_max(df, 'HasWheelchair')
df['HadBedWithEngine'] = get_col_max(df, 'HasBedWithEngine')
df['NeedsDifferenceMean'] = get_col_mean(df, 'NeedsDifference')
df['PhysicsDifferenceMean'] = get_col_mean(df, 'PhysicsDifference')
df['NumberExercisesMean'] = get_col_mean(df, 'NumberExercises')
return df
def get_col_cumsum(df, col):
......@@ -167,13 +191,13 @@ def make_window_features(id, data):
ats = get_ats(citizen_data.ats, end_date)
window_features['NumberAts'] = len(ats)
window_features['ATS'] = [ats]
window_features['HasRollator'] = has_ats_id(ats, '120606')
window_features['HasShowerStool'] = has_ats_id(ats, '093307')
window_features['HasRaisedToiletSeat'] = has_ats_id(ats, '091203|091212|091215|091218')
window_features['HasEmergencySystem'] = has_ats_id(ats, '222718')
window_features['HasSeatCushion'] = has_ats_id(ats, '043303|043306')
window_features['HasWheelchair'] = has_ats_id(ats, '122203')
window_features['HasBedWithEngine'] = has_ats_id(ats, '181210')
window_features['HasRollator'] = has_ats_id(ats, ['120606'])
window_features['HasShowerStool'] = has_ats_id(ats, ['093307'])
window_features['HasRaisedToiletSeat'] = has_ats_id(ats, ['091203', '091212', '091215', '091218'])
window_features['HasEmergencySystem'] = has_ats_id(ats, ['222718'])
window_features['HasSeatCushion'] = has_ats_id(ats, ['043303','043306'])
window_features['HasWheelchair'] = has_ats_id(ats, ['122203'])
window_features['HasBedWithEngine'] = has_ats_id(ats, ['181210'])
window_features['Cluster'] = get_cluster(citizen_data.cl)
window_features['NeedsStart'] = pre_screening.NeedForHelpScore
......@@ -262,7 +286,7 @@ def get_cancels_week(tcw):
return cancels_week
def has_ats_id(ats, device_id):
    """Return True if any of the candidate device ids occurs in *ats*.

    :param ats: collection of assistive-technology ids held by a citizen.
    :param device_id: iterable of candidate device-id strings.
    """
    # The collapsed diff left two contradictory returns here; keep the
    # newer multi-id semantics used by make_window_features.
    return any(x in ats for x in device_id)
def get_cluster(cl):
if not cl.empty:
......
'''
#!/usr/bin/env python
"""
Authors: Cecilie Moriat, Tenna Rasmussen, Christian Fischer Pedersen
Date: 20th March, 2020
"""
# Imports
# Internal
import pickle
from pathlib import Path
import dill
from lime.lime_tabular import LimeTabularExplainer
from sklearn import metrics
from src import data_loader, data_cleaner
from src.globals.paths import *
from src.modelling import models, visualise, logistic_regression, handle_features
from src.clustering import device_clusters
import numpy as np
from multiprocessing import Process
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import pandas as pd
def run():
    """Fit a random forest on a chosen dummy-column subset and explain its
    predictions with LIME.

    Loads the pre-computed dummy-column lists for the three feature
    vectors, trains on the needs (threshold 8) split and runs the LIME
    tabular explainer over the test set.
    """
    print("Running")
    feature_subset = ["Age",
                      'NumberATsRunning',
                      'Sex',
                      'RehabIndicator',
                      'NeedsStart',
                      'PhysicsStart']
    feature_subset_two_screenings = ['Age',
                                     'NumberATsRunning',
                                     'Sex',
                                     'RehabIndicator',
                                     'NeedsStart',
                                     'PhysicsStart',
                                     'NeedsDif',
                                     'MeanEvaluation',
                                     'StdEvaluation',
                                     'MinEvaluation',
                                     'MaxEvaluation',
                                     'nTrainingPrWeek',
                                     'nTrainingPrWeekMax',
                                     'nTrainingPrWeekMin',
                                     'TimeBetweenTrainingsAvg',
                                     'nCancellationsPrWeekAVG',
                                     'nCancellationsPrWeekMax',
                                     'nCancellationsPrWeekMin',
                                     'NeedsEnd',
                                     'PhysicsEnd',
                                     'PhysicsDiff']
    # create features
    # handle_features.create_all()
    # visualise.plot_granularity()
    # load list of relevant dummy cols for the three feature vectors
    with open(str(Path.joinpath(FEATURES_DIR, 'needs_dummy_cols.txt')), "rb") as fp:
        needs_dummy_cols = pickle.load(fp)
    with open(str(Path.joinpath(FEATURES_DIR, 'successful_programme_1_dummy_cols.txt')), "rb") as fp:
        sp_1_dummy_cols = pickle.load(fp)
    with open(str(Path.joinpath(FEATURES_DIR, 'successful_programme_2_dummy_cols.txt')), "rb") as fp:
        sp_2_dummy_cols = pickle.load(fp)
    # TODO: fill out with one of the subsets listed in the comments below.
    # The original line (`dummy_subset = # fill out ...`) was a syntax
    # error; an empty placeholder at least lets the module parse.
    dummy_subset = []
    # log reg needs 8 ['NeedsStart', 'DevicesCount_091218', 'DevicesCount_123106', 'DevicesCount_122218', 'DevicesCount_180903', 'DevicesCount_120606', 'DevicesCount_091233', 'DevicesCount_220309', 'DevicesCount_181503', 'DevicesCount_043306', 'DevicesCount_180315', 'DevicesCount_123603', 'Sex', 'NumberATsRunning', 'DevicesCount_120306']
    # log reg sp2 ['nTrainingPrWeek', 'DevicesUnique_181503', 'DevicesUnique_222718', 'NumberATsRunning', 'DevicesUnique_123109', 'DevicesUnique_221830', 'TimeBetweenTrainingsAvg', 'DevicesUnique_093307', 'DevicesUnique_181226', 'DevicesUnique_122218', 'DevicesUnique_222712', 'DevicesUnique_122434', 'DevicesUnique_122439']
    # ran for needs 8 ['RehabIndicator', 'DevicesUnique_091203', 'DevicesUnique_091218', 'DevicesUnique_181218', 'DevicesUnique_122218', 'DevicesUnique_180315', 'DevicesUnique_123612', 'DevicesUnique_093304', 'DevicesUnique_090903', 'DevicesUnique_123604', 'DevicesUnique_181503', 'DevicesUnique_222712', 'DevicesUnique_123109', 'DevicesUnique_091233', 'DevicesUnique_123603']
    # ran for sp2 ['nTrainingPrWeek', 'HasRollator', 'MinEvaluation', 'Sex', 'TimeBetweenTrainingsAvg', 'MaxEvaluation', 'NeedsEnd']
    X_train, y_train = handle_features.load_needs_train(8)  # or load succ pro
    X_test, y_test = handle_features.load_needs_test(8)  # or load succ pro
    model = models.get_classifier('RandomForest')
    model.fit(X_train[dummy_subset], y_train)
    explainer = LimeTabularExplainer(training_data=X_train[dummy_subset].values,
                                     feature_names=dummy_subset,
                                     class_names=model.classes_,
                                     mode='classification',
                                     discretize_continuous=True)
    models.explain_predictions(X_test, dummy_subset, model, explainer)
    #visualise.plot_roc_curve2(lr, X_test_needs_0[dummy_subset], y_test_needs_0, feature_subset, sp_2_dummy_cols)
    return