Commit f04df37c authored by Christian Marius Lillelund's avatar Christian Marius Lillelund
Browse files

extended api with ats and ex, added fall test case

parent 90164d63
Pipeline #48151 passed with stage
in 3 minutes and 8 seconds
%% Cell type:code id: tags:
```
import pandas as pd
import numpy as np
import datetime as dt
from tools import preprocessor, data_loader
import config as cfg
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
tf.get_logger().setLevel('ERROR')
# Set dataset
filename = "success.csv"
filename = "compliance.csv"
converters = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
# Create data loader, prepare data
dl = data_loader.SuccessDataLoader(file_name=filename, converters=converters).load_data()
dl = data_loader.ComplianceDataLoader(file_name=filename, converters=converters).load_data()
X, y = dl.get_data()
df = pd.concat([X, y], axis=1)
# Add age feature
df['Age'] = df['BirthYear'].apply(lambda x: 121-x)
```
%% Cell type:code id: tags:
```
df.head()
```
%% Output
Gender BirthYear Cluster LoanPeriod NumberAts 1Ats 2Ats \\n0 0 45 11.0 315.0 2 Rollatorer Brusestole \n1 0 45 14.0 141.0 8 Rollatorer Brusestole \n2 0 45 14.0 142.0 8 Rollatorer Brusestole \n3 0 45 14.0 159.0 9 Rollatorer Brusestole \n4 0 45 14.0 243.0 9 Rollatorer Brusestole \n\n 3Ats 4Ats 5Ats ... 43Ats 44Ats 45Ats 46Ats \\n0 0 0 0 ... 0 0 0 0 \n1 RamperMobile Vendehjælpemidler Gangborde ... 0 0 0 0 \n2 RamperMobile Vendehjælpemidler Gangborde ... 0 0 0 0 \n3 RamperMobile Vendehjælpemidler Gangborde ... 0 0 0 0 \n4 RamperMobile Vendehjælpemidler Gangborde ... 0 0 0 0 \n\n 47Ats 48Ats 49Ats 50Ats Success Age \n0 0 0 0 0 1 76 \n1 0 0 0 0 1 76 \n2 0 0 0 0 1 76 \n3 0 0 0 0 1 76 \n4 0 0 0 0 1 76 \n\n[5 rows x 57 columns]
%% Cell type:code id: tags:
```
df.Success.value_counts()
df.Compliance.value_counts()
```
%% Output
0 906\n1 637\nName: Success, dtype: int64
%% Cell type:code id: tags:
```
import seaborn as sns
var = df['Success']
var = df['Compliance']
varValue = var.value_counts()
plt.figure()
plt.bar(varValue.index, varValue)
plt.xticks(varValue.index, varValue.index.values)
plt.ylabel("Frequency")
plt.title('Success')
file_name = f"Success bar.pdf"
plt.title('Compliance')
file_name = f"Compliance bar.pdf"
plt.savefig(Path.joinpath(cfg.REPORTS_PLOTS_DIR, file_name), dpi=300, bbox_inches = "tight")
```
%% Output
%% Cell type:code id: tags:
```
plot = sns.scatterplot(data=df, x="Age", y="NumberAts", hue="Success")
plot = sns.scatterplot(data=df, x="Age", y="NumberAts", hue="Compliance")
plt.title("Scatter plot of NumberAts vs Age")
fig = plot.get_figure()
file_name = f"Success scatter NumberAts Age.pdf"
file_name = f"Compliance scatter NumberAts Age.pdf"
plt.savefig(Path.joinpath(cfg.REPORTS_PLOTS_DIR, file_name), dpi=300, bbox_inches="tight")
```
%% Output
%% Cell type:code id: tags:
```
g = sns.FacetGrid(df, col="Success")
g = sns.FacetGrid(df, col="Compliance")
g.map(sns.distplot, "Age", bins=25)
g.fig.suptitle("Number of citizens who have success given age")
g.fig.suptitle("Number of citizens who have compliance given age")
g.fig.subplots_adjust(top=.8)
file_name = f"Success facetgrid age.pdf"
file_name = f"Compliance facetgrid age.pdf"
plt.savefig(Path.joinpath(cfg.REPORTS_PLOTS_DIR, file_name), dpi=300, bbox_inches="tight")
```
%% Output
C:\Users\cml\miniconda3\envs\py38-air\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
C:\Users\cml\miniconda3\envs\py38-air\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
%% Cell type:code id: tags:
```
def get_ats_list(df, ats_resolution=None):
    """Return every Ats entry stored in the numbered Ats columns of ``df``.

    Columns ``"1Ats"`` .. ``"<ats_resolution>Ats"`` are read in column
    order. Each cell is split on commas and every piece except the literal
    ``"0"`` is kept, duplicates included.

    Args:
        df: Table indexable by column name (e.g. a pandas DataFrame) whose
            Ats columns hold strings.
        ats_resolution: Number of Ats columns to read. Defaults to
            ``cfg.ATS_RESOLUTION`` when omitted, which is the original
            behaviour.

    Returns:
        list: The collected entries, column by column.
    """
    if ats_resolution is None:
        ats_resolution = cfg.ATS_RESOLUTION
    all_ats = []
    for i in range(1, ats_resolution + 1):
        for ats_string in df[f"{i}Ats"]:
            # A cell may hold several comma-separated entries; "0" is not
            # counted as an entry.
            all_ats.extend(ats for ats in ats_string.split(",") if ats != "0")
    return all_ats
ats_no_success = pd.Series(get_ats_list(df.loc[df['Success'] == 0]))
ats_success = pd.Series(get_ats_list(df.loc[df['Success'] == 1]))
ats_no_compliance = pd.Series(get_ats_list(df.loc[df['Compliance'] == 0]))
ats_compliance = pd.Series(get_ats_list(df.loc[df['Compliance'] == 1]))
df_ats_no_success = pd.DataFrame(ats_no_success.value_counts()[:20], columns=['No success quantity'])
df_ats_success = pd.DataFrame(ats_success.value_counts()[:20], columns=['Success quantity'])
df_ats_no_compliance = pd.DataFrame(ats_no_compliance.value_counts()[:20], columns=['No compliance quantity'])
df_ats_compliance = pd.DataFrame(ats_compliance.value_counts()[:20], columns=['Compliance quantity'])
ats_df = pd.concat([df_ats_no_success, df_ats_success], axis=1).fillna(0)
ats_df = pd.concat([df_ats_no_compliance, df_ats_compliance], axis=1).fillna(0)
ats_df.index.names = ['Ats']
ats_df = ats_df.reset_index()
ats_df['No success quantity'] = ats_df['No success quantity'] / len(ats_no_success)
ats_df['Success quantity'] = ats_df['Success quantity'] / len(ats_success)
# Scale each count by the total number of Ats entries in its group. The frame
# built from df_ats_no_compliance / df_ats_compliance only carries the
# compliance column names, so those are the columns that must be scaled.
ats_df['No compliance quantity'] = ats_df['No compliance quantity'] / len(ats_no_compliance)
ats_df['Compliance quantity'] = ats_df['Compliance quantity'] / len(ats_compliance)
```
%% Cell type:code id: tags:
```
plt.bar(ats_df["Ats"], ats_df["No success quantity"], label="No success")
plt.bar(ats_df["Ats"], ats_df["Success quantity"], bottom=ats_df["No success quantity"], label="Success")
plt.bar(ats_df["Ats"], ats_df["No compliance quantity"], label="No compliance")
plt.bar(ats_df["Ats"], ats_df["Compliance quantity"], bottom=ats_df["No compliance quantity"], label="Compliance")
plt.legend()
plt.xticks(rotation=90)
plt.ylabel("Scaled ats usage")
plt.title('Scaled plot of ats usage for success')
file_name = f"Success scaled ats usage.pdf"
plt.title('Scaled plot of ats usage for compliance')
file_name = f"Compliance scaled ats usage.pdf"
plt.savefig(Path.joinpath(cfg.REPORTS_PLOTS_DIR, file_name), dpi=300, bbox_inches="tight")
```
%% Output
......
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from tools import preprocessor, data_loader
import config as cfg
import tensorflow as tf
import seaborn as sns
from pathlib import Path
tf.get_logger().setLevel('ERROR')
# Set dataset
filename = "fall.csv"
converters = {str(i)+'Ats':str for i in range(1, cfg.ATS_RESOLUTION+1)}
# Create data loader, prepare data
dl = data_loader.FallDataLoader(file_name=filename, converters=converters).load_data()
X, y = dl.get_data()
df = pd.concat([X, y], axis=1)
# Add age feature
df['Age'] = df['BirthYear'].apply(lambda x: 120-x)
df['Age'] = df['BirthYear'].apply(lambda x: 121-x)
```
%% Cell type:code id: tags:
``` python
df.head()
```
%% Output
Gender BirthYear Cluster LoanPeriod NumberAts 1Ats \\n0 0 21 5.0 516.0 15 Brusestole \n1 0 45 11.0 315.0 2 Rollatorer \n2 0 45 14.0 141.0 8 Rollatorer \n3 0 45 14.0 142.0 8 Rollatorer \n4 0 45 14.0 159.0 9 Rollatorer \n\n 2Ats 3Ats 4Ats 5Ats \\n0 ToiletforhøjereStativ Gangborde Gangborde Nødalarmsystemer \n1 Brusestole 0 0 0 \n2 Brusestole RamperMobile Vendehjælpemidler Gangborde \n3 Brusestole RamperMobile Vendehjælpemidler Gangborde \n4 Brusestole RamperMobile Vendehjælpemidler Gangborde \n\n ... 43Ats 44Ats 45Ats 46Ats 47Ats 48Ats 49Ats 50Ats Fall Age \n0 ... 0 0 0 0 0 0 0 0 0 99 \n1 ... 0 0 0 0 0 0 0 0 0 75 \n2 ... 0 0 0 0 0 0 0 0 0 75 \n3 ... 0 0 0 0 0 0 0 0 0 75 \n4 ... 0 0 0 0 0 0 0 0 0 75 \n\n[5 rows x 57 columns]
%% Cell type:code id: tags:
``` python
df.Fall.value_counts()
```
%% Output
0 1622\n1 522\nName: Fall, dtype: int64
%% Cell type:code id: tags:
``` python
import seaborn as sns
# Bar chart of how many rows carry each value of the 'Fall' column.
var = df['Fall']
varValue = var.value_counts()
class_values = varValue.index

plt.figure()
plt.bar(class_values, varValue)
# One tick per class value, labelled with the value itself.
plt.xticks(class_values, class_values.values)
plt.ylabel("Frequency")
plt.title('Fall')

# Save the chart as a PDF in the report plots directory.
file_name = "Fall bar.pdf"
plt.savefig(cfg.REPORTS_PLOTS_DIR.joinpath(file_name), dpi=300, bbox_inches="tight")
```
%% Output
%% Cell type:code id: tags:
``` python
# Scatter of NumberAts against Age, coloured by the 'Fall' column.
plot = sns.scatterplot(
    x="Age",
    y="NumberAts",
    hue="Fall",
    data=df,
)
plt.title("Scatter plot of NumberAts vs Age")
fig = plot.get_figure()

# Save the chart as a PDF in the report plots directory.
file_name = "Fall scatter NumberAts Age.pdf"
plt.savefig(cfg.REPORTS_PLOTS_DIR.joinpath(file_name), dpi=300, bbox_inches="tight")
```
%% Output
%% Cell type:code id: tags:
``` python
# Age distribution per value of 'Fall', one facet per value.
g = sns.FacetGrid(df, col="Fall", margin_titles=True)
# sns.distplot is deprecated (seaborn emits a FutureWarning for it); histplot
# with a KDE overlay and density scaling keeps the histogram-plus-curve look.
g.map(sns.histplot, "Age", bins=25, kde=True, stat="density")
g.fig.suptitle("Number of citizens who fall given age")
# Leave room above the facets so the suptitle does not overlap them.
g.fig.subplots_adjust(top=.8)
file_name = "Fall facetgrid age.pdf"
plt.savefig(cfg.REPORTS_PLOTS_DIR.joinpath(file_name), dpi=300, bbox_inches="tight")
```
%% Output
C:\Users\cml\miniconda3\envs\py38-air\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
C:\Users\cml\miniconda3\envs\py38-air\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
%% Cell type:code id: tags:
``` python
def get_ats_list(df, ats_resolution=None):
    """Return every Ats entry stored in the numbered Ats columns of ``df``.

    Columns ``"1Ats"`` .. ``"<ats_resolution>Ats"`` are read in column
    order. Each cell is split on commas and every piece except the literal
    ``"0"`` is kept, duplicates included.

    Args:
        df: Table indexable by column name (e.g. a pandas DataFrame) whose
            Ats columns hold strings.
        ats_resolution: Number of Ats columns to read. Defaults to
            ``cfg.ATS_RESOLUTION`` when omitted, which is the original
            behaviour.

    Returns:
        list: The collected entries, column by column.
    """
    if ats_resolution is None:
        ats_resolution = cfg.ATS_RESOLUTION
    all_ats = []
    for i in range(1, ats_resolution + 1):
        for ats_string in df[f"{i}Ats"]:
            # A cell may hold several comma-separated entries; "0" is not
            # counted as an entry.
            all_ats.extend(ats for ats in ats_string.split(",") if ats != "0")
    return all_ats
# All Ats entries of the rows with Fall == 0 and Fall == 1 respectively.
ats_no_fall = pd.Series(get_ats_list(df.loc[df['Fall'] == 0]))
ats_fall = pd.Series(get_ats_list(df.loc[df['Fall'] == 1]))

# The 20 most frequent entries per group. to_frame(name) sets the column name
# explicitly; pd.DataFrame(series, columns=[...]) only works while the
# value_counts() result is unnamed, which newer pandas releases no longer
# guarantee.
a = ats_no_fall.value_counts().head(20).to_frame('No fall quantity')
b = ats_fall.value_counts().head(20).to_frame('Fall quantity')

# Align both groups on the Ats name; an entry missing from one group's
# top 20 gets a count of 0 there.
ats_df = pd.concat([a, b], axis=1).fillna(0)
ats_df.index.names = ['Ats']
ats_df = ats_df.reset_index()

# Scale each count by the total number of Ats entries in its group.
ats_df['No fall quantity'] = ats_df['No fall quantity'] / len(ats_no_fall)
ats_df['Fall quantity'] = ats_df['Fall quantity'] / len(ats_fall)
```
%% Cell type:code id: tags:
``` python
# Stacked bars per Ats: the "No fall" share at the bottom, the "Fall" share
# stacked on top of it.
ats_names = ats_df["Ats"]
no_fall_share = ats_df["No fall quantity"]
fall_share = ats_df["Fall quantity"]

plt.bar(ats_names, no_fall_share, label="No fall")
plt.bar(ats_names, fall_share, bottom=no_fall_share, label="Fall")
plt.legend()
plt.xticks(rotation=90)
plt.ylabel("Scaled ats usage")
plt.title('Scaled plot of ats usage for fall')

# Save the chart as a PDF in the report plots directory.
file_name = "Fall scaled ats usage.pdf"
plt.savefig(cfg.REPORTS_PLOTS_DIR.joinpath(file_name), dpi=300, bbox_inches="tight")
```
%% Output
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -8,7 +8,7 @@ tf.get_logger().setLevel('ERROR')
from pathlib import Path
NUM_ITER = 10
CASES = ["Complete", "Success", "Fall"]
# Case names iterated by main(). Each must match a `case == ...` branch there;
# main() now tests for "Compliance" (not "Success"), and any unmatched name
# falls through to the fall_test branch.
CASES = ["Complete", "Compliance", "Fall", "Fall_test"]
class Result:
def __init__(self, name, result):
......@@ -24,6 +24,7 @@ class CVResult:
ATS_COLS = [str(i)+'Ats' for i in range(1, cfg.ATS_RESOLUTION+1)] \
+ ['Cluster', 'LoanPeriod', 'NumberAts']
EX_COLS = [str(i)+'Ex' for i in range(1, cfg.EX_RESOLUTION+1)] + ['NumberEx']
CLF_NAMES = ["MLP", "LR", "XGB", "RF", "SVM", "KNN"]
CLASSIFIERS = {
"MLP": classifiers.train_mlp_cv,
......@@ -48,10 +49,19 @@ def load_fall():
converters=converters)
return df
def load_success():
def load_compliance():
converters = {str(i)+'Ats':str for i in range(1,11)}
df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR,
'success.csv',
'compliance.csv',
converters=converters)
return df
def load_fall_test():
    """Read 'fall_test.csv' from the processed-data directory.

    The numbered Ex columns ("1Ex" .. "<cfg.EX_RESOLUTION>Ex") and Ats
    columns ("1Ats" .. "<cfg.ATS_RESOLUTION>Ats") are passed as ``str``
    converters to ``file_reader.read_csv``.

    Returns:
        Whatever ``file_reader.read_csv`` returns (presumably a pandas
        DataFrame - confirm against file_reader).
    """
    column_converters = {f"{n}Ex": str for n in range(1, cfg.EX_RESOLUTION + 1)}
    column_converters.update(
        (f"{n}Ats", str) for n in range(1, cfg.ATS_RESOLUTION + 1)
    )
    return file_reader.read_csv(cfg.PROCESSED_DATA_DIR,
                                'fall_test.csv',
                                converters=column_converters)
......@@ -60,23 +70,28 @@ def main():
results_filename = f"{case} baseline results.txt"
# Version 1
with open(Path.joinpath(cfg.REPORTS_DIR, results_filename), "w+") as text_file:
text_file.write(f"{case} version 1 - without Ats columns")
text_file.write(f"{case} version 1 - without Ats and/or Ex columns")
if case == "Complete":
df = load_complete()
X = df.drop(['Complete'], axis=1)
y = df['Complete']
X = X.drop(ATS_COLS, axis=1)
elif case == "Success":
df = load_success()
X = df.drop(['Success'], axis=1)
y = df['Success']
elif case == "Compliance":
df = load_compliance()
X = df.drop(['Compliance'], axis=1)
y = df['Compliance']
X = X.drop(ATS_COLS, axis=1)
else:
elif case == "Fall":
df = load_fall()
X = df.drop(['Fall'], axis=1)
y = df['Fall']
X = X.drop(ATS_COLS, axis=1)
else:
df = load_fall_test()
X = df.drop(['Fall'], axis=1)
y = df['Fall']
X = X.drop(ATS_COLS + EX_COLS, axis=1)
X = np.array(X)
y = np.array(y)
......@@ -89,7 +104,7 @@ def main():
results_mean.append(result_mean)
results_std.append(result_std)
make_plots(y, zip(CLF_NAMES, y_pred_probas, results_mean, results_std),
case, 1, "without Ats columns")
case, 1, "without Ats and/or Ex columns")
# Version 2
with open(Path.joinpath(cfg.REPORTS_DIR, results_filename), "a") as text_file:
......@@ -100,20 +115,25 @@ def main():
df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, 'complete_emb.csv')
X = df.drop(['Complete'], axis=1)
y = df['Complete']
elif case == "Success":
df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, 'success_emb.csv')
X = df.drop(['Success'], axis=1)
y = df['Success']
else:
elif case == "Compliance":
df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR, 'compliance_emb.csv')
X = df.drop(['Compliance'], axis=1)
y = df['Compliance']
elif case == "Fall":