Commit 90164d63 authored by Christian Marius Lillelund's avatar Christian Marius Lillelund
Browse files

Added descriptive exercise names; added model training without cross-validation

parent 24daad4e
Pipeline #47872 passed with stage
in 2 minutes and 55 seconds
......@@ -6,8 +6,6 @@ from utility import embedder
import pandas as pd
import numpy as np
USE_CAT_NAMES = True
def main():
make_complete_count()
make_compliance_count()
......@@ -24,18 +22,10 @@ def make_complete_count():
cols_ats = [str(i)+'Ats' for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = [df[f'{i}Ats'].unique() for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = list(set(np.concatenate(unique_ats)))
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, '')
if USE_CAT_NAMES:
df = df.drop(cols_ats, axis=1)
df = pd.concat([df.drop(case, axis=1), df_ats, df[[case]]], axis=1)
df = df.drop('0', axis=1)
else:
num_cols = embedder.get_numerical_cols(df, case)
df = pd.concat([df, df_ats], axis=1)
ats_columns = ['Ats_' + ats for ats in unique_ats]
df = df[num_cols + ats_columns + df[[case]]]
df = df.drop(['Ats_0'], axis=1)
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, 'Ats_')
df = df.drop(cols_ats, axis=1)
df = pd.concat([df.drop(case, axis=1), df_ats, df[[case]]], axis=1)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'complete_count.csv')
......@@ -45,22 +35,13 @@ def make_compliance_count():
df = file_reader.read_csv(cfg.PROCESSED_DATA_DIR,
f'compliance.csv',
converters=ats)
cols_ats = [str(i)+'Ats' for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = [df[f'{i}Ats'].unique() for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = list(set(np.concatenate(unique_ats)))
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, '')
if USE_CAT_NAMES:
df = df.drop(cols_ats, axis=1)
df = pd.concat([df.drop(case, axis=1), df_ats, df[[case]]], axis=1)
df = df.drop('0', axis=1)
else:
num_cols = embedder.get_numerical_cols(df, case)
df = pd.concat([df, df_ats], axis=1)
ats_columns = ['Ats_' + ats for ats in unique_ats]
df = df[num_cols + ats_columns + df[[case]]]
df = df.drop(['Ats_0'], axis=1)
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, 'Ats_')
df = df.drop(cols_ats, axis=1)
df = pd.concat([df.drop(case, axis=1), df_ats, df[[case]]], axis=1)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'compliance_count.csv')
......@@ -74,18 +55,10 @@ def make_fall_short_count():
cols_ats = [str(i)+'Ats' for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = [df[f'{i}Ats'].unique() for i in range(1, cfg.ATS_RESOLUTION+1)]
unique_ats = list(set(np.concatenate(unique_ats)))
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, '')
if USE_CAT_NAMES:
df = df.drop(cols_ats, axis=1)
df = pd.concat([df.drop(case, axis=1), df_ats, df[[case]]], axis=1)
df = df.drop('0', axis=1)
else:
num_cols = embedder.get_numerical_cols(df, case)
df = pd.concat([df, df_ats], axis=1)
ats_columns = ['Ats_' + ats for ats in unique_ats]
df = df[num_cols + ats_columns + df[[case]]]
df = df.drop(['Ats_0'], axis=1)
df_ats = preprocessor.extract_cat_count(df, unique_ats, cols_ats, 'Ats_')
df = df.drop(cols_ats, axis=1)
df = pd.concat([df.drop(case, axis=1), df_ats, df[[case]]], axis=1)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, 'fall_short_count.csv')
......
#!/usr/bin/env python
import config as cfg
from data.make_dataset_count import USE_CAT_NAMES
from tools import file_reader, file_writer, feature_maker
from tools import preprocessor
import pandas as pd
......@@ -31,7 +32,7 @@ def make_complete_case(df, clusters):
if USE_CAT_NAMES:
df = preprocessor.replace_ats_values(df)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, f'complete.csv')
def make_compliance_case(df, clusters):
......@@ -46,7 +47,7 @@ def make_compliance_case(df, clusters):
if USE_CAT_NAMES:
df = preprocessor.replace_ats_values(df)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, f'compliance.csv')
def make_fall_short_case(df, clusters):
......@@ -61,7 +62,7 @@ def make_fall_short_case(df, clusters):
if USE_CAT_NAMES:
df = preprocessor.replace_ats_values(df)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, f'fall_short.csv')
def make_fall_long_case(df, clusters, fall_data):
......@@ -80,11 +81,11 @@ def make_fall_long_case(df, clusters, fall_data):
'NumberWeeksNoTraining', 'NumberCancels',
'NumberCancelsWeek', 'Needs', 'Physics']]
df = pd.concat([general_cols, ats_ex_cols, df[['FallLong']]], axis=1)
if USE_CAT_NAMES:
df = preprocessor.replace_ats_values(df)
df = preprocessor.replace_ex_values(df)
file_writer.write_csv(df, cfg.PROCESSED_DATA_DIR, f'fall_long.csv')
if __name__ == "__main__":
......
......@@ -15,16 +15,16 @@ CASES = ["Complete", "Compliance", "FallShort", "FallLong"]
def main():
for case in CASES:
if case == "Complete":
df = file_reader.read_csv(DATA_DIR, 'complete_emb.csv')
df = file_reader.read_csv(DATA_DIR, 'complete_count.csv')
model_dir = cfg.COMPLETE_XGB_DIR
elif case == "Compliance":
df = file_reader.read_csv(DATA_DIR, 'compliance_emb.csv')
df = file_reader.read_csv(DATA_DIR, 'compliance_count.csv')
model_dir = cfg.COMPLIANCE_XGB_DIR
elif case == "FallShort":
df = file_reader.read_csv(DATA_DIR, 'fall_short_emb.csv')
df = file_reader.read_csv(DATA_DIR, 'fall_short_count.csv')
model_dir = cfg.FALL_SHORT_XGB_DIR
else:
df = file_reader.read_csv(DATA_DIR, 'fall_long_emb.csv')
df = file_reader.read_csv(DATA_DIR, 'fall_long_count.csv')
model_dir = cfg.FALL_LONG_XGB_DIR
df = df.sample(frac=1, random_state=0).reset_index(drop=True)
......
......@@ -15,16 +15,16 @@ CASES = ["Complete", "Compliance", "FallShort", "FallLong"]
def main():
for case in CASES:
if case == "Complete":
df = file_reader.read_csv(DATA_DIR, 'complete_emb.csv')
df = file_reader.read_csv(DATA_DIR, 'complete_count.csv')
model_dir = cfg.COMPLETE_XGB_DIR
elif case == "Compliance":
df = file_reader.read_csv(DATA_DIR, 'compliance_emb.csv')
df = file_reader.read_csv(DATA_DIR, 'compliance_count.csv')
model_dir = cfg.COMPLIANCE_XGB_DIR
elif case == "FallShort":
df = file_reader.read_csv(DATA_DIR, 'fall_short_emb.csv')
df = file_reader.read_csv(DATA_DIR, 'fall_short_count.csv')
model_dir = cfg.FALL_SHORT_XGB_DIR
else:
df = file_reader.read_csv(DATA_DIR, 'fall_long_emb.csv')
df = file_reader.read_csv(DATA_DIR, 'fall_long_count.csv')
model_dir = cfg.FALL_LONG_XGB_DIR
df = df.sample(frac=1, random_state=0).reset_index(drop=True)
......
......@@ -155,6 +155,43 @@ def encode_vector_label(data: List[np.ndarray],
return data_encoded, labels_encoded
def replace_ex_values(df: pd.DataFrame):
    """Translate numeric exercise codes into descriptive (Danish) exercise names.

    Every cell anywhere in the frame that holds one of the known code
    strings (e.g. '7730') is substituted with its human-readable name;
    all other values pass through unchanged. A new DataFrame is
    returned; the input is not mutated.
    """
    # Mapping of exercise code -> descriptive name, as used in the raw data.
    code_to_name = {
        '7730': 'RygliggendeBækkenløft',
        '8043': 'RygliggendeBækkenløftMedHjælp',
        '8044': 'RygliggendeBækkenløftSmåStep',
        '8045': 'RygliggendeBækkenløftEtBenStrakt',
        '8046': 'RulleOmPåSidenMedHjælp',
        '8047': 'RulleOmPåSiden',
        '8048': 'RulleOmPåSidenKopIHånden',
        '8049': 'RulleOmPåSidenBeggeHænderPåKrop',
        '8050': 'LiggendeTilSiddendeMStøtte',
        '8051': 'LiggendeTilSiddendeSelvhjulpet',
        '8052': 'LiggendeTilSiddendeKopIHånden',
        '8053': 'LiggendeTilSiddendeBeggeHænderPåKop',
        '8054': 'SiddeMedLetStøtte',
        '8055': 'SiddeUdenStøtte10Sekunder',
        '8056': 'SiddeUdenStøtte30Til60Sekunder',
        '8057': 'SiddeUdenStøtteMereEnd60Sekunder',
        '8058': 'SkalBrugeArmlæn',
        '8059': 'MindreEnd8Oprejsninger',
        '8060': '8Til13Oprejsninger',
        '8061': 'MereEnd13Oprejsninger',
        '8062': 'StåUdenStøtte',
        '8063': 'StåMedSamledeFødder',
        '8064': 'StåPåTæer',
        '8065': 'Tandemstående',
        '8066': 'GårMedGangredskabOgStøtte',
        '8067': 'GårMedGangredskab',
        '8068': 'GårUdenGangredskabMenMedStøtte',
        '8069': 'GårUdenGangredskabOgUdenStøtte',
        '8070': 'MereEnd16Sek',
        '8071': '12Komma1Til16Sek',
        '8072': '8Til12Sek',
        '8073': 'MindreEnd8Sek',
        '8074': 'SelvstændigGangfunktion',
        '8075': 'SamleKuglepenOpFraGulvet',
        '8076': 'Gå4SkridtBaglæns',
        '8077': 'Hoppe',
    }
    return df.replace(to_replace=code_to_name)
def replace_ats_values(df: pd.DataFrame):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment