# test_risk_model.py
import csv
import os
from pathlib import Path

import joblib
import numpy as np
import pandas as pd

import paths as pt
from tools import preprocessor, file_reader

def _load_embeddings(embeddings_dir, columns):
    """Read the embedding for every column in *columns* exactly once.

    Each embedding is read with ``file_reader.read_embedding`` from the file
    ``fall_test_<column>.csv`` inside *embeddings_dir*.

    Returns:
        A dict mapping each column name to whatever ``read_embedding``
        returned for it (used below as the ``to_replace`` argument of
        ``Series.replace``).
    """
    return {column: file_reader.read_embedding(embeddings_dir,
                                               f'fall_test_{column}.csv')
            for column in columns}


def main():
    """Run the stored 'fall_test_xgboost.joblib' model on a hand-written sample.

    One single-row sample is built per gender value (0 and 1). The
    comma-separated 'Ats' and 'Ex' codes are split into numbered columns,
    the frame is aligned to the expected feature header, the code columns
    are mapped through their stored embeddings, and the resulting prediction
    and its probability are printed.
    """
    # Number of numbered code columns kept in the feature header.
    # NOTE(review): 'Ex' is split with resolution 10 below but only 9 'Ex'
    # columns are kept -- presumably this mirrors the features the model was
    # trained on; confirm against the training pipeline.
    num_ats_cols = 10
    num_ex_cols = 9
    resolution = 10

    cols_ats = [f'{i}Ats' for i in range(1, num_ats_cols + 1)]
    cols_ex = [f'{i}Ex' for i in range(1, num_ex_cols + 1)]
    embedded_cols = cols_ats + cols_ex

    header_list = ['Gender', 'BirthYear', "Cluster",
                   "LoanPeriod", "NumberSplit", "NumberScreening",
                   "NumberWeeks", "MeanEvaluation", "NumberFalls",
                   "NumberTraining", "NumberTrainingWeek", "TimeBetweenTraining",
                   "NumberWeeksNoTraining", "NumberCancels", "NumberCancelsWeek",
                   "Needs", "Physics", "NumberAts", "NumberEx"] + embedded_cols

    model = file_reader.read_joblib(pt.RISK_XGB_DIR,
                                    'fall_test_xgboost.joblib')

    # The embeddings do not depend on the sample, so read them once up front
    # instead of once per gender iteration.
    embeddings_dir = Path(pt.PROCESSED_DATA_DIR, 'embeddings')
    embeddings = _load_embeddings(embeddings_dir, embedded_cols)

    # Every feature except 'Gender' is identical for both samples.
    base_features = {"BirthYear": [72],
                     "Cluster": [10],
                     "LoanPeriod": [360],
                     "NumberSplit": [0],
                     "NumberScreening": [2],
                     "NumberWeeks": [3],
                     "MeanEvaluation": [4],
                     "NumberFalls": [1],
                     "NumberTraining": [8],
                     "NumberTrainingWeek": [1],
                     "TimeBetweenTraining": [3.5],
                     "NumberWeeksNoTraining": [1],
                     "Needs": [40],
                     "Physics": [43],
                     "Ex": ["8058,8062,8066"],
                     "Ats": ["222718,093307,181210"]}

    for gender in range(0, 2):
        input_data = {"Gender": [gender], **base_features}

        new_data_df = pd.DataFrame.from_dict(input_data)
        new_data_df['NumberAts'] = len(new_data_df.loc[0, 'Ats'].split(","))
        new_data_df['NumberEx'] = len(new_data_df.loc[0, 'Ex'].split(","))

        df = preprocessor.split_cat_columns(new_data_df, col_to_split='Ats',
                                            tag='Ats',
                                            resolution=resolution)
        df = preprocessor.split_cat_columns(df, col_to_split='Ex',
                                            tag='Ex',
                                            resolution=resolution)

        # Align to the expected feature order; columns absent from the sample
        # (e.g. 'NumberCancels', 'NumberCancelsWeek') are created as NaN.
        df = df.reindex(columns=header_list)

        # Empty code slots get the string '0' so they can be looked up in the
        # embedding like any other code. All remaining gaps are plain numeric
        # features and get a numeric 0 -- filling them with the string '0'
        # would leave object-dtype columns in the model input.
        df[embedded_cols] = df[embedded_cols].fillna('0')
        df = df.fillna(0)

        for column, embedding in embeddings.items():
            replaced = df[column].replace(to_replace=embedding)
            df[column] = pd.to_numeric(replaced)

        prediction = model.predict(df)
        # Single-row input, so the maximum over the output is the probability
        # of the predicted class.
        probability = model.predict_proba(df).max()

        # '.1%' scales and rounds in one step, avoiding float artefacts such
        # as 56.699999999999996 from round(p, 3) * 100.
        print(f"Using gender {gender}, predicted " +
              f"{int(prediction[0])} with probability {float(probability):.1%}")
        
# Run the model check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()