Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Christian Fischer Pedersen
AIR
Commits
e8b5e276
Commit
e8b5e276
authored
Dec 17, 2021
by
Christian Marius Lillelund
Browse files
improved scripts
parent
86f84d7c
Changes
14
Hide whitespace changes
Inline
Side-by-side
ml/notebooks/Alarm_EDA.ipynb
View file @
e8b5e276
...
...
@@ -1183,7 +1183,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.
8
"
"version": "3.8.
11
"
},
"orig_nbformat": 4
},
...
...
ml/src/api/main.py
View file @
e8b5e276
...
...
@@ -11,8 +11,8 @@ import os
import
csv
import
joblib
import
pandas
as
pd
import
numpy
as
np
import
io
from
pathlib
import
Path
from
typing
import
List
,
Optional
from
fastapi
import
Depends
,
FastAPI
,
HTTPException
,
Request
from
fastapi.responses
import
JSONResponse
...
...
@@ -96,14 +96,12 @@ class InputData(pydantic.BaseModel):
class
AlarmOutputData
(
pydantic
.
BaseModel
):
EventTimes
:
list
SurvivalProbs
:
list
HazardProb
s
:
list
AlarmArgument
s
:
list
class
TrainingOutputData
(
pydantic
.
BaseModel
):
CompleteProb
:
float
FallProb
:
float
Compliance
:
int
CompleteArguments
:
list
FallArguments
:
list
@
app
.
get
(
'/'
)
def
index
():
...
...
@@ -188,13 +186,20 @@ def predict_alarm(incoming_data: InputData):
label_encoders
=
read_pickle
(
"alarm_labels.pkl"
)
df_for_alarm
=
add_label_encoding
(
df
.
copy
(),
label_encoders
,
ats_resolution
)
surv_probs
=
model
.
predict_survival_function
(
df_for_alarm
,
return_array
=
True
)
hazard_probs
=
model
.
predict_cumulative_hazard_function
(
df_for_alarm
,
return_array
=
True
)
surv_func
=
model
.
predict_survival_function
(
df_for_alarm
,
return_array
=
True
)
event_times
=
[
int
(
x
)
for
x
in
model
.
event_times_
]
surv_probs
=
[
float
(
x
)
for
x
in
surv_func
[
0
]]
df_surv
=
pd
.
DataFrame
({
'SurvProbs'
:
surv_probs
[
0
]},
index
=
event_times
)
index_at_one_year
=
event_times
.
index
(
365
)
drop_after_one_year
=
1
-
float
(
df_surv
.
iloc
[
index_at_one_year
][
0
])
alarm_arguments
=
generate_alarm_arguments
(
df
,
ats_resolution
,
drop_after_one_year
)
return
{
'EventTimes'
:
[
int
(
x
)
for
x
in
model
.
event_times
_
]
,
'SurvivalProbs'
:
[
float
(
x
)
for
x
in
surv_probs
[
0
]]
,
'
HazardProbs'
:
[
float
(
x
)
for
x
in
hazard_probs
[
0
]]
'EventTimes'
:
event_times
,
'SurvivalProbs'
:
surv_probs
,
'
AlarmArguments'
:
alarm_arguments
}
@
app
.
post
(
'/predict_training'
,
response_model
=
TrainingOutputData
,
tags
=
[
"ai"
])
...
...
@@ -220,16 +225,13 @@ def predict_training(incoming_data: InputData):
else
:
compliance_prob
=
0
compliance
=
0
if
compliance_prob
<
0.5
else
1
complete_arguments
=
generate_arguments
(
df
,
ats_resolution
,
"Complete"
,
float
(
complete_prob
))
fall_arguments
=
generate_arguments
(
df
,
ats_resolution
,
"Fall"
,
float
(
fall_prob
))
complete_arguments
=
generate_complete_arguments
(
df
,
ats_resolution
,
complete_prob
)
return
{
'CompleteProb'
:
float
(
complete_prob
),
'FallProb'
:
float
(
fall_prob
),
'Compliance'
:
int
(
compliance
),
'CompleteArguments'
:
complete_arguments
,
'FallArguments'
:
fall_arguments
,
'CompleteArguments'
:
complete_arguments
}
def
validate_data
(
incoming_data
:
InputData
):
...
...
@@ -267,9 +269,8 @@ def add_label_encoding(df: pd.DataFrame, encoders, ats_resolution: int) -> pd.Da
df
.
loc
[:,
col_name
]
=
le
.
transform
(
df
.
loc
[:,
col_name
].
astype
(
str
))
return
df
def
generate_arguments
(
df
:
pd
.
DataFrame
,
ats_resolution
:
int
,
case
:
str
,
prob
:
float
):
def
generate_
complete_
arguments
(
df
:
pd
.
DataFrame
,
ats_resolution
:
int
,
prob
:
float
):
arguments
=
list
()
gender_argument
=
"Kvinder"
if
int
(
df
.
iloc
[
0
].
Gender
)
==
0
else
"Mænd"
arguments
.
append
(
gender_argument
)
...
...
@@ -293,9 +294,37 @@ def generate_arguments(df: pd.DataFrame, ats_resolution: int, case: str, prob: f
loan_period_argument
=
f
"og en gennemsnitlig låneperiode på
{
loan_period
}
dage"
arguments
.
append
(
loan_period_argument
)
arguments
.
append
(
"gennemfører"
if
case
==
"Complete"
else
"falder"
)
arguments
.
append
(
"gennemfører"
)
arguments
.
append
(
f
"med
{
int
(
round
(
prob
*
100
,
0
))
}
% sandsynlighed"
)
return
arguments
def
generate_alarm_arguments
(
df
:
pd
.
DataFrame
,
ats_resolution
:
int
,
pct_drop
:
float
):
arguments
=
list
()
gender_argument
=
"Kvinder"
if
int
(
df
.
iloc
[
0
].
Gender
)
==
0
else
"Mænd"
arguments
.
append
(
gender_argument
)
age_argument
=
f
"på
{
121
-
int
(
df
.
iloc
[
0
].
BirthYear
)
}
år"
arguments
.
append
(
age_argument
)
number_ats
=
int
(
df
.
iloc
[
0
].
NumberAts
)
if
int
(
number_ats
)
<
1
:
arguments
.
append
(
"uden hjælpemidler i eget hjem"
)
else
:
arguments
.
append
(
"med følgende hjælpemidler i eget hjem:"
)
for
i
in
range
(
1
,
ats_resolution
+
1
):
ats_name
=
get_ats_name_from_hmi
(
df
.
iloc
[
0
][
f
'
{
i
}
Ats'
])
if
ats_name
!=
""
:
arguments
.
append
(
f
'Et
{
i
}
. hjælpemiddel af typen
{
ats_name
}
'
)
else
:
arguments
.
append
(
f
'Uden et
{
i
}
. hjælpemiddel.'
)
loan_period
=
int
(
df
.
iloc
[
0
].
LoanPeriod
)
loan_period_argument
=
f
"og en gennemsnitlig låneperiode på
{
loan_period
}
dage"
arguments
.
append
(
loan_period_argument
)
arguments
.
append
(
"får efter et år en nødalarm"
)
arguments
.
append
(
f
"med
{
int
(
round
(
pct_drop
*
100
,
0
))
}
% sandsynlighed"
)
return
arguments
def
load_settings
(
file_name
):
...
...
ml/src/data/load_and_clean_data.py
View file @
e8b5e276
...
...
@@ -4,7 +4,7 @@ load_and_clean_data.py
Script to load the raw data and then clean it.
"""
from
tools
import
file_writer
,
raw_loader
,
cleaner
from
tools
import
raw_loader
,
cleaner
from
utility.data
import
write_pickle
import
paths
as
pt
...
...
@@ -45,4 +45,4 @@ def main():
write_pickle
(
ic
,
pt
.
INTERIM_DATA_DIR
,
'ic.pkl'
)
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
main
()
ml/src/data/make_alarm_data.py
View file @
e8b5e276
...
...
@@ -136,4 +136,4 @@ def main():
write_csv
(
df_alarm
,
file_path
,
file_name
)
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
main
()
ml/src/model/train_alarm_model.py
View file @
e8b5e276
#!/usr/bin/env python
import
paths
as
pt
from
pathlib
import
Path
from
tools
import
file_writer
,
data_loader
from
sklearn.preprocessing
import
LabelEncoder
from
tools
import
data_loader
from
utility.settings
import
load_settings
from
sksurv.ensemble
import
RandomSurvivalForest
from
io
import
BytesIO
import
shutil
import
pandas
as
pd
import
numpy
as
np
def
main
():
data_settings
=
load_settings
(
pt
.
CONFIGS_DIR
,
"data.yaml"
)
target_settings
=
load_settings
(
pt
.
CONFIGS_DIR
,
"alarm.yaml"
)
ats_resolution
=
data_settings
[
'ats_resolution'
]
dl
=
data_loader
.
AlarmDataLoader
(
pt
.
PROCESSED_DATA_DIR
,
"alarm_
data.pkl
"
,
"alarm_
emb.csv
"
,
target_settings
).
load_data
()
X
,
y
=
dl
.
get_data
()
labels_enc
=
dict
()
ats_cols
=
[
f
"
{
i
}
Ats"
for
i
in
range
(
1
,
ats_resolution
+
1
)]
for
col_name
in
ats_cols
:
le
=
LabelEncoder
()
le
.
fit
(
X
.
loc
[:,
col_name
].
astype
(
str
))
labels_enc
[
col_name
]
=
le
X
.
loc
[:,
col_name
]
=
le
.
transform
(
X
.
loc
[:,
col_name
].
astype
(
str
))
X
=
X
[:
1000
]
y
=
y
[:
1000
]
model
=
RandomSurvivalForest
(
n_estimators
=
200
,
max_depth
=
3
,
n_jobs
=-
1
,
random_state
=
0
)
model
.
fit
(
X
,
y
)
sample
=
X
.
iloc
[
10
]
surv_probs
=
model
.
predict_survival_function
([
sample
],
return_array
=
True
)
event_times
=
[
int
(
x
)
for
x
in
model
.
event_times_
]
with
open
(
Path
.
joinpath
(
pt
.
MODELS_DIR
,
"alarm_labels.pkl"
),
'wb'
)
as
fd
:
outfile
=
BytesIO
()
file_writer
.
write_pickle
(
labels_enc
,
outfile
)
outfile
.
seek
(
0
)
shutil
.
copyfileobj
(
outfile
,
fd
)
with
open
(
Path
.
joinpath
(
pt
.
MODELS_DIR
,
"alarm_rsf.joblib"
),
'wb'
)
as
fd
:
outfile
=
BytesIO
()
file_writer
.
write_joblib
(
model
,
outfile
)
outfile
.
seek
(
0
)
shutil
.
copyfileobj
(
outfile
,
fd
)
df
=
pd
.
DataFrame
({
'SurvProbs'
:
surv_probs
[
0
]},
index
=
event_times
)
index_at_year
=
event_times
.
index
(
365
)
drop_after_one_year
=
1
-
float
(
df
.
iloc
[
index_at_year
][
0
])
print
(
int
(
round
(
drop_after_one_year
*
100
,
0
)))
if
__name__
==
'__main__'
:
main
()
\ No newline at end of file
ml/src/model/train_rf_models.py
View file @
e8b5e276
...
...
@@ -58,4 +58,4 @@ def main():
shutil
.
copyfileobj
(
outfile
,
fd
)
if
__name__
==
'__main__'
:
main
()
\ No newline at end of file
main
()
ml/src/tools/raw_loader.py
View file @
e8b5e276
...
...
@@ -69,8 +69,10 @@ class RawLoader2021(BaseRawLoader2021):
df_aa
[
'Seq'
]
=
df_aa
.
groupby
([
'ID'
,
'Kategori ISO nummer'
]).
cumcount
()
df_aa
=
df_aa
[[
'ID'
,
'Birth Year'
,
'Gender'
,
'Kategori ISO nummer'
,
'Kørselsdato'
,
'Seq'
]]
df_aa
[
'LendDate'
]
=
df_aa
.
apply
(
lambda
x
:
x
[
'Kørselsdato'
]
if
x
[
'Seq'
]
%
2
==
0
else
pd
.
NaT
,
axis
=
1
)
df_aa
[
'ReturnDate'
]
=
df_aa
.
apply
(
lambda
x
:
x
[
'Kørselsdato'
]
if
x
[
'Seq'
]
%
2
==
1
else
pd
.
NaT
,
axis
=
1
)
df_aa
[
'LendDate'
]
=
df_aa
.
apply
(
lambda
x
:
x
[
'Kørselsdato'
]
if
x
[
'Seq'
]
%
2
==
0
else
pd
.
NaT
,
axis
=
1
)
df_aa
[
'ReturnDate'
]
=
df_aa
.
apply
(
lambda
x
:
x
[
'Kørselsdato'
]
if
x
[
'Seq'
]
%
2
==
1
else
pd
.
NaT
,
axis
=
1
)
df_aa
[
'ReturnDate'
]
=
df_aa
.
groupby
([
'ID'
,
'Kategori ISO nummer'
])[
'ReturnDate'
].
shift
(
-
1
)
df_aa
=
df_aa
.
dropna
(
subset
=
[
'LendDate'
,
'ReturnDate'
],
thresh
=
1
)
...
...
@@ -349,4 +351,4 @@ class RawLoader2021(BaseRawLoader2021):
df
[
'Gender'
]
=
pd
.
Series
.
astype
(
df
[
'Gender'
],
dtype
=
str
)
df
[
'BirthYear'
]
=
pd
.
Series
.
astype
(
df
[
'BirthYear'
],
dtype
=
int
)
return
df
\ No newline at end of file
return
df
ml/src/tuning/run_all_tune_scripts.py
View file @
e8b5e276
...
...
@@ -20,4 +20,4 @@ def main():
tune_compliance_xgb_wb
.
main
()
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
main
()
ml/src/tuning/tune_alarm_boost_wb.py
View file @
e8b5e276
...
...
@@ -4,15 +4,15 @@ tune_alarm_boost_wb.py
Grad. boost tune script for Alarm case on WanDB
"""
from
utility.settings
import
load_settings
from
sksurv.ensemble
import
GradientBoostingSurvivalAnalysis
from
sksurv.metrics
import
(
concordance_index_censored
,
concordance_index_ipcw
,
integrated_brier_score
)
from
sklearn.model_selection
import
KFold
from
utility.settings
import
load_settings
import
numpy
as
np
from
tools
import
data_loader
import
paths
as
pt
import
numpy
as
np
import
os
os
.
environ
[
"WANDB_SILENT"
]
=
"true"
...
...
@@ -128,4 +128,4 @@ def train_model():
wandb
.
log
({
"brier_score"
:
brier_score_mean
})
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
main
()
ml/src/tuning/tune_alarm_rsf_wb.py
View file @
e8b5e276
...
...
@@ -66,7 +66,6 @@ def train_model():
config
=
wandb
.
config
# Load data
data_settings
=
load_settings
(
pt
.
CONFIGS_DIR
,
"data.yaml"
)
target_settings
=
load_settings
(
pt
.
CONFIGS_DIR
,
"alarm.yaml"
)
dl
=
data_loader
.
AlarmDataLoader
(
pt
.
PROCESSED_DATA_DIR
,
"alarm_emb.csv"
,
...
...
@@ -115,4 +114,4 @@ def train_model():
wandb
.
log
({
"brier_score"
:
brier_score_mean
})
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
main
()
ml/src/tuning/tune_complete_rf_wb.py
View file @
e8b5e276
...
...
@@ -98,9 +98,9 @@ def train_model():
res_validate
=
cross_validate
(
model
,
X
,
y
,
cv
=
skf
,
scoring
=
metrics
)
# Evaluate performance
accuracy
=
res_validate
[
f
'test_accuracy'
]
avg_prec
=
res_validate
[
f
'test_average_precision'
]
f1
=
res_validate
[
f
'test_f1'
]
accuracy
=
res_validate
[
'test_accuracy'
]
avg_prec
=
res_validate
[
'test_average_precision'
]
f1
=
res_validate
[
'test_f1'
]
# Log to wandb
wandb
.
log
({
"accuracy"
:
accuracy
})
...
...
@@ -108,4 +108,4 @@ def train_model():
wandb
.
log
({
"f1"
:
f1
})
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
main
()
ml/src/tuning/tune_complete_xgb_wb.py
View file @
e8b5e276
...
...
@@ -122,9 +122,9 @@ def train_model():
res_validate
=
cross_validate
(
model
,
X
,
y
,
cv
=
skf
,
scoring
=
metrics
)
# Evaluate performance
accuracy
=
res_validate
[
f
'test_accuracy'
]
avg_prec
=
res_validate
[
f
'test_average_precision'
]
f1
=
res_validate
[
f
'test_f1'
]
accuracy
=
res_validate
[
'test_accuracy'
]
avg_prec
=
res_validate
[
'test_average_precision'
]
f1
=
res_validate
[
'test_f1'
]
# Log to wandb
wandb
.
log
({
"accuracy"
:
accuracy
})
...
...
@@ -132,4 +132,4 @@ def train_model():
wandb
.
log
({
"f1"
:
f1
})
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
main
()
ml/src/tuning/tune_compliance_rf_wb.py
View file @
e8b5e276
...
...
@@ -98,9 +98,9 @@ def train_model():
res_validate
=
cross_validate
(
model
,
X
,
y
,
cv
=
skf
,
scoring
=
metrics
)
# Evaluate performance
accuracy
=
res_validate
[
f
'test_accuracy'
]
avg_prec
=
res_validate
[
f
'test_average_precision'
]
f1
=
res_validate
[
f
'test_f1'
]
accuracy
=
res_validate
[
'test_accuracy'
]
avg_prec
=
res_validate
[
'test_average_precision'
]
f1
=
res_validate
[
'test_f1'
]
# Log to wandb
wandb
.
log
({
"accuracy"
:
accuracy
})
...
...
@@ -108,4 +108,4 @@ def train_model():
wandb
.
log
({
"f1"
:
f1
})
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
main
()
ml/src/tuning/tune_compliance_xgb_wb.py
View file @
e8b5e276
...
...
@@ -122,9 +122,9 @@ def train_model():
res_validate
=
cross_validate
(
model
,
X
,
y
,
cv
=
skf
,
scoring
=
metrics
)
# Evaluate performance
accuracy
=
res_validate
[
f
'test_accuracy'
]
avg_prec
=
res_validate
[
f
'test_average_precision'
]
f1
=
res_validate
[
f
'test_f1'
]
accuracy
=
res_validate
[
'test_accuracy'
]
avg_prec
=
res_validate
[
'test_average_precision'
]
f1
=
res_validate
[
'test_f1'
]
# Log to wandb
wandb
.
log
({
"accuracy"
:
accuracy
})
...
...
@@ -132,4 +132,4 @@ def train_model():
wandb
.
log
({
"f1"
:
f1
})
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
main
()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment