Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Christian Fischer Pedersen
AIR
Commits
611cdee2
Commit
611cdee2
authored
Dec 20, 2021
by
Christian Marius Lillelund
Browse files
updated final model training scripts
parent
3839a8c1
Pipeline
#100347
passed with stage
in 4 minutes and 18 seconds
Changes
8
Pipelines
1
Show whitespace changes
Inline
Side-by-side
ml/src/analysis/evaluate_
baseline
.py
→
ml/src/analysis/evaluate_
classification_cases
.py
View file @
611cdee2
"""
evaluate_
baseline
.py
evaluate_
classification_cases
.py
====================================
Script to make a baseline evaluation of cases
Script to make a baseline evaluation of
classification
cases
using CV with different datasets and classifiers.
"""
...
...
ml/src/analysis/evaluate_survival_case.py
0 → 100644
View file @
611cdee2
"""
evaluate_survival_case.py
====================================
Script to make a baseline evaluation of the survival case using CV.
"""
import
numpy
as
np
import
paths
as
pt
from
typing
import
List
from
tools
import
file_writer
,
preprocessor
,
data_loader
from
pathlib
import
Path
import
csv
from
utility.settings
import
load_settings
from
utility.metrics
import
compute_mean
,
compute_std
from
io
import
BytesIO
import
shutil
def main():
    """Entry point for the survival-case baseline evaluation.

    NOTE(review): currently a stub -- the CV evaluation described in the
    module docstring has not been implemented yet.
    """
    pass


if __name__ == '__main__':
    main()
ml/src/api/main.py
View file @
611cdee2
...
...
@@ -6,12 +6,11 @@ The main FastAPI module class.
import
uvicorn
import
pydantic
import
shap
import
os
import
csv
import
joblib
import
pandas
as
pd
import
numpy
as
np
import
xgboost
as
xgb
import
io
from
typing
import
List
,
Optional
from
fastapi
import
Depends
,
FastAPI
,
HTTPException
,
Request
...
...
@@ -196,6 +195,8 @@ def predict_alarm(incoming_data: InputData):
drop_after_one_year
=
1
-
float
(
df_surv
.
iloc
[
index_at_one_year
][
0
])
alarm_arguments
=
generate_alarm_arguments
(
df
,
ats_resolution
,
drop_after_one_year
)
# TODO: Use predict() to predict time to event
return
{
'EventTimes'
:
event_times
,
'SurvivalProbs'
:
surv_probs
,
...
...
@@ -210,16 +211,16 @@ def predict_training(incoming_data: InputData):
data
=
validate_data
(
incoming_data
)
df
=
prepare_data
(
data
,
ats_resolution
)
complete_model
=
read_joblib
(
f
'complete_xgb.joblib'
)
compliance_model
=
read_joblib
(
f
'compliance_xgb.joblib'
)
fall_model
=
read_joblib
(
f
'fall_xgb.joblib'
)
complete_model
=
xgb
.
XGBClassifier
()
complete_model
.
load_model
(
read_joblib
(
f
'complete_xgb.joblib'
))
compliance_model
=
xgb
.
XGBClassifier
()
compliance_model
.
load_model
(
read_joblib
(
f
'compliance_xgb.joblib'
))
df_for_complete
=
add_embedding
(
df
.
copy
(),
'complete'
,
ats_resolution
)
df_for_compliance
=
add_embedding
(
df
.
copy
(),
'compliance'
,
ats_resolution
)
df_for_fall
=
add_embedding
(
df
.
copy
(),
'fall'
,
ats_resolution
)
complete_prob
=
complete_model
.
predict_proba
(
df_for_complete
).
flatten
()[
1
]
fall_prob
=
fall_model
.
predict_proba
(
df_for_fall
).
flatten
()[
1
]
if
complete_prob
>
0.5
:
compliance_prob
=
compliance_model
.
predict_proba
(
df_for_compliance
).
flatten
()[
1
]
else
:
...
...
@@ -229,7 +230,6 @@ def predict_training(incoming_data: InputData):
return
{
'CompleteProb'
:
float
(
complete_prob
),
'FallProb'
:
float
(
fall_prob
),
'Compliance'
:
int
(
compliance
),
'CompleteArguments'
:
complete_arguments
}
...
...
ml/src/model/train_alarm_model.py
View file @
611cdee2
#!/usr/bin/env python
"""
train_alarm_model.py
====================================
Script to train the model for the Alarm case.
"""
import
paths
as
pt
from
tools
import
data_loader
,
file_writer
from
utility.settings
import
load_settings
from
sksurv.ensemble
import
RandomSurvivalForest
import
pandas
as
pd
import
numpy
as
np
from
sksurv.ensemble
import
GradientBoostingSurvivalAnalysis
from
pathlib
import
Path
from
io
import
BytesIO
import
shutil
...
...
@@ -16,12 +19,18 @@ def main():
target_settings
).
load_data
()
X
,
y
=
dl
.
get_data
()
model
=
RandomSurvivalForest
(
n_estimators
=
200
,
max_depth
=
3
,
n_jobs
=-
1
,
random_state
=
0
)
model
=
GradientBoostingSurvivalAnalysis
(
n_estimators
=
200
,
learning_rate
=
0.1
,
max_depth
=
14
,
loss
=
'coxph'
,
min_samples_split
=
4
,
max_features
=
'sqrt'
,
dropout_rate
=
0.5
,
random_state
=
0
)
model
.
fit
(
X
,
y
)
file_path
=
pt
.
MODELS_DIR
file_name
=
f
'alarm.joblib'
file_name
=
f
'alarm
_gradboost
.joblib'
with
open
(
Path
.
joinpath
(
file_path
,
file_name
),
'wb'
)
as
fd
:
outfile
=
BytesIO
()
file_writer
.
write_joblib
(
model
,
outfile
)
...
...
ml/src/model/train_complete_model.py
0 → 100644
View file @
611cdee2
"""
train_complete_model.py
====================================
Script to train the model for the Complete case.
"""
import
paths
as
pt
from
tools
import
file_writer
,
data_loader
from
utility.settings
import
load_settings
import
xgboost
as
xgb
from
pathlib
import
Path
from
io
import
BytesIO
import
shutil
def main():
    """Train an XGBoost classifier for the Complete case and persist it.

    Loads the embedded Complete dataset via ``data_loader.CompleteDataLoader``,
    fits an ``xgb.XGBClassifier`` with pre-tuned hyperparameters and writes
    the model to ``pt.MODELS_DIR`` using ``file_writer.write_joblib``.
    """
    target_settings = load_settings(pt.CONFIGS_DIR, "complete.yaml")
    dl = data_loader.CompleteDataLoader(pt.PROCESSED_DATA_DIR,
                                        "complete_emb.csv",
                                        target_settings).load_data()
    X, y = dl.get_data()
    # Hyperparameters presumably found by prior tuning -- TODO confirm source.
    params = {"n_estimators": 100,
              "booster": "gbtree",
              "max_depth": 7,
              "gamma": 1,
              "colsample_bytree": 0.6,
              "min_child_weight": 7,
              "reg_alpha": 128,
              "reg_lambda": 0.9,
              "learning_rate": 0.1,
              "subsample": 1,
              "use_label_encoder": False,
              "eval_metric": "logloss",
              "objective": "binary:logistic",
              "random_state": 0}
    model = xgb.XGBClassifier(**params)
    model.fit(X, y)
    file_path = pt.MODELS_DIR
    # Plain string: the original used an f-string with no placeholders.
    file_name = 'complete_xgboost.joblib'
    # Serialize into an in-memory buffer first, then copy the bytes to disk.
    with open(Path.joinpath(file_path, file_name), 'wb') as fd:
        outfile = BytesIO()
        file_writer.write_joblib(model, outfile)
        outfile.seek(0)
        shutil.copyfileobj(outfile, fd)


if __name__ == '__main__':
    main()
ml/src/model/train_compliance_model.py
0 → 100644
View file @
611cdee2
"""
train_compliance_model.py
====================================
Script to train the model for the Compliance case.
"""
import
paths
as
pt
from
tools
import
file_writer
,
data_loader
from
utility.settings
import
load_settings
import
xgboost
as
xgb
from
pathlib
import
Path
from
io
import
BytesIO
import
shutil
def main():
    """Train an XGBoost classifier for the Compliance case and persist it.

    Loads the embedded Compliance dataset via
    ``data_loader.ComplianceDataLoader``, fits an ``xgb.XGBClassifier`` with
    pre-tuned hyperparameters and writes the model to ``pt.MODELS_DIR``.
    """
    target_settings = load_settings(pt.CONFIGS_DIR, "compliance.yaml")
    dl = data_loader.ComplianceDataLoader(pt.PROCESSED_DATA_DIR,
                                          "compliance_emb.csv",
                                          target_settings).load_data()
    X, y = dl.get_data()
    # Hyperparameters presumably found by prior tuning -- TODO confirm source.
    params = {"n_estimators": 100,
              "booster": "gbtree",
              "max_depth": 3,
              "gamma": 5,
              "colsample_bytree": 0.6,
              "min_child_weight": 4,
              "reg_alpha": 40,
              "reg_lambda": 0.6,
              "learning_rate": 0.2,
              "subsample": 1,
              "use_label_encoder": False,
              "eval_metric": "logloss",
              "objective": "binary:logistic",
              "random_state": 0}
    model = xgb.XGBClassifier(**params)
    model.fit(X, y)
    file_path = pt.MODELS_DIR
    # BUG FIX: the original wrote 'complete_xgboost.joblib', silently
    # overwriting the Complete case's model file with the Compliance model.
    # NOTE(review): the API reads 'compliance_xgb.joblib' -- verify the
    # expected on-disk name against the serving code.
    file_name = 'compliance_xgboost.joblib'
    # Serialize into an in-memory buffer first, then copy the bytes to disk.
    with open(Path.joinpath(file_path, file_name), 'wb') as fd:
        outfile = BytesIO()
        file_writer.write_joblib(model, outfile)
        outfile.seek(0)
        shutil.copyfileobj(outfile, fd)


if __name__ == '__main__':
    main()
ml/src/model/train_rf_models.py
deleted
100644 → 0
View file @
3839a8c1
#!/usr/bin/env python
import
paths
as
pt
from
tools
import
file_writer
,
data_loader
from
utility.settings
import
load_settings
from
sklearn.ensemble
import
RandomForestClassifier
from
pathlib
import
Path
import
shutil
from
io
import
BytesIO
CASES
=
[
"Complete"
,
"Compliance"
,
"Fall"
,
"Risk"
]
DATASET_VERSION
=
'emb'
def main():
    """Train a RandomForest classifier for every case and persist each model.

    For each case in ``CASES``, loads its settings and embedded dataset,
    fits a ``RandomForestClassifier`` and writes it to ``pt.MODELS_DIR``
    as ``<case>_rf.joblib``.
    """
    for case in CASES:
        if case == "Complete":
            settings = load_settings(pt.CONFIGS_DIR, "complete.yaml")
            file_name = f'complete_{DATASET_VERSION}.csv'
            dl = data_loader.CompleteDataLoader(pt.PROCESSED_DATA_DIR,
                                                file_name,
                                                settings).load_data()
            X, y = dl.prepare_data()
        elif case == "Compliance":
            settings = load_settings(pt.CONFIGS_DIR, "compliance.yaml")
            file_name = f'compliance_{DATASET_VERSION}.csv'
            dl = data_loader.ComplianceDataLoader(pt.PROCESSED_DATA_DIR,
                                                  file_name,
                                                  settings).load_data()
            X, y = dl.prepare_data()
        elif case == "Fall":
            settings = load_settings(pt.CONFIGS_DIR, "fall.yaml")
            file_name = f'fall_{DATASET_VERSION}.csv'
            dl = data_loader.FallDataLoader(pt.PROCESSED_DATA_DIR,
                                            file_name,
                                            settings).load_data()
            X, y = dl.prepare_data()
        else:
            # BUG FIX: the Risk case previously loaded "fall.yaml"; the
            # parallel XGBoost training script uses "risk.yaml" for Risk.
            settings = load_settings(pt.CONFIGS_DIR, "risk.yaml")
            file_name = f'risk_{DATASET_VERSION}.csv'
            dl = data_loader.RiskDataLoader(pt.PROCESSED_DATA_DIR,
                                            file_name,
                                            settings).load_data()
            X, y = dl.prepare_data()

        # Hyperparameters presumably found by prior tuning -- TODO confirm;
        # n_estimators=1 with bootstrap=False is effectively a single tree.
        model = RandomForestClassifier(n_estimators=1,
                                       bootstrap=False,
                                       min_samples_leaf=0.1,
                                       min_samples_split=0.54,
                                       max_depth=29,
                                       random_state=0)
        model.fit(X, y)
        file_path = pt.MODELS_DIR
        file_name = f'{case.lower()}_rf.joblib'
        # Serialize into an in-memory buffer first, then copy bytes to disk.
        with open(Path.joinpath(file_path, file_name), 'wb') as fd:
            outfile = BytesIO()
            file_writer.write_joblib(model, outfile)
            outfile.seek(0)
            shutil.copyfileobj(outfile, fd)


if __name__ == '__main__':
    main()
ml/src/model/train_xgb_models.py
deleted
100644 → 0
View file @
3839a8c1
#!/usr/bin/env python
import
numpy
as
np
import
paths
as
pt
from
tools
import
file_writer
,
data_loader
from
utility.settings
import
load_settings
import
xgboost
as
xgb
from
pathlib
import
Path
from
io
import
BytesIO
import
shutil
CASES
=
[
"Complete"
,
"Compliance"
,
"Fall"
,
"Risk"
]
DATASET_VERSION
=
'emb'
def _load_case_data(case):
    """Return ``(X, y)`` for *case* by loading its embedded dataset.

    Any case not explicitly listed falls back to the Risk configuration,
    matching the original if/elif/else chain's ``else`` branch.
    """
    # One (config file, dataset prefix, loader class) triple per case.
    loaders = {
        "Complete": ("complete.yaml", "complete",
                     data_loader.CompleteDataLoader),
        "Compliance": ("compliance.yaml", "compliance",
                       data_loader.ComplianceDataLoader),
        "Fall": ("fall.yaml", "fall", data_loader.FallDataLoader),
        "Risk": ("risk.yaml", "risk", data_loader.RiskDataLoader),
    }
    config_name, prefix, loader_cls = loaders.get(case, loaders["Risk"])
    settings = load_settings(pt.CONFIGS_DIR, config_name)
    file_name = f'{prefix}_{DATASET_VERSION}.csv'
    dl = loader_cls(pt.PROCESSED_DATA_DIR, file_name, settings).load_data()
    return dl.prepare_data()


def main():
    """Train an XGBoost classifier for every case and persist each model.

    For each case in ``CASES``, loads its embedded dataset, fits an
    ``xgb.XGBClassifier`` weighted by class imbalance and writes it to
    ``pt.MODELS_DIR`` as ``<case>_xgb.joblib``.
    """
    for case in CASES:
        X, y = _load_case_data(case)

        # Counter the class imbalance by up-weighting the positive class.
        neg, pos = np.bincount(y)
        scale_pos_weight = neg / pos

        params = {"n_estimators": 400,
                  "learning_rate": 0.1,
                  "scale_pos_weight": scale_pos_weight,
                  "objective": "binary:logistic",
                  "use_label_encoder": False,
                  "eval_metric": "logloss",
                  "random_state": 0}
        model = xgb.XGBClassifier(**params)
        model.fit(X, y)

        file_path = pt.MODELS_DIR
        file_name = f'{case.lower()}_xgb.joblib'
        # Serialize into an in-memory buffer first, then copy bytes to disk.
        with open(Path.joinpath(file_path, file_name), 'wb') as fd:
            outfile = BytesIO()
            file_writer.write_joblib(model, outfile)
            outfile.seek(0)
            shutil.copyfileobj(outfile, fd)


if __name__ == '__main__':
    main()
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment