train_complete_model.py 1.45 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""
train_complete_model.py
====================================
Script to train the model for the Complete case.
"""

import paths as pt
from tools import file_writer, data_loader
from utility.settings import load_settings
import xgboost as xgb
from pathlib import Path
from io import BytesIO
import shutil

def main():
    """Train the Complete-case XGBoost classifier and save it to disk.

    Loads the settings from ``complete.yaml`` in ``pt.CONFIGS_DIR``, reads
    ``complete_emb.csv`` from ``pt.PROCESSED_DATA_DIR`` through
    ``CompleteDataLoader``, fits an ``XGBClassifier`` with fixed
    hyperparameters on everything the loader returns, and writes the fitted
    model to ``complete_xgboost.joblib`` in ``pt.MODELS_DIR``.
    """
    target_settings = load_settings(pt.CONFIGS_DIR, "complete.yaml")
    dl = data_loader.CompleteDataLoader(pt.PROCESSED_DATA_DIR,
                                        "complete_emb.csv",
                                        target_settings).load_data()
    X, y = dl.get_data()

    # Fixed hyperparameters; random_state is pinned so repeated runs use the
    # same seed.
    # NOTE(review): use_label_encoder is deprecated in newer XGBoost
    # releases -- confirm against the installed version before removing it.
    params = {"n_estimators": 100,
              "booster": "gbtree",
              "max_depth": 7,
              "gamma": 1,
              "colsample_bytree": 0.6,
              "min_child_weight": 7,
              "reg_alpha": 128,
              "reg_lambda": 0.9,
              "learning_rate": 0.1,
              "subsample": 1,
              "use_label_encoder": False,
              "eval_metric": "logloss",
              "objective": "binary:logistic",
              "random_state": 0}

    model = xgb.XGBClassifier(**params)
    model.fit(X, y)

    # Serialise into memory BEFORE opening the destination: mode 'wb'
    # truncates the target, so a failure inside write_joblib would otherwise
    # leave an empty model file where a good one used to be.
    buffer = BytesIO()
    file_writer.write_joblib(model, buffer)
    buffer.seek(0)

    # Path(...) accepts both str and Path, so this works whichever type
    # pt.MODELS_DIR happens to be.
    model_path = Path(pt.MODELS_DIR) / "complete_xgboost.joblib"
    with open(model_path, 'wb') as fd:
        shutil.copyfileobj(buffer, fd)

# Run the training only when this file is executed as a script, so that
# importing the module does not start a training run.
if __name__ == '__main__':
    main()