Commit 775541e3 authored by thecml
Browse files

added client option to only make models

parent b6a8c57e
Pipeline #54221 passed with stage
in 2 minutes and 59 seconds
......@@ -23,10 +23,14 @@ def parse_arguments():
choices=['emb', 'ohe'], help='string indicating dataset version')
parser.add_argument("--enable-visualization", type=str2bool, nargs='?',
const=True, default=False,
help="bolean indicating if visualization should be enabled.")
help="boolean indicating if visualization should be enabled.")
parser.add_argument("--use-real-ats-names", type=str2bool, nargs='?',
const=True, default=False,
help="bolean indicating if we should use real ats names.")
help="boolean indicating if we should use real ats names.")
parser.add_argument("--run-full-pipeline", type=str2bool, nargs='?',
const=True, default=True,
help="boolean indicating if we should run full pipeline. " +
"set to false to only make models")
return parser.parse_args()
def main():
......@@ -35,42 +39,44 @@ def main():
dataset_version = parsed_args.dataset_version
enable_visualization = parsed_args.enable_visualization
use_real_ats_names = parsed_args.use_real_ats_names
run_full_pipeline = parsed_args.run_full_pipeline
print(f"Client started. Using this configuration:")
print(f"Raw data dictionary: {cfg.RAW_DATA_DIR_2020}")
print(f"Dataset year: {dataset_year}")
print(f"Dataset version: {dataset_version}")
print(f"Visualization enabled: {enable_visualization}")
print(f"Use real ATS names: {use_real_ats_names}\n")
print("Now parsing and cleaning data ...")
if dataset_year == '2019':
parse_and_clean_data.main(year=dataset_year)
else:
parse_and_clean_data.main()
print(f"Use real ATS names: {use_real_ats_names}")
print(f"Run full pipeline: {run_full_pipeline}\n")
if run_full_pipeline:
print("Now parsing and cleaning data ...")
if dataset_year == '2019':
parse_and_clean_data.main(year=dataset_year)
else:
parse_and_clean_data.main()
print("Extracting screenings ...")
make_screenings.main()
print("Extracting screenings ...")
make_screenings.main()
print("Making clusters ...")
make_clusters.main()
print(f"Completed making cluster model. It can be found at: {cfg.CLUSTERS_DIR}\n")
print("Making clusters ...")
make_clusters.main()
print(f"Completed making cluster model. It can be found at: {cfg.CLUSTERS_DIR}\n")
print("Making full dataset ...")
make_dataset_full.main(use_real_ats_names)
print("Making full dataset ...")
make_dataset_full.main(use_real_ats_names)
if dataset_version == "emb":
print("Making dataset with embedded ats ...")
make_dataset_emb.main(enable_visualization)
else:
print("Making dataset with one-hot encoded ats ...")
make_dataset_count.main()
if dataset_version == "emb":
print("Making dataset with embedded ats ...")
make_dataset_emb.main(enable_visualization)
else:
print("Making dataset with one-hot encoded ats ...")
make_dataset_count.main()
print("\nCompleted generating datasets at:")
print(f"Interim data dictionary: {cfg.INTERIM_DATA_DIR}")
print(f"Processed data dictionary: {cfg.PROCESSED_DATA_DIR}\n")
print("\nCompleted generating datasets at:")
print(f"Interim data dictionary: {cfg.INTERIM_DATA_DIR}")
print(f"Processed data dictionary: {cfg.PROCESSED_DATA_DIR}\n")
print(f"Now making 4 XGBoost models based on version: {dataset_version} ...")
print(f"Making 4 XGBoost models based on version: {dataset_version} ...\n")
make_xgb_models.main()
print(f"Completed making models. Models and SHAP plots can be found at:\n" +
f"{cfg.COMPLETE_XGB_DIR}\n" + f"{cfg.COMPLIANCE_XGB_DIR}\n" +
f"{cfg.FALL_XGB_DIR}\n" + f"{cfg.FALL_TEST_XGB_DIR}" + "\n")
......
.. air documentation master file, created by
sphinx-quickstart on Wed Apr 28 11:13:45 2021.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to air's documentation!
===============================
.. toctree::
:maxdepth: 2
:caption: Contents:
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
......@@ -36,8 +36,16 @@
"name": "python388jvsc74a57bd059ff6fbb0321898508cf6243593820bf2585fcfb6693fd00e85ec94ed8847fd0"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"version": ""
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
},
"orig_nbformat": 2
},
......
......@@ -43,10 +43,7 @@ def main(year: int = '2020'):
file_writer.write_pickle(training_done, cfg.INTERIM_DATA_DIR, 'td.pkl')
file_writer.write_pickle(training_cancelled, cfg.INTERIM_DATA_DIR, 'tc.pkl')
file_writer.write_pickle(assistive_aids, cfg.INTERIM_DATA_DIR, 'ats.pkl')
if not ic.empty:
file_writer.write_pickle(ic, cfg.INTERIM_DATA_DIR, 'ic.pkl')
file_writer.write_pickle(ic, cfg.INTERIM_DATA_DIR, 'ic.pkl')
file_writer.write_pickle(fall_data, cfg.INTERIM_DATA_DIR, 'fd.pkl')
if __name__ == "__main__":
......
import os
from typing import List, Tuple
import numpy as np
import pandas as pd
......@@ -48,7 +47,7 @@ class NeuralEmbedder:
optimizer_fn: str = 'Adam',
metrics: List[str] = ['accuracy'],
epochs: int = 10,
batch_size: int = 128,
batch_size: int = 32,
verbose: bool = False,
artifacts_path: str = 'artifacts'):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment