"""
client.py
====================================
A command line application that can create various datasets for AIR.
"""

7
import argparse
8
import paths as pt
9
10
11
12
13
14
15
16
from src.data import (load_and_clean_data,
                      make_screening_data,
                      make_alarm_data,
                      make_dataset_full,
                      make_dataset_count,
                      make_dataset_emb,
                      make_dataset_ohe,
                      make_dataset_ordinal)
17

18
19
20
21
22
23
24
def str2bool(v):
    """Convert a command-line style truth value to a ``bool``.

    Suitable as an argparse ``type=`` callable.

    Args:
        v: Either a ``bool`` (returned unchanged) or a string. Accepted
            strings, case-insensitively, are ``'yes'``, ``'true'``, ``'t'``,
            ``'y'``, ``'1'`` for ``True`` and ``'no'``, ``'false'``, ``'f'``,
            ``'n'``, ``'0'`` for ``False``.

    Returns:
        The boolean value represented by ``v``.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is not a bool and is not one
            of the accepted strings.
    """
    if isinstance(v, bool):
        return v
    # Anything that is neither bool nor str cannot be lowered; report it the
    # same way as an unrecognised string so argparse prints a usage error.
    if not isinstance(v, str):
        raise argparse.ArgumentTypeError('Boolean value expected.')
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
27
28

def parse_arguments(argv=None):
    """Parse the command line options of the dataset client.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse reads the arguments from ``sys.argv``.

    Returns:
        argparse.Namespace whose ``dataset_version`` attribute is one of
        ``'emb'``, ``'ohe'``, ``'count'`` or ``'ordinal'`` (default ``'emb'``).
    """
    parser = argparse.ArgumentParser(
        description='A command line application that'
                    ' can create various datasets for AIR.')
    parser.add_argument('--dataset-version', type=str, default="emb",
                        choices=['emb', 'ohe', 'count', 'ordinal'],
                        help='string indicating dataset version')
    return parser.parse_args(argv)

def main():
    """Run the dataset generation steps selected on the command line.

    Parses the arguments, prints the configuration, then calls the
    cleaning, screening, full-dataset and alarm-dataset steps in that
    order, followed by the dataset builder that matches the requested
    ``--dataset-version``. Finishes by printing the output locations.
    """
    parsed_args = parse_arguments()
    dataset_version = parsed_args.dataset_version
    print("Client started. Using this configuration:")
    print(f"Raw data dictionary: {pt.RAW_DATA_DIR_2021}")
    print(f"Dataset version: {dataset_version}")

    print("Now parsing and cleaning data ...")
    load_and_clean_data.main()

    print("Making screenings ...")
    make_screening_data.main()

    print("Making full dataset ...")
    make_dataset_full.main()

    print("Making alarm dataset ...")
    make_alarm_data.main()

    # argparse restricts the value to emb/ohe/count/ordinal, so the final
    # branch handles 'ordinal'.
    if dataset_version == "emb":
        print("Making dataset with embedded ats ...")
        make_dataset_emb.main()
    elif dataset_version == "ohe":
        print("Making dataset with one-hot-encoded ats ...")
        make_dataset_ohe.main()
    elif dataset_version == "count":
        print("Making dataset with ats columns as counts ...")
        make_dataset_count.main()
    else:
        print("Making dataset with ats columns as ordinal values ...")
        make_dataset_ordinal.main()

    print("\nCompleted generating datasets at:")
    print(f"Interim data dictionary: {pt.INTERIM_DATA_DIR}")
    print(f"Processed data dictionary: {pt.PROCESSED_DATA_DIR}\n")
71
72
73
    
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()