#!/usr/bin/env python
# make_dataset_ohe.py
import paths as pt
from tools import file_reader, file_writer
from tools import preprocessor
from utility import embedder
import pandas as pd
import numpy as np
from pathlib import Path
import yaml

def _categorical_columns(target_name, settings):
    """Return the names of the columns to one-hot encode for a target.

    Args:
        target_name: Name of the target column being processed.
        settings: Mapping loaded from the target's YAML config. Must hold
            'ats_resolution', and 'ex_resolution' for any target other than
            "Complete", "Compliance" and "Alarm".

    Returns:
        A list of column names: '1Ats'..'<ats_resolution>Ats', preceded by
        '1Ex'..'<ex_resolution>Ex' for targets that also carry Ex columns.
    """
    ats_cols = [f'{i}Ats' for i in range(1, settings['ats_resolution'] + 1)]
    if target_name in ("Complete", "Compliance", "Alarm"):
        return ats_cols
    ex_cols = [f'{i}Ex' for i in range(1, settings['ex_resolution'] + 1)]
    # Ex columns come first, matching the order used for the encoded output.
    return ex_cols + ats_cols


def main():
    """Write a one-hot encoded copy of each processed target dataset.

    For every target, the matching '<target>_emb.yaml' config is loaded, the
    processed '<target>.csv' is read with the numbered Ats (and, where
    present, Ex) columns parsed as strings, those columns are passed to
    ``preprocessor.one_hot_encode``, and the result is written to
    '<target>_ohe.csv' in the processed data directory.
    """
    for target_name in ["Complete", "Compliance", "Alarm", "Fall"]:
        dataset_name = target_name.lower()

        # Load settings for target
        config_path = Path(pt.CONFIGS_DIR) / f'{dataset_name}_emb.yaml'
        with open(config_path, 'r') as stream:
            settings = yaml.safe_load(stream)

        # The same column list drives both the CSV converters and the
        # encoding step, so it is built once.
        cat_cols = _categorical_columns(target_name, settings)

        # Force the categorical columns to be read as strings rather than
        # letting the reader infer a numeric dtype.
        converters = dict.fromkeys(cat_cols, str)
        df = file_reader.read_csv(pt.PROCESSED_DATA_DIR,
                                  f'{dataset_name}.csv',
                                  converters=converters)

        # One-hot encode the categorical columns.
        # NOTE(review): one_hot_encode is assumed to return only the encoded
        # columns, aligned on df's index - confirm against tools.preprocessor.
        df_enc = preprocessor.one_hot_encode(df, cat_cols)

        # Layout: remaining feature columns, encoded columns, target last.
        df = pd.concat([df.drop(cat_cols + [target_name], axis=1),
                        df_enc, df[[target_name]]], axis=1)

        # Save dataframe
        file_writer.write_csv(df, pt.PROCESSED_DATA_DIR,
                              f'{dataset_name}_ohe.csv')
    
# Run the dataset generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()