Commit ac5bfa4a authored by thecml
Browse files

put isoall in references

parent f5747195
import argparse
import os
import config as cfg
from src.data import parse_and_clean_data, make_screenings
from src.data import make_clusters, make_dataset_full
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -15,13 +15,7 @@ def main(year: int = '2020'):
sc = parser19.parse_screening_content(cfg.PATHS_2019[1], cfg.RAW_DATA_DIR_2019)
tc = parser19.parse_training_cancelled(cfg.PATHS_2019[1], cfg.RAW_DATA_DIR_2019)
ss = parser19.parse_status_set(cfg.PATHS_2019[1], cfg.RAW_DATA_DIR_2019)
-if os.path.isfile(Path.joinpath(cfg.RAW_DATA_DIR_2019, cfg.PATHS_2019[5])):
-ic = parser19.parse_iso_classes(cfg.PATHS_2019[5], cfg.RAW_DATA_DIR_2019)
-else:
-print("No iso class file found, continuing ...")
-ic = pd.DataFrame()
+ic = parser19.parse_iso_classes('isoall.txt', cfg.REFERENCES_DIR)
fd = parser19.parse_fall_data(cfg.PATHS_2019[2], cfg.RAW_DATA_DIR_2019)
else:
parser20 = parser.Parser2020()
......@@ -30,13 +24,7 @@ def main(year: int = '2020'):
sc = parser20.parse_screening_content(cfg.PATHS_2020[1], cfg.RAW_DATA_DIR_2020)
tc = parser20.parse_training_cancelled(cfg.PATHS_2020[1], cfg.RAW_DATA_DIR_2020)
ss = parser20.parse_status_set(cfg.PATHS_2020[1], cfg.RAW_DATA_DIR_2020)
-if os.path.isfile(Path.joinpath(cfg.RAW_DATA_DIR_2019, cfg.PATHS_2019[5])):
-ic = parser20.parse_iso_classes(cfg.PATHS_2019[5], cfg.RAW_DATA_DIR_2019)
-else:
-print("No iso class file found, continuing ...")
-ic = pd.DataFrame()
+ic = parser20.parse_iso_classes('isoall.txt', cfg.REFERENCES_DIR)
fd = parser20.parse_fall_data(cfg.PATHS_2020[2], cfg.RAW_DATA_DIR_2020)
cleaner2020 = cleaner.Cleaner2020()
......
......@@ -76,8 +76,7 @@ class Cleaner2020(BaseCleaner):
ats = remove_rows_with_old_dates(ats, cfg.LEND_DATE)
ats = remove_deprecated_device_data(ats)
ats = remove_tainted_histories(ats)
-if not ic.empty:
-ats = drop_invalid_devices(ats, ic)
+ats = drop_invalid_devices(ats, ic)
return ats
def clean_fall_data(self, fd: pd.DataFrame) -> pd.DataFrame:
......@@ -126,8 +125,7 @@ class Cleaner2019(BaseCleaner):
ats = remove_rows_with_old_dates(ats, cfg.RETURN_DATE)
ats = remove_deprecated_device_data(ats)
ats = remove_tainted_histories(ats)
-if not ic.empty:
-ats = drop_invalid_devices(ats, ic)
+ats = drop_invalid_devices(ats, ic)
return ats
def clean_fall_data(self, fd: pd.DataFrame) -> pd.DataFrame:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment