Commit 9fcead93 authored by thecml
Browse files

updated scripts for new loader

parent 785379cb
Pipeline #94221 failed in 4 minutes and 17 seconds
#!/usr/bin/env python #!/usr/bin/env python
from tools import file_writer, raw_loader, cleaner from tools import file_writer, raw_loader, cleaner
import paths as pt import paths as pt
from io import BytesIO
from pathlib import Path
import shutil
def main(): def main():
loader = raw_loader.RawLoader() loader = raw_loader.RawLoader()
...@@ -20,12 +23,41 @@ def main(): ...@@ -20,12 +23,41 @@ def main():
training_cancelled = cleaner2020.clean_training_cancelled(tc, patient_data) training_cancelled = cleaner2020.clean_training_cancelled(tc, patient_data)
assistive_aids = cleaner2020.clean_assistive_aids(ats, ic) assistive_aids = cleaner2020.clean_assistive_aids(ats, ic)
file_writer.write_pickle(screening_content, pt.INTERIM_DATA_DIR, 'sc.pkl') with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'sc.pkl'), 'wb') as fd:
file_writer.write_pickle(status_set, pt.INTERIM_DATA_DIR, 'ss.pkl') outfile = BytesIO()
file_writer.write_pickle(training_done, pt.INTERIM_DATA_DIR, 'td.pkl') file_writer.write_pickle(screening_content, outfile)
file_writer.write_pickle(training_cancelled, pt.INTERIM_DATA_DIR, 'tc.pkl') outfile.seek(0)
file_writer.write_pickle(assistive_aids, pt.INTERIM_DATA_DIR, 'ats.pkl') shutil.copyfileobj(outfile, fd)
file_writer.write_pickle(ic, pt.INTERIM_DATA_DIR, 'ic.pkl')
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ss.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(status_set, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'td.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(training_done, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'tc.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(training_cancelled, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ats.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(assistive_aids, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ic.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(ic, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
if __name__ == "__main__": if __name__ == "__main__":
main() main()
\ No newline at end of file
...@@ -9,9 +9,14 @@ import shutil ...@@ -9,9 +9,14 @@ import shutil
from pathlib import Path from pathlib import Path
def main(ats_resolution: int = None): def main(ats_resolution: int = None):
screenings = file_reader.read_csv(pt.INTERIM_DATA_DIR, 'screenings.csv', infile = StringIO()
converters={'CitizenId': str}) file_path = pt.INTERIM_DATA_DIR
file_name = 'screenings.csv'
with open(Path.joinpath(file_path, file_name), 'r') as fd:
shutil.copyfileobj(fd, infile)
infile.seek(0)
screenings = file_reader.read_csv(infile, converters={'CitizenId': str})
data_settings = load_settings(pt.CONFIGS_DIR, 'data.yaml') data_settings = load_settings(pt.CONFIGS_DIR, 'data.yaml')
if ats_resolution == None: if ats_resolution == None:
ats_resolution = data_settings['ats_resolution'] ats_resolution = data_settings['ats_resolution']
......
...@@ -8,19 +8,49 @@ from tools import file_reader, file_writer, inputter ...@@ -8,19 +8,49 @@ from tools import file_reader, file_writer, inputter
from utility.settings import load_settings from utility.settings import load_settings
from utility import data_dto, dataset from utility import data_dto, dataset
from pandas.tseries.offsets import DateOffset from pandas.tseries.offsets import DateOffset
from io import StringIO, BytesIO
import shutil
def main(): def main():
sc = file_reader.read_pickle(pt.INTERIM_DATA_DIR, 'sc.pkl') with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'sc.pkl'), 'rb') as fd:
ss = file_reader.read_pickle(pt.INTERIM_DATA_DIR, 'ss.pkl') infile = BytesIO()
td = file_reader.read_pickle(pt.INTERIM_DATA_DIR, 'td.pkl') shutil.copyfileobj(fd, infile)
tc = file_reader.read_pickle(pt.INTERIM_DATA_DIR, 'tc.pkl') infile.seek(0)
ats = file_reader.read_pickle(pt.INTERIM_DATA_DIR, 'ats.pkl') sc = file_reader.read_pickle(infile)
settings = load_settings("data.yaml")
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ss.pkl'), 'rb') as fd:
infile = BytesIO()
shutil.copyfileobj(fd, infile)
infile.seek(0)
ss = file_reader.read_pickle(infile)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'td.pkl'), 'rb') as fd:
infile = BytesIO()
shutil.copyfileobj(fd, infile)
infile.seek(0)
td = file_reader.read_pickle(infile)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'tc.pkl'), 'rb') as fd:
infile = BytesIO()
shutil.copyfileobj(fd, infile)
infile.seek(0)
tc = file_reader.read_pickle(infile)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ats.pkl'), 'rb') as fd:
infile = BytesIO()
shutil.copyfileobj(fd, infile)
infile.seek(0)
ats = file_reader.read_pickle(infile)
settings = load_settings(pt.CONFIGS_DIR, "data.yaml")
data = data_dto.Data(sc, ss, td, tc, ats) data = data_dto.Data(sc, ss, td, tc, ats)
screenings = get_screenings(data, settings) screenings = get_screenings(data, settings)
file_writer.write_csv(screenings, pt.INTERIM_DATA_DIR, 'screenings.csv') with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'screenings.csv'), 'w', newline='') as fd:
outfile = StringIO()
file_writer.write_csv(screenings, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
def get_screenings(data, settings): def get_screenings(data, settings):
ids = dataset.create_union_of_ids(data.sc, data.ss, data.td, data.tc) ids = dataset.create_union_of_ids(data.sc, data.ss, data.td, data.tc)
......
...@@ -9,7 +9,7 @@ import abc ...@@ -9,7 +9,7 @@ import abc
from io import BytesIO, StringIO from io import BytesIO, StringIO
import shutil import shutil
class BaseRawLoeader(metaclass=abc.ABCMeta): class BaseRawLoader(metaclass=abc.ABCMeta):
@abc.abstractmethod @abc.abstractmethod
def load_status_set(self, file_name, path): def load_status_set(self, file_name, path):
"""load the DiGiRehab StatusSet data set""" """load the DiGiRehab StatusSet data set"""
...@@ -34,11 +34,11 @@ class BaseRawLoeader(metaclass=abc.ABCMeta): ...@@ -34,11 +34,11 @@ class BaseRawLoeader(metaclass=abc.ABCMeta):
def load_clusters(self, file_name, path): def load_clusters(self, file_name, path):
"""load the generated clusters data set""" """load the generated clusters data set"""
@abc.abstractmethod @abc.abstractmethod
def load_iso_classes(self, file_name, path): def load_iso_classes(self, file_name, path):
"""load the ISO classes data set""" """load the ISO classes data set"""
class RawLoader(BaseRawLoeader): class RawLoader(BaseRawLoader):
def assert_datetime(self, entry: str) -> bool: def assert_datetime(self, entry: str) -> bool:
""" """
This method checks whether an entry can This method checks whether an entry can
......
...@@ -3,7 +3,7 @@ import pandas as pd ...@@ -3,7 +3,7 @@ import pandas as pd
@dataclass @dataclass
class Data: class Data:
'''Class for keeping track of feature data''' '''Dto for keeping track of feature data'''
sc: pd.DataFrame sc: pd.DataFrame
ss: pd.DataFrame ss: pd.DataFrame
td: pd.DataFrame td: pd.DataFrame
......
...@@ -23,10 +23,15 @@ def test_fit(): ...@@ -23,10 +23,15 @@ def test_fit():
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1 n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
df_to_enc = df.iloc[:,n_numerical_cols:] df_to_enc = df.iloc[:,n_numerical_cols:]
target_name = "Complete"
train_ratio = 0.7
X_train, X_val, y_train, y_val, _ = preprocessor.prepare_data_for_emb(df_to_enc, X_train, X_val, y_train, y_val, _ = preprocessor.prepare_data_for_emb(df_to_enc,
"Complete", target_name,
0.7) train_ratio)
num_epochs = 10
network = neural_embedder.NeuralEmbedder(df=df_to_enc, target_name="Complete") network = neural_embedder.NeuralEmbedder(df=df_to_enc,
target_name=target_name,
epochs=num_epochs)
history = network.fit(X_train, y_train, X_val, y_val) history = network.fit(X_train, y_train, X_val, y_val)
assert len(history.history['accuracy']) == 10
\ No newline at end of file assert len(history.history['accuracy']) == num_epochs
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment