Source code for tools.labeler

"""
labeler.py
====================================
Labeler module to make class labels for cases.
"""

import pandas as pd
import numpy as np
from pandas.tseries.offsets import DateOffset

def make_complete_label(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the 'Complete' label and keep only the baseline screenings.

    Rows whose ``NumberScreening`` is 0 are flagged as ``Baseline`` = 1
    directly on the passed frame, the completion label is computed per
    session, and only the baseline rows are returned.

    :param df: a dataframe containing accumulated screenings
    :return: baseline rows annotated with 'Complete', re-indexed from 0
    """
    is_first_screening = df['NumberScreening'] == 0
    df.loc[is_first_screening, 'Baseline'] = 1

    labelled = do_citizens_complete_sessions(df)
    baseline_rows = labelled.loc[labelled['Baseline'] == 1]
    return baseline_rows.reset_index(drop=True)
def make_compliance_label(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the 'Compliance' label and keep only completed baseline screenings.

    Rows whose ``NumberScreening`` is 0 are flagged as ``Baseline`` = 1
    directly on the passed frame, the compliance label is computed per
    session, and only rows that are both baseline and complete are returned.

    :param df: a dataframe containing accumulated screenings
    :return: completed baseline rows annotated with 'Compliance',
             re-indexed from 0
    """
    df.loc[df['NumberScreening'] == 0, 'Baseline'] = 1

    # The filter below reads 'Complete', which this function did not create
    # before. Derive it here when it is absent so the function no longer
    # depends on make_complete_label having been run on the same frame first.
    if 'Complete' not in df.columns:
        df = do_citizens_complete_sessions(df)

    df = do_citizens_reach_compliance(df)
    completed_baseline = df.loc[(df['Baseline'] == 1) & (df['Complete'] == 1)]
    return completed_baseline.reset_index(drop=True)
def make_fall_label(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the 'Fall' label and keep only the baseline screenings.

    Rows whose ``NumberScreening`` is 0 are flagged as ``Baseline`` = 1
    directly on the passed frame, the fall label is computed per session,
    and only the baseline rows are returned.

    :param df: a dataframe containing accumulated screenings
    :return: baseline rows annotated with 'Fall', re-indexed from 0
    """
    df.loc[df['NumberScreening'] == 0, 'Baseline'] = 1
    df = do_citizens_fall(df)
    baseline_rows = df.loc[df['Baseline'] == 1]
    return baseline_rows.reset_index(drop=True)
def make_risk_label(df: pd.DataFrame, risk_period_months: int) -> pd.DataFrame:
    """
    Annotate every screening with whether a fall is registered for the same
    citizen within a risk period starting at the screening's ``EndDate``.

    A row counts as a fall record when its ``NeedsReason`` or
    ``PhysicsReason`` contains "Fald/uheld". The result is written to a new
    'Risk' column (1 = fall in period, 0 = no fall) on the passed frame.

    :param df: a dataframe containing screenings
    :param risk_period_months: the length of the risk period in months
    :return: annotated dataframe
    """
    digi_falls = df[['CitizenId', 'NeedsReason', 'PhysicsReason', 'EndDate']].copy()

    # Fill only the reason columns: putting the placeholder into 'EndDate'
    # would make the datetime conversion below fail for fall rows without a
    # date. A missing date simply never falls inside any risk window.
    for reason_column in ('NeedsReason', 'PhysicsReason'):
        digi_falls[reason_column] = digi_falls[reason_column].fillna('Ingen')

    is_fall = (digi_falls['NeedsReason'].str.contains("Fald/uheld", regex=False)
               | digi_falls['PhysicsReason'].str.contains("Fald/uheld", regex=False))

    # Take an explicit copy so later column assignment never targets a slice
    # of ``df``, and convert the dates once instead of once per row.
    digi_falls = digi_falls.loc[is_fall].copy()
    digi_falls['EndDate'] = pd.to_datetime(digi_falls['EndDate'])

    if df.empty:
        # Row-wise apply on an empty frame does not yield a label column.
        df['Risk'] = pd.Series(dtype='int64')
        return df

    df['Risk'] = df[['CitizenId', 'EndDate']].apply(
        lambda row: annotate_falls(row, digi_falls, risk_period_months),
        axis=1)
    return df
def annotate_falls(row, digi_db: pd.DataFrame, risk_period_months: int) -> int:
    """
    Utility method to annotate a row if its citizen id appears in a fall
    data set within a specific time period.

    The period starts at the row's ``EndDate`` and ends
    ``risk_period_months`` months later; both ends are inclusive.

    :param row: a mapping or Series with 'CitizenId' and 'EndDate'
    :param digi_db: fall records with 'CitizenId' and 'EndDate' columns
    :param risk_period_months: the length of the risk period in months
    :return: 1 if at least one fall record matches, otherwise 0
    """
    citizen_id = row['CitizenId']
    period_start = pd.Timestamp(row['EndDate'])
    period_end = period_start + DateOffset(months=risk_period_months)

    # Convert into a local Series rather than writing back into ``digi_db``:
    # this function is called once per row and must not keep rewriting
    # (or mutate at all) the frame owned by the caller.
    fall_dates = pd.to_datetime(digi_db['EndDate'])

    in_period = ((fall_dates >= period_start)
                 & (digi_db['CitizenId'] == citizen_id)
                 & (fall_dates <= period_end))
    return int(in_period.any())
def do_citizens_complete_sessions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Label every row with whether its session was completed.

    Rows are grouped by citizen, split and session; all rows of a group get
    ``Complete`` = 1 when the largest ``HasCompletedSession`` value in the
    group is above 0, otherwise 0. The column is added to the passed frame
    and an integer-typed copy is returned.

    :param df: a dataframe containing screenings
    :return: dataframe with a label for completed
    """
    def _session_flag(values):
        if np.max(values) > 0:
            return 1
        return 0

    session_keys = ['CitizenId', 'NumberSplit', 'NumberSession']
    completed = df.groupby(session_keys)['HasCompletedSession']
    df['Complete'] = completed.transform(_session_flag)
    return df.astype({"Complete": int})
def do_citizens_reach_compliance(df: pd.DataFrame) -> pd.DataFrame:
    """
    Label every row with whether compliance was reached in its session.

    Rows are grouped by citizen, split and session; all rows of a group get
    ``Compliance`` = 1 when the largest ``GotComplianceInSession`` value in
    the group is above 0, otherwise 0. The column is added to the passed
    frame and an integer-typed copy is returned.

    :param df: a dataframe containing screenings
    :return: dataframe with a label for compliance
    """
    def _session_flag(values):
        if np.max(values) > 0:
            return 1
        return 0

    session_keys = ['CitizenId', 'NumberSplit', 'NumberSession']
    compliance = df.groupby(session_keys)['GotComplianceInSession']
    df['Compliance'] = compliance.transform(_session_flag)
    return df.astype({"Compliance": int})
def do_citizens_fall(df: pd.DataFrame) -> pd.DataFrame:
    """
    Label every row with whether a fall was registered in its session.

    Rows are grouped by citizen, split and session; all rows of a group get
    ``Fall`` = 1 when the largest ``HasFallenInSession`` value in the group
    is above 0, otherwise 0. The column is added to the passed frame and an
    integer-typed copy is returned.

    :param df: a dataframe containing screenings
    :return: dataframe with a label for fall
    """
    def _session_flag(values):
        if np.max(values) > 0:
            return 1
        return 0

    session_keys = ['CitizenId', 'NumberSplit', 'NumberSession']
    fallen = df.groupby(session_keys)['HasFallenInSession']
    df['Fall'] = fallen.transform(_session_flag)
    return df.astype({"Fall": int})
def accumulate_screenings(df: pd.DataFrame, settings: dict) -> pd.DataFrame:
    """
    This method accumulates screenings and annotates when a citizen
    completes, reaches compliance or falls during a program.

    For each citizen the rows are walked in frame order while weeks and
    trainings are summed. When both sums reach their thresholds the row
    closes a session: it is marked completed and baseline, the counters are
    reset and the session number is incremented for the following rows.
    The columns 'NumberSession', 'HasFallenInSession', 'HasCompletedSession',
    'Baseline' and 'GotComplianceInSession' are written to the passed frame.

    :param df: a dataframe containing screenings
    :param settings: settings to use; reads 'threshold_weeks' and
                     'threshold_training'
    :return: dataframe with accumulated screenings
    """
    fall_reason = 'Fald/uheld'
    # Minimum mean evaluation for a completed session to count as compliant.
    compliance_threshold = 3.7

    # NOTE(review): the grouping includes 'NumberSplit', but rows are selected
    # by 'CitizenId' only, so counters and session numbers run across all of
    # a citizen's splits. That behaviour is kept as-is; confirm whether
    # per-split accumulation was intended.
    processed_citizens = set()
    for (citizen_id, _split), _ in df.groupby(['CitizenId', 'NumberSplit']):
        # Every split of a citizen would redo the identical pass below, so
        # one pass per citizen gives the same result.
        if citizen_id in processed_citizens:
            continue
        processed_citizens.add(citizen_id)

        number_session = 0
        cumsum_weeks = 0
        cumsum_training = 0
        citizen_df = df.loc[df['CitizenId'] == citizen_id]

        # Plain iteration replaces Series.iteritems(), which was removed in
        # pandas 2.0.
        citizen_rows = zip(citizen_df.index,
                           citizen_df['NumberWeeks'],
                           citizen_df['NumberTraining'],
                           citizen_df['NeedsReason'],
                           citizen_df['PhysicsReason'],
                           citizen_df['HasFallRisk'],
                           citizen_df['MeanEvaluation'])

        for (label, weeks, training, needs_reason, physics_reason,
             fall_risk, mean_evaluation) in citizen_rows:
            df.loc[label, 'NumberSession'] = number_session
            cumsum_weeks += weeks
            cumsum_training += training

            # Only a genuine boolean True counts as a fall-risk flag. The
            # previous `a | b | flag is True` compared the whole OR-ed value
            # to True by identity, which failed for int or numpy-bool flags
            # even when one of the reasons matched.
            has_fall_risk = (isinstance(fall_risk, (bool, np.bool_))
                             and bool(fall_risk))
            has_fallen = (needs_reason == fall_reason
                          or physics_reason == fall_reason
                          or has_fall_risk)
            df.loc[label, 'HasFallenInSession'] = 1 if has_fallen else 0

            cond_weeks = cumsum_weeks >= settings['threshold_weeks']
            cond_training = cumsum_training >= settings['threshold_training']
            if cond_weeks and cond_training:
                # This row closes the session: start counting afresh.
                cumsum_weeks = 0
                cumsum_training = 0
                number_session += 1
                df.loc[label, 'HasCompletedSession'] = 1
                df.loc[label, 'Baseline'] = 1
                if mean_evaluation >= compliance_threshold:
                    df.loc[label, 'GotComplianceInSession'] = 1
                else:
                    df.loc[label, 'GotComplianceInSession'] = 0
            else:
                df.loc[label, 'HasCompletedSession'] = 0
                df.loc[label, 'Baseline'] = 0
                df.loc[label, 'GotComplianceInSession'] = 0
    return df