Source code for tools.labeler
"""
labeler.py
====================================
Labeler module to make class labels for cases.
"""
import pandas as pd
import numpy as np
from pandas.tseries.offsets import DateOffset
def make_complete_label(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the completion label for baseline screenings.

    Rows whose ``NumberScreening`` equals 0 are flagged as baseline
    (the flag is written to the passed-in dataframe), the ``Complete``
    column is added by ``do_citizens_complete_sessions`` and only rows
    with ``Baseline == 1`` are kept.

    :param df: a dataframe containing screenings
    :return: baseline rows, annotated with ``Complete``, re-indexed from 0
    """
    first_screening = df['NumberScreening'] == 0
    df.loc[first_screening, 'Baseline'] = 1

    labeled = do_citizens_complete_sessions(df)
    baseline_only = labeled[labeled['Baseline'] == 1]
    return baseline_only.reset_index(drop=True)
def make_compliance_label(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the compliance label for baseline screenings of completed sessions.

    Rows whose ``NumberScreening`` equals 0 are flagged as baseline
    (the flag is written to the passed-in dataframe), the ``Compliance``
    column is added by ``do_citizens_reach_compliance`` and only rows
    with both ``Baseline == 1`` and ``Complete == 1`` are kept. The
    ``Complete`` column must already be present in ``df``.

    :param df: a dataframe containing screenings
    :return: the selected rows, annotated with ``Compliance``, re-indexed from 0
    """
    first_screening = df['NumberScreening'] == 0
    df.loc[first_screening, 'Baseline'] = 1

    labeled = do_citizens_reach_compliance(df)
    is_baseline = labeled['Baseline'] == 1
    is_complete = labeled['Complete'] == 1
    selected = labeled[is_baseline & is_complete]
    return selected.reset_index(drop=True)
def make_fall_label(df: pd.DataFrame):
    """
    Build the fall label for baseline screenings.

    Rows whose ``NumberScreening`` equals 0 are flagged as baseline
    (the flag is written to the passed-in dataframe), the ``Fall``
    column is added by ``do_citizens_fall`` and only rows with
    ``Baseline == 1`` are kept.

    :param df: a dataframe containing screenings
    :return: baseline rows, annotated with ``Fall``, re-indexed from 0
    """
    first_screening = df['NumberScreening'] == 0
    df.loc[first_screening, 'Baseline'] = 1

    labeled = do_citizens_fall(df)
    baseline_only = labeled[labeled['Baseline'] == 1]
    return baseline_only.reset_index(drop=True)
def make_risk_label(df: pd.DataFrame, risk_period_months: int) -> pd.DataFrame:
    """
    Annotate every screening with whether the citizen has a registered
    fall within a risk period starting at the screening's ``EndDate``.

    A row counts as a fall record when ``NeedsReason`` or
    ``PhysicsReason`` contains the text ``Fald/uheld``. The result is
    written to a new ``Risk`` column (1 = fall in period, 0 = none) on
    the passed-in dataframe, which is also returned.

    :param df: a dataframe containing screenings
    :param risk_period_months: the length of the risk period in months
    :return: annotated dataframe
    """
    # Fill only the reason columns. Filling 'EndDate' with the placeholder
    # would put a non-date string into the column that annotate_falls
    # later parses as a date.
    digi_falls = df[['CitizenId', 'NeedsReason', 'PhysicsReason', 'EndDate']].fillna(
        {'NeedsReason': 'Ingen', 'PhysicsReason': 'Ingen'})

    # Literal (non-regex) match; non-string cells count as "no fall".
    is_fall = (
        digi_falls['NeedsReason'].str.contains("Fald/uheld", regex=False, na=False)
        | digi_falls['PhysicsReason'].str.contains("Fald/uheld", regex=False, na=False)
    )

    # Work on an explicit copy and parse the dates once, instead of
    # handing a filtered slice with unparsed dates to the per-row helper.
    digi_falls = digi_falls.loc[is_fall].copy()
    digi_falls['EndDate'] = pd.to_datetime(digi_falls['EndDate'])

    df['Risk'] = df[['CitizenId', 'EndDate']].apply(
        lambda x: annotate_falls(x, digi_falls, risk_period_months), axis=1)
    return df
def annotate_falls(row, digi_db, risk_period_months):
    """
    Utility method to annotate a row of a dataframe with whether its
    citizen id appears in a fall data set within a specific time period.

    The period starts at the row's ``EndDate`` and ends
    ``risk_period_months`` months later; both bounds are inclusive.

    :param row: a row providing ``CitizenId`` and ``EndDate``
    :param digi_db: fall records with ``CitizenId`` and ``EndDate``
        columns; it is only read, never modified
    :param risk_period_months: the length of the risk period in months
    :return: 1 if at least one fall record of the citizen lies in the
        period, otherwise 0
    """
    citizen_id = row['CitizenId']
    period_start = pd.Timestamp(row['EndDate'])
    period_end = period_start + DateOffset(months=risk_period_months)

    # Parse into a local Series so the caller's frame is left untouched.
    fall_dates = pd.to_datetime(digi_db['EndDate'])

    in_period = ((digi_db['CitizenId'] == citizen_id)
                 & (fall_dates >= period_start)
                 & (fall_dates <= period_end))
    return int(in_period.any())
def do_citizens_complete_sessions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Label every screening with whether its session was completed.

    Screenings are grouped by ``CitizenId``, ``NumberSplit`` and
    ``NumberSession``; all rows of a group get ``Complete = 1`` when the
    largest ``HasCompletedSession`` value in the group is above 0,
    otherwise 0. The column is also added to the passed-in dataframe.

    :param df: a dataframe containing screenings
    :return: dataframe with an integer ``Complete`` column
    """
    session_keys = ['CitizenId', 'NumberSplit', 'NumberSession']
    completed_flags = df.groupby(session_keys)['HasCompletedSession']
    df['Complete'] = completed_flags.transform(
        lambda flags: int(np.max(flags) > 0))
    return df.astype({"Complete": int})
def do_citizens_reach_compliance(df: pd.DataFrame) -> pd.DataFrame:
    """
    Label every screening with whether its session reached compliance.

    Screenings are grouped by ``CitizenId``, ``NumberSplit`` and
    ``NumberSession``; all rows of a group get ``Compliance = 1`` when
    the largest ``GotComplianceInSession`` value in the group is above
    0, otherwise 0. The column is also added to the passed-in dataframe.

    :param df: a dataframe containing screenings
    :return: dataframe with an integer ``Compliance`` column
    """
    session_keys = ['CitizenId', 'NumberSplit', 'NumberSession']
    compliance_flags = df.groupby(session_keys)['GotComplianceInSession']
    df['Compliance'] = compliance_flags.transform(
        lambda flags: int(np.max(flags) > 0))
    return df.astype({"Compliance": int})
def do_citizens_fall(df: pd.DataFrame) -> pd.DataFrame:
    """
    Label every screening with whether a fall occurred in its session.

    Screenings are grouped by ``CitizenId``, ``NumberSplit`` and
    ``NumberSession``; all rows of a group get ``Fall = 1`` when the
    largest ``HasFallenInSession`` value in the group is above 0,
    otherwise 0. The column is also added to the passed-in dataframe.

    :param df: a dataframe containing screenings
    :return: dataframe with an integer ``Fall`` column
    """
    session_keys = ['CitizenId', 'NumberSplit', 'NumberSession']
    fall_flags = df.groupby(session_keys)['HasFallenInSession']
    df['Fall'] = fall_flags.transform(
        lambda flags: int(np.max(flags) > 0))
    return df.astype({"Fall": int})
def accumulate_screenings(df: pd.DataFrame, settings: dict) -> pd.DataFrame:
    """
    This method accumulates screenings and annotates when a
    citizen completes, reaches compliance or falls during a program.

    For every citizen the screenings are walked in row order while the
    number of weeks and trainings is summed up. When both sums reach
    their thresholds the session counts as completed on that row, the
    sums are reset and the session number is increased. The columns
    ``NumberSession``, ``HasFallenInSession``, ``HasCompletedSession``,
    ``Baseline`` and ``GotComplianceInSession`` are written to ``df``
    in place, row by row via the index label.

    :param df: a dataframe containing screenings
    :param settings: settings to use; must provide ``threshold_weeks``
        and ``threshold_training``
    :return: dataframe with accumulated screenings
    """
    threshold_weeks = settings['threshold_weeks']
    threshold_training = settings['threshold_training']
    # Mean evaluation a completing screening needs to count as compliant.
    compliance_threshold = 3.7
    fall_reason = 'Fald/uheld'
    input_columns = ['NumberWeeks', 'NumberTraining', 'NeedsReason',
                     'PhysicsReason', 'HasFallRisk', 'MeanEvaluation']

    processed_citizens = set()
    for group_name, _ in df.groupby(['CitizenId', 'NumberSplit']):
        citizen_id = group_name[0]
        # A citizen shows up once per split, but the rows below are
        # selected by citizen only, so one pass per citizen is enough.
        if citizen_id in processed_citizens:
            continue
        processed_citizens.add(citizen_id)

        number_session = 0
        cumsum_weeks = 0
        cumsum_training = 0
        # NOTE(review): the sums and the session counter run across all
        # splits of a citizen, although the grouping is per split --
        # confirm whether accumulation should restart for each split.
        citizen_df = df.loc[df['CitizenId'] == citizen_id, input_columns]

        for (index, weeks, training, needs_reason, physics_reason,
             has_fall_risk, mean_evaluation) in citizen_df.itertuples(
                 index=True, name=None):
            df.loc[index, 'NumberSession'] = number_session
            cumsum_weeks += weeks
            cumsum_training += training

            # Explicit type checks keep missing values (NaN/None) from
            # raising and accept numpy booleans as well as Python ones.
            reported_fall = any(
                isinstance(reason, str) and reason == fall_reason
                for reason in (needs_reason, physics_reason))
            flagged_risk = (isinstance(has_fall_risk, (bool, np.bool_))
                            and bool(has_fall_risk))
            df.loc[index, 'HasFallenInSession'] = int(reported_fall or flagged_risk)

            cond_weeks = cumsum_weeks >= threshold_weeks
            cond_training = cumsum_training >= threshold_training
            if cond_weeks and cond_training:
                # Session finished on this row: start a new one.
                cumsum_weeks = 0
                cumsum_training = 0
                number_session += 1
                df.loc[index, 'HasCompletedSession'] = 1
                df.loc[index, 'Baseline'] = 1
                if mean_evaluation >= compliance_threshold:
                    df.loc[index, 'GotComplianceInSession'] = 1
                else:
                    df.loc[index, 'GotComplianceInSession'] = 0
            else:
                df.loc[index, 'HasCompletedSession'] = 0
                df.loc[index, 'Baseline'] = 0
                df.loc[index, 'GotComplianceInSession'] = 0
    return df