Newer
Older
import pandas as pd
import numpy as np
from pandas.tseries.offsets import DateOffset
def make_fall_label(df: pd.DataFrame, settings: dict):
# Get DigiRehab falls
digi_falls = df[['CitizenId', 'NeedsReason', 'PhysicsReason', 'EndDate']].fillna('Ingen')
digi_falls = digi_falls[digi_falls['NeedsReason'].str.contains("Fald/uheld")
| digi_falls['PhysicsReason'].str.contains("Fald/uheld")]
# Make target by annotating falls in the risk period
fall_period = settings['fall_period_months']
df['Fall'] = df[['CitizenId', 'EndDate']].apply(lambda x: annotate_falls(x, digi_falls, fall_period), axis=1)
return df
def make_complete_label(df: pd.DataFrame, settings: dict) -> pd.DataFrame:
df = accumulate_screenings(df, settings)
# Set first screening as baseline
df.loc[df['NumberScreening'] == 0, 'Baseline'] = 1
# Calculate if citizens complete their sessions
grp = df.groupby(['CitizenId', 'NumberSplit', 'NumberSession'])['HasCompletedSession']
df['Complete'] = grp.transform(lambda x: 1 if np.max(x) > 0 else 0)
df = df.astype({"Complete": int})
# Select only the baseline screenings
df = df.loc[df['Baseline'] == 1]
# Reset index
df = df.reset_index()
def make_compliance_label(df: pd.DataFrame, settings: dict) -> pd.DataFrame:
df = accumulate_screenings(df, settings)
# Set first screening as baseline
df.loc[df['NumberScreening'] == 0, 'Baseline'] = 1
# Calculate if citizens reached compliance in their sessions
grp_compliance = df.groupby(['CitizenId', 'NumberSplit', 'NumberSession'])['GotComplianceInSession']
df['Compliance'] = grp_compliance.transform(lambda x: 1 if np.max(x) > 0 else 0)
df = df.astype({"Compliance": int})
# Calculate if citizens completed their sessions
grp_complete = df.groupby(['CitizenId', 'NumberSplit', 'NumberSession'])['HasCompletedSession']
df['Complete'] = grp_complete.transform(lambda x: 1 if np.max(x) > 0 else 0)
df = df.astype({"Complete": int})
# Select only the baseline screenings
df = df.loc[df['Baseline'] == 1]
# Select only those that complete
df = df.loc[df['Complete'] == 1]
# Reset index
df = df.reset_index()
return df
df = accumulate_screenings(df, settings)
# Set first screening as baseline
df.loc[df['NumberScreening'] == 0, 'Baseline'] = 1
# Calculate if citizens got an alarm during session
grp = df.groupby(['CitizenId', 'NumberSplit', 'NumberSession'])['GotAlarmInSession']
df['Alarm'] = grp.transform(lambda x: 1 if np.max(x) > 0 else 0)
df = df.astype({"Alarm": int})
# Select only the baseline screenings
df = df.loc[df['Baseline'] == 1]
# Reset index
df = df.reset_index()
return df
def annotate_falls(row, digi_db, fall_period):
citizen_id = row['CitizenId']
current_date = pd.Timestamp(row['EndDate'])
end_date = current_date + DateOffset(months=fall_period)
digi_db['EndDate'] = pd.to_datetime(digi_db['EndDate'])
timespan_digi_falls = digi_db.loc[(digi_db['EndDate'] >= current_date)
& (digi_db['CitizenId'] == citizen_id)
& (digi_db['EndDate'] <= end_date)]
return 1
return 0
def accumulate_screenings(df: pd.DataFrame, settings: dict) -> pd.DataFrame:
for group_name, _ in df.groupby(['CitizenId', 'NumberSplit']):
number_session = 0
cumsum_weeks = 0
cumsum_training = 0
citizen_df = df.loc[df['CitizenId'] == group_name[0]]
items_weeks = citizen_df['NumberWeeks'].iteritems()
items_training = citizen_df['NumberTraining'].iteritems()
items_mean_evaluation = citizen_df['MeanEvaluation'].iteritems()
items_ats = citizen_df['Ats'].iteritems()
for (row_week, row_train, row_mean_evaluation, row_ats) in zip(
items_weeks, items_training, items_mean_evaluation, items_ats):
df.loc[row_week[0], 'NumberSession'] = number_session
cumsum_weeks += row_week[1]
cumsum_training += row_train[1]
if '222718' in row_ats[1].split(','):
df.loc[row_week[0], 'GotAlarmInSession'] = 1
df.loc[row_week[0], 'GotAlarmInSession'] = 0
cond_weeks = cumsum_weeks >= settings['threshold_weeks']
cond_training = cumsum_training >= settings['threshold_training']
if cond_weeks and cond_training:
cumsum_weeks = 0
cumsum_training = 0
number_session += 1
df.loc[row_week[0], 'HasCompletedSession'] = 1
df.loc[row_week[0], 'Baseline'] = 1
if (row_mean_evaluation[1] > 4):
df.loc[row_week[0], 'GotComplianceInSession'] = 1
else:
df.loc[row_week[0], 'GotComplianceInSession'] = 0
else:
df.loc[row_week[0], 'HasCompletedSession'] = 0
df.loc[row_week[0], 'Baseline'] = 0
df.loc[row_week[0], 'GotComplianceInSession'] = 0