Demographics

Created on Sat May 1 15:12:38 2019
@author: Semeon Risom
@purpose: Process Demographic Data.

##### imports

[ ]:
#----local
from imhr import plot, processing, raw, redcap
import imhr.model as model
import imhr.settings as settings

#----check if required libraries are available
is_library = False
if is_library:
    settings.library()
pass
#----core
from pdb import set_trace as breakpoint
import gc, glob, string, pytz
from datetime import datetime
import numpy as np
import pandas as pd

##### initialize

[ ]:
#console color codes used for status printing throughout the script
console = settings.console
#instantiate the processing pipeline (config and filters are defined upstream -- TODO confirm)
processing = processing(config, filters)

get metadata

[ ]:
print(console['red'] + 'processing metadata' + console['ENDC'])
#where the raw task files live
fpath = '%s/raw/%s'%(config['output'], config['task'])
#where the generated subject metadata is written
spath = '%s/analysis/subject_metadata.csv'%(config['output'])
#build and save the per-subject metadata table
subject_metadata = processing.subject_metadata(fpath=fpath, spath=spath)

prepare data

[ ]:
#participants dropped from every frame below (test/placeholder ids)
exclude = [999999, 111111]
print(console['red'] + 'preparing data: %s'%(exclude) + console['ENDC'])

#read demographics and rename id="participant"
p_demo = config['output'] + "/analysis/demographics.csv"
df_demographics = pd.read_csv(p_demo, float_precision='high')
##drop excluded participants
df_demographics = df_demographics[~df_demographics['participant'].isin(exclude)]

#collapse the two indicator columns into a single gender label
def _gender_label(row):
    """Return 'female'/'male' when exactly one indicator is set, else 'other'."""
    if row['female'] == 1 and row['male'] == 0:
        return 'female'
    if row['male'] == 1 and row['female'] == 0:
        return 'male'
    return 'other'
df_demographics['gender'] = df_demographics.apply(_gender_label, axis=1)

#map numeric eye-color codes (1-13) onto their labels
color=['Light Gray','Gray','Light Blue','Blue','Violet','Blue-Green','Green','Amber','Hazel',
'Light Brown','Dark Brown','Black', 'Other']
df_demographics['eye_color'] = df_demographics['eye_color'].replace(dict(zip(range(1, 14), color)))

##-------read cesd and rename id="participant"
p_cesd = config['output'] + "/analysis/cesd_rrs.csv"
df_cesd = pd.read_csv(p_cesd, float_precision='high').rename(columns={'record_id':'participant'})
##binomial grouping on the cesd cutoff: score > 15 is the 'High' group
is_high = df_cesd['cesd_score'] > 15
df_cesd['cesd_group'] = is_high.astype(int)
df_cesd['cesd_group_'] = is_high.map({True: 'High', False: 'Low'})
##drop excluded participants
df_cesd = df_cesd[~df_cesd['participant'].isin(exclude)]

##-------read mmpi and rename id="participant"
p_mmpi = config['output'] + "/analysis/mmpi.csv"
df_mmpi = pd.read_csv(p_mmpi, float_precision='high').rename(columns={'record_id':'participant'})
##drop excluded participants
keep_mask = ~df_mmpi['participant'].isin(exclude)
df_mmpi = df_mmpi[keep_mask]

##-------read subject metadata
p_subject = config['output'] + "/analysis/subject_metadata.csv"
df_metadata = pd.read_csv(p_subject, float_precision='high')
##drop duplicate participant listings
df_metadata = df_metadata.drop_duplicates(subset="participant", keep="first").reset_index(drop=True)
#start and end dates
#initialize the collection lists before the first append -- they were
#previously appended to without ever being defined anywhere in this file,
#which raises NameError on a clean run (they are collapsed to dicts later)
date_start, date_end = [], []
date_start.append({'metadata':'%s'%(df_metadata['date'].min())})
date_end.append({'metadata':'%s'%(df_metadata['date'].max())})
##exclude participants
df_metadata = df_metadata[~df_metadata['participant'].isin(exclude)]
#rename variables
df_metadata = df_metadata.rename(columns={"isWindowSuccess": "is_calibrated"})

##-------read bias summary and rename id="participant"
#bias file location depends on the run type
if config['type'] == 'eyetracking':
    p_bias = config['output'] + "/bias/eyetracking_bias.csv"
else:
    p_bias = config['output'] + "/bias/behavioral_bias.csv"

#load and rename id column
df_bias = pd.read_csv(p_bias, float_precision='high').rename(columns={'id':'participant'})
###drop rows with a missing trialType
df_bias = df_bias.dropna(subset=['trialType'])
##coerce bias measures to float (gaze bias only exists for eyetracking runs)
df_bias['dp_bias'] = df_bias['dp_bias'].astype(float)
if config['type'] == 'eyetracking':
    df_bias['gaze_bias'] = df_bias['gaze_bias'].astype(float)

#keep the text label, then encode trialType numerically (pofa=1, iaps=0)
df_bias['trialType_'] = df_bias['trialType']
df_bias['trialType'] = df_bias['trialType_'].eq('pofa').astype(int)
##drop excluded participants
df_bias = df_bias[~df_bias['participant'].isin(exclude)]

##-------getting demographic data
#subject-level frame (metadata + cesd + demographics) used for descriptives
df_s = df_metadata.merge(df_cesd,on='participant').merge(df_demographics,on='participant')

##-------merge everything into the trial-level analysis frame
df = df_bias.merge(df_cesd,on='participant').merge(df_metadata,on='participant').merge(df_demographics,on='participant')
#exclude participants
df = df[~df['participant'].isin(exclude)]
#rename columns
##rename microsoft os to msos, mac os to macos
#FIX: assign the result rather than replace(..., inplace=True) on a column
#slice -- the inplace form operates on a temporary and is deprecated in pandas
df['os'] = df['os'].replace(['Microsoft Windows', 'macOS','Chrome OS'], ['msos', 'macos', 'cos'])

##-------calculate difference between real stimulus, dotloc onset and real value #then merge medians with df
merge = ['race','gender','is_normalvision','os','participant']
df_error, onset_error, drop = processing.onset_diff(df0=df, merge=merge, cores=7)
##fold newly dropped participants into the exclude list
exclude = drop + exclude

##-------final version of df
#merge the per-participant onset-error summary into the main frame
error_cols = ['TrialNum_','m_rt','accuracy','m_diff_dotloc','m_diff_stim','participant']
df_error_unique = df_error[error_cols].drop_duplicates(subset="participant", keep="first")
df = pd.merge(df, df_error_unique, how='left', on='participant')

##export for separate analysis in r
csv_path = config['output'] + "/analysis/final_data.csv"
print(console['red'] + 'Step: export for R analysis: %s'%(csv_path) + console['ENDC'])
df.to_csv(csv_path, index=None)

##--------number of subjects per stage
##demographics questionnaire
subjects_demographics = df_demographics.shape[0]
###completed the task
subjects_task = df_metadata.shape[0]
###eyetracking group
eyetracking_mask = df_metadata['is_eyetracking'] == True
subjects_eyetracking = df_metadata.loc[eyetracking_mask].shape[0]
l_eyetracking = df_metadata.loc[eyetracking_mask]['participant'].astype('int').to_list()
###successfully calibrated
calibrated_mask = df_metadata['is_calibrated'] == True
subjects_calibrated = df_metadata.loc[calibrated_mask].shape[0]
l_calibrated = df_metadata.loc[calibrated_mask]['participant'].astype('int').to_list()
###behavioral group
behavioral_mask = df_metadata['is_eyetracking'] == False
subjects_behavioral = df_metadata.loc[behavioral_mask].shape[0]
l_behavioral = df_metadata.loc[behavioral_mask]['participant'].astype('int').to_list()
##cesd
subjects_cesd = df_cesd.shape[0]
##mmpi
subjects_mmpi = df_mmpi.shape[0]

###participants actually used in analysis (one csv per participant on disk)
subjects_eyetracking_used = len(glob.glob(config['output'] + "/tlbs/eyetracking/*.csv"))
subjects_behavioral_used = len(glob.glob(config['output'] + "/tlbs/behavioral/*.csv"))

##subjects used for the current run type
subjects_used = subjects_eyetracking_used if config['type'] == 'eyetracking' else subjects_behavioral_used

##--------collapse the per-source date lists into {source: date} dicts
date_start = {key: d[key] for d in date_start for key in d}
date_end = {key: d[key] for d in date_end for key in d}


demographic statistics

[ ]:
#-----------------------------build the analysis subsets
#keep eyetracking participants only
df_d = df_s[df_s['participant'].isin(l_eyetracking)]
#cesd high / low splits, one row per participant
df_dh = df_d.loc[df_d['cesd_score'] > 15].drop_duplicates(subset="participant", keep="first")
df_dl = df_d.loc[df_d['cesd_score'] <= 15].drop_duplicates(subset="participant", keep="first")

#denominator for every percentage below
total = len(l_eyetracking)

#-----------------------------descriptive demographic stats
print(console['red'] + 'Step: descriptive demographic' + console['ENDC'])
rows = []
##--------age: mean (SD) per cesd group
def _mean_sd(series):
    """Format a series as 'mean (SD)', both rounded to one decimal."""
    return '%s (%s)'%(str(round(series.mean(), 1)), str(round(series.std(), 1)))
rows.append(["Age","mean (SD)", _mean_sd(df_dl['age']), _mean_sd(df_dh['age'])])

##--------eye color counts over unique participants
eyecolor_counts = df_d.drop_duplicates(subset="participant", keep="first").loc[:,'eye_color'].value_counts()
for label, n in eyecolor_counts.items():
    if n != 0:
        pct = '%.1f'%(round(n/total, 4)*100)
        rows.append(["Eye Color","%s"%(label), '%s (%s)'%(n,pct),''])
del eyecolor_counts

##--------boolean-mask counts with percent-of-total
#the five rows below all follow the identical pattern, so it is factored
#into one helper instead of five copies of the same four lines
def _count_row(mask, id_, group):
    """Count unique participants where *mask* holds; return a formatted row.

    Returns [id_, group, 'n (pct)', 'a'] where pct is n/total to one decimal,
    matching the formatting used throughout this table.
    """
    n = df_d.loc[mask].drop_duplicates(subset="participant", keep="first").shape[0]
    pct = '%.1f'%(round(n/total, 4)*100)
    return [id_, group, '%s (%s)'%(n, pct), 'a']

##--------vision
rows.append(_count_row(df_d['is_normalvision'] == True, "Vision", "Normal"))
rows.append(_count_row(df_d['is_corrective'] == True, "Vision", "Corrective"))
##--------handedness
rows.append(_count_row(df_d['handedness'] == 'Right', "Handedness (Right)", "Right"))
##--------gender
rows.append(_count_row(df_d['female'] == 1, "Gender", "Female"))
rows.append(_count_row(df_d['male'] == 1, "Gender", "Male"))

##--------hispanic/latino overall count
#NOTE(review): the row append below was disabled in favor of the grouped
#low/high version later in this cell; these values are currently unused.
#(a discarded no-op `df_d.groupby(['hispanic']).agg(...)` expression was removed)
df_sum = df_d.loc[df_d['hispanic'] == True].drop_duplicates(subset="participant", keep="first").reset_index(drop=True)
count = df_sum.shape[0]
above_pct = '%.1f'%(round(df_sum.shape[0]/total, 4)*100)
#rows.append(["Hispanic or Latino","(%)", '%s (%s)'%(count,above_pct),'a'])






#hispanic/latino-----------------------------------------------------------------------------------------------------
#count hispanic participants within each cesd group
df_sum = df_d[['hispanic','cesd_group_']].loc[df_d['hispanic'] == True].groupby(['cesd_group_']).agg(['count'])
#reset multiindex
df_row = df_sum.reset_index()
#collapse row indexes to one
df_row.columns = df_row.columns.get_level_values(0)
#get value per group; default to 0 when a cesd group contains no hispanic
#participants (the original unconditional .values[0] raised IndexError then)
lh = []
for value in ['Low','High']:
    matched = df_row.loc[df_row['cesd_group_'] == value]['hispanic'].values
    lh.append(matched[0] if len(matched) else 0)
#percentage of total eyetracking participants
pct_low = '%.1f'%(round(lh[0]/total, 4)*100)
pct_high = '%.1f'%(round(lh[1]/total, 4)*100)
#rows
rows.append(["Hispanic or Latino","(%)",'%s (%s)'%(lh[0],pct_low),'%s (%s)'%(lh[1],pct_high)])
#race----------------------------------------------------------------------------------------------------------------
#NOTE(review): this grouped count is never read; kept as a starting point for
#a future per-cesd-group race breakdown (an abandoned commented-out draft of
#that loop was removed as dead code)
df_sum = df_d.groupby(['race','cesd_group_'])['race'].agg(['count'])

##--------race counts over unique participants
race = df_d.drop_duplicates(subset="participant", keep="first").loc[:,'race'].value_counts()
for index, value in race.items():
    if value != 0:
        above_pct = '%.1f'%(round(value/total, 4)*100)
        rows.append(["Race","%s"%(index), '%s (%s)'%(value,above_pct),'a'])
del race

##--------rrs: brooding subscale mean (SD) by cesd group
rows.append(["Ruminative Response Scale","(SD)",
             '%s (%s)'%(str(round(df_dl['rrs_brooding'].mean(),1)), str(round(df_dl['rrs_brooding'].std(),1))),
             '%s (%s)'%(str(round(df_dh['rrs_brooding'].mean(),1)), str(round(df_dh['rrs_brooding'].std(),1)))])

##--------cesd score mean (SD) by cesd group
rows.append(["Center for Epidemiologic Studies Depression Scale","(SD)",
             '%s (%s)'%(str(round(df_dl['cesd_score'].mean(),1)), str(round(df_dl['cesd_score'].std(),1))),
             '%s (%s)'%(str(round(df_dh['cesd_score'].mean(),1)), str(round(df_dh['cesd_score'].std(),1)))])
##NOTE: a "CES-D > 15 (%)" row was drafted here but intentionally disabled

#----- assemble the table
descriptive = pd.DataFrame(rows)
descriptive = descriptive.rename(columns={0:'ID',1:'Group',2:'CESD<=15',3:'CESD>15'})
#FIX: clear the index name by assignment -- `del descriptive.index.name`
#raises AttributeError on modern pandas
descriptive.index.name = None

##create html
html_name = 'demographic'
html_path = config['output'] + "/analysis/html/%s.html"%(html_name)
title = '<b>Table 1.</b> Participant characteristics (N = %s).'%(total)
footnote = "<div id='note'>N = Sample size of eyetracking participants. Total participants = %s."%(subjects_task)
html = plot.html(config=config, df=descriptive, path=html_path, name=html_name, source="demographic", title=title, footnote=footnote)

list of variables

[ ]:
print(console['red'] + 'Step: list of variables' + console['ENDC'])
#build the variable/definition table from the merged frame
df_variables = processing.variables(df=df)

##render to html
html_name = 'definitions'
html_path = '%s/analysis/html/%s.html'%(config['output'], html_name)
title = '<b>Table 1.</b> Task Variables and Definitions.'
html = plot.html(config=config, df=df_variables, path=html_path, name=html_name, source="definitions", title=title)


descriptive device

[ ]:
print(console['red'] + 'Step: descriptive device' + console['ENDC'])
rows = []

##--------count/percent rows per device characteristic
#every characteristic below follows the identical value_counts pattern, so it
#is factored into one helper instead of ten near-identical loops
def _tally_rows(series, label, fmt=None):
    """Return one row per distinct value in *series*: [label, value, 'n (pct)'].

    pct is the share of subjects_task to one decimal place. *fmt* optionally
    reformats the index value (used for devicePixelRatio).
    """
    out = []
    for idx, n in series.items():
        if fmt is not None:
            idx = fmt(idx)
        pct = '%.1f'%(round(n/subjects_task, 4)*100)
        out.append([label, '%s'%(idx), '%s (%s)'%(n, pct)])
    return out

#one record per participant; all counts below are over unique participants
uniq = df_s.drop_duplicates(subset="participant", keep="first")

##--------os / browser / gpu characteristics (plain count pattern)
for col, label in [('os', 'Operating System'),
                   ('os_version', 'Operating System version'),
                   ('browser', 'Browser'),
                   ('browser_version', 'Browser version'),
                   ('gpu_type', 'GPU type'),
                   ('gpu', 'GPU model')]:
    rows.extend(_tally_rows(uniq.loc[:, col].value_counts(), label))

##--------devicePixelRatio: ascending order, index shown to two decimals
rows.extend(_tally_rows(uniq.loc[:,'devicePixelRatio'].value_counts().sort_index(axis=0),
                        'devicePixelRatio', fmt=lambda i: '%.2f'%(round(i, 2))))

##--------display resolution
rows.extend(_tally_rows(uniq.loc[:,'monitorSize'].value_counts(), 'Display resolution'))

##--------webcam message (kept in a variable: the counts are reused later for
##the task-table footnote)
webcam_m = uniq.loc[:,'WebcamMessage'].value_counts()
rows.extend(_tally_rows(webcam_m, 'Webcam message'))

##--------webcam brand
rows.extend(_tally_rows(uniq.loc[:,'webcam_brand'].value_counts(), 'Webcam brand'))

##--------webcam resolution: drop the '.x.' placeholder before counting
webcamr = df_s[~df_s['webcamSize'].isin(['.x.'])].drop_duplicates(subset="participant",
               keep="first").loc[:,'webcamSize'].value_counts()
rows.extend(_tally_rows(webcamr, 'Webcam resolution'))
del webcamr

#-------assemble the table
descriptive = pd.DataFrame(rows)
descriptive = descriptive.rename(columns={0:'ID',1:'Group',2:'Statistic'})
#FIX: clear the index name by assignment -- `del descriptive.index.name`
#raises AttributeError on modern pandas
descriptive.index.name = None

#footnote explaining the devicePixelRatio correction
footnote = [
'<div class="description">\n',
    'During data collection, participants screen resolution were multiplied by the pixel density ratio, or\
    <a class="ref" href="https://developer.mozilla.org/en-US/docs/Web/API/Window/devicePixelRatio"><i>devicePixelRatio</i></a>\
    (i.e. width = screen.width / devicePixelRatio = 1920 * 1.5). This was done with the intent of storing true device \
    physical resolution. However to simplify analysis using webgazer, which uses the same initial value \
    to calculate gaze location, participants screen resolution is reverted back to its original value.\n',
'</div>\n']
footnote = ''.join(footnote)

#create html
html_name = 'device'
html_path = config['output'] + "/analysis/html/%s.html"%(html_name)
title = '<b>Table 1.</b> Device characteristics (N = %s).'%(subjects_task)
html = plot.html(config=config, df=descriptive, path=html_path, name=html_name, source="device", title=title, footnote=footnote)

descriptive task

[ ]:
print(console['red'] + 'Step: descriptive task' + console['ENDC'])
rows = []

#shared formatter for every assessment row below
def _n_pct(n, denominator):
    """Format as 'n (pct)' where pct = n/denominator to one decimal."""
    return '%s (%.1f)'%(n, (round(n/denominator, 4)*100))

##----demographic questionnaire (the reference group, always 100%)
rows.append(["Pre-Questionnaire", "Demographics", '%s (100.0)'%(subjects_demographics)])
##----cesd and rrs completion, relative to demographics
rows.append(["Pre-Questionnaire", "CES-D, RRS", _n_pct(subjects_cesd, subjects_demographics)])
##----task completion, relative to demographics
rows.append(["Task", "Task", _n_pct(subjects_task, subjects_demographics)])
###eyetracking group, relative to all task participants
rows.append(["Task", "Eyetracking", _n_pct(subjects_eyetracking, subjects_task)])
###eyetracking participants actually used
rows.append(["Task", "Used", _n_pct(subjects_eyetracking_used, subjects_eyetracking)])
###successfully calibrated
rows.append(["Task", "Calibrated", _n_pct(subjects_calibrated, subjects_eyetracking)])
###behavioral group, relative to all task participants
rows.append(["Task", "Behavioral", _n_pct(subjects_behavioral, subjects_task)])
###behavioral participants actually used
rows.append(["Task", "Used", _n_pct(subjects_behavioral_used, subjects_behavioral)])
##----post assessment, relative to demographics
rows.append(["Post-Questionnaire", "MMPI", _n_pct(subjects_mmpi, subjects_demographics)])

#----assemble the table
descriptive = pd.DataFrame(rows)
descriptive = descriptive.rename(columns={0:'ID',1:'Group',2:'Statistic'})
#FIX: clear the index name by assignment -- `del descriptive.index.name`
#raises AttributeError on modern pandas
descriptive.index.name = None

#----create html
title = '<b>Table 1.</b> Schedule of Assessments.'
##footnote
#FIX: '%.1f' (one decimal place) -- the original '%1.f' is width-1/precision-0,
#a typo given every other percentage in this file uses '%.1f'
no_webcam = '%s, %s%%'%(webcam_m['NotFoundError'], '%.1f'%(round(webcam_m['NotFoundError']/subjects_task, 5)*100))
blocked_webcam = '%s, %s%%'%(webcam_m['NotAllowedError'], '%.1f'%(round(webcam_m['NotAllowedError']/subjects_task, 5)*100))
footnote = [
    '<div class="description">',
    'Data were collected from %s to %s. '%(date_start['metadata'],date_end['metadata']),
    'Participants unable to meet the eyetracking device requirements (e.g. Chrome and Firefox, webcam, laptop or desktop) ',
    'were placed in the behavioral version of dotprobe. Reasons include: participant dropout, ',
    'no webcam present on the device (n=%s), '%(no_webcam),
    'and blocked access of the webcam by the participants browser (n=%s)'%(blocked_webcam),
    '<a class="note" name="1"><sup>1</sup></a>.',
    '<br><br>',
    'Once completing the <i>Pre-Questionnaire</i> on REDCap, participants are redirected to the task. ',
    'Possible reasons for the drop off between <i>Pre-Questionnaire</i> (n=%s) \
    and <i>Task</i> (n=%s) samples can be due to: '%(subjects_cesd, subjects_task),
    'Technical error during redirect, and disinterest in continuing to participate in the experiment. ',
    '<br><br>',
    'Also of note is the amount of participants that were successfully calibrated (n=%s, %.1f%%).'%(subjects_calibrated,\
                    (round(subjects_calibrated/subjects_eyetracking, 4)*100)),
    '</div>'
]
footnote = ''.join(footnote)

#create html
html_name = 'task'
html_path = config['output'] + "/analysis/html/%s.html"%(html_name)
html = plot.html(config=config, df=descriptive, path=html_path, source="task", name=html_name, title=title, footnote=footnote)

summary data

[ ]:
print(console['red'] + 'Step: summary data' + console['ENDC'])
rows = []
#-----------------------------testing group by cesd group (high, low) and trial type mean
df_mean_std = df[['dp_bias','n_dp_valid','pct_dp_toward','mean_dp_toward','mean_dp_away','var_dp_bias','gaze_bias',
               'init_gaze_bias','final_gaze_bias','n_gaze_valid','n_gaze_toward','pct_gaze_center','mean_gaze_toward',
               'mean_gaze_away','var_gaze_bias','dp_gaze_cor','trialType_',
               'luminance','m_diff_stim','m_diff_dotloc']]

#------------------------get list of columns
l_var = list(df_mean_std)
l_var_gaze = ['gaze_bias','init_gaze_bias','final_gaze_bias','n_gaze_valid','n_gaze_toward','pct_gaze_center',
              'mean_gaze_toward','mean_gaze_away','var_gaze_bias']
l_var_dp = ['dp_bias','n_dp_valid','pct_dp_toward','mean_dp_toward','mean_dp_away','var_dp_bias']

##--------create rows: mean/std per trial type, transposed so variables are rows
df_mean_std = df_mean_std.groupby(['trialType_']).agg(['mean','std']).T.unstack(level=1)
#collapse headers
df_mean_std.columns = [' '.join(col).strip() for col in df_mean_std.columns.values]
#combine mean and std into a single 'mean (std)' column per trial type
df_mean_std['iaps'] = df_mean_std['iaps mean'].round(4).astype(str) + " (" + df_mean_std['iaps std'].round(4).astype(str) + ")"
df_mean_std['pofa'] = df_mean_std['pofa mean'].round(4).astype(str) + " (" + df_mean_std['pofa std'].round(4).astype(str) + ")"
#reindex and make new column for factor
df_mean_std['variable'] = df_mean_std.index
df_mean_std = df_mean_std.rename({'index': 'variable'}).reset_index(level=0,  drop=True)
#NOTE(review): this rename targets 'dp_gaze_corr', but the column selected
#above is 'dp_gaze_cor', so it is a no-op -- confirm intent before removing
df_mean_std = df_mean_std.rename({'dp_gaze_corr': 'dpg_core'})
#derive the group label from the variable name
#FIX: np.where instead of pd.np.where -- the pandas.np alias was removed in pandas 2.0
df_mean_std['group'] = np.where(df_mean_std['variable'].str.contains("gaze_"), "gaze",
                       np.where(df_mean_std['variable'].str.contains("dp_"), "dotprobe", "task"))

df_mean_std = df_mean_std[['group','variable','iaps','pofa']]
#FIX: clear the index name by assignment -- `del df_mean_std.index.name`
#raises AttributeError on modern pandas
df_mean_std.index.name = None

#footnote (empty description block, reserved for later text)
footnote = ''.join(['<div class="description">', '</div>\n'])

#create html
html_name = 'summary'
html_path = '%s/analysis/html/%s.html'%(config['output'], html_name)
title = '<b>Table 1.</b> Summary Statistics (N = %s).'%(subjects_used)
html = plot.html(config=config, df=df_mean_std, path=html_path, name=html_name, source="summary", title=title, footnote=footnote)