Models¶

Created on Sat May 1 15:12:38 2019
@author: ‘Semeon Risom’
@email: ‘semeon.risom@gmail.com’
@url: ‘https://semeon.io/d/mdl’
@purpose: ‘Build models for analysis.’

##### Resources:
- ANOVA:
    - https://m-clark.github.io/docs/mixedModels/anovamixed.html
    - http://dwoll.de/rexrepos/posts/anovaMixed.html
    - https://rpsychologist.com/r-guide-longitudinal-lme-lmer
    - https://stats.stackexchange.com/questions/247582/repeated-measures-anova-in-r-errorsubject-vs-errorsubject-day
    - https://cran.r-project.org/web/packages/afex/vignettes/afex_anova_example.html#post-hoc-contrasts-and-plotting
    - http://www.let.rug.nl/nerbonne/teach/rema-stats-meth-seminar/presentations/Wieling-MixedModels-2011.pdf
- negative-binomial poisson:
    - https://stackoverflow.com/questions/47686227/poisson-regression-in-statsmodels-and-r
    - https://tsmatz.wordpress.com/2017/08/30/glm-regression-logistic-poisson-gaussian-gamma-tutorial-with-r/
    - https://stats.stackexchange.com/questions/311556/help-interpreting-count-data-glmm-using-lme4-glmer-and-glmer-nb-negative-binom
- logistic regression:
    - https://stats.idre.ucla.edu/other/mult-pkg/whatstat/
    - https://stats.idre.ucla.edu/r/dae/mixed-effects-logistic-regression/
    - https://tsmatz.wordpress.com/2017/08/30/glm-regression-logistic-poisson-gaussian-gamma-tutorial-with-r/
    - https://data.library.virginia.edu/diagnostic-plots/

##### Imports

[ ]:

#----local
from imhr import plot, processing, raw, redcap
import imhr.model as model
import imhr.settings as settings

#----optional dependency check
# Set `is_library` to True to have imhr.settings check that the required
# libraries are available; with the default (False) the check is skipped.
is_library = False
if is_library:
    settings.library()

##### Start

[ ]:

# Rebind `processing` from the object imported from imhr to the result of
# calling it with `config` and `filters`. Neither name is defined in this part
# of the notebook -- presumably both are set in an earlier cell; verify.
# Later cells call `processing.dwell(...)` on the object created here.
# NOTE(review): because the imported name is overwritten, re-running this cell
# calls the already-created object rather than the original import -- confirm
# that is acceptable or restart the kernel before re-running.
processing = processing(config, filters)
# Short alias for imhr's console helper; used below as
# console('fix config', 'red') -- presumably (message, color); confirm.
console = settings.console

##### Analysis of Variance: Gaze and Dwell Time

[ ]:

# ANOVA on dwell time: load the final dataset, compute per-trial dwell time
# with multiprocessing, normalize it, average it per
# participant x cesd_group x trialType x aoi, and pass the result to
# model.anova.
effects = {}

#----load config
# p = path_['output'] + "/analysis/config.json"
# with open(p) as f:
#     config_ = json.loads(f.read())
# The in-memory config is reused for now; the console message below is the
# reminder to switch to the saved config shown above.
config_ = config
console('fix config', 'red')

#----exclude
# participants to drop before modelling
exclude = config_['metadata']['subjects']['exclude']

#----load data
p = path_['output'] + "/analysis/final_data.csv"
df_ = pd.read_csv(p, float_precision='high')

#-----parameters
# dependent variable
y = 'dwell_time'
# main effects
effects['main'] = {
    'cesd_group': 'categorical',
    'aoi': 'categorical',
    'trialType': 'categorical',
}
# random effects
effects['random'] = {
    'participant': 'categorical',
}
# formula passed to model.anova
f = "%s ~ cesd_group + aoi + trialType + (1|participant)"%(y)

#----file name and output folder handed to model.anova
csv = "dwell_data.csv"
p = path_['output'] + "/analysis/html/model/anova/"

#-----calculate dwell time using multiprocessing
# Everything that needs the dwell result lives under the __main__ guard.
# The previous one-line form (`a, b = ... if __name__ == '__main__' else None`)
# raised TypeError when the guard was false, because None cannot be unpacked.
# With a real guard, a process that merely imports this module skips the
# computation instead of crashing.
if __name__ == '__main__':
    df_dwell, error_dwell = processing.dwell(df=df_, cores=7)

    #----normalize dwell_time for comparison between iaps and pofa
    # 'iaps' rows are divided by 4500, every other row by 3000 (presumably
    # the stimulus duration in msec for each task -- confirm). Done
    # column-wise instead of row-by-row with DataFrame.apply(axis=1).
    trial_window = df_dwell['trialType'].eq('iaps').map({True: 4500, False: 3000})
    df_dwell['dwell_time'] = df_dwell['dwell_time'] / trial_window

    #-----exclude participants, group by subject:trialType:aoi
    # exclude participants
    df_dwell = df_dwell[~df_dwell['participant'].isin(exclude)]
    # mean dwell time per participant x cesd_group x trialType x aoi
    df_dwell = (
        df_dwell
        .groupby(['participant', 'cesd_group', 'trialType', 'aoi'])['dwell_time']
        .mean()
        .reset_index()
    )

    #-----run
    anova_, anova_result, anova_r, html = model.anova(
        config=config_, df=df_dwell, y=y, f=f, csv=csv, path=p, effects=effects
    )

#-----delete
# drop the short scratch names so they do not leak into later cells
del y, f, csv, p

##### Linear Mixed Model Regression: Stimulus and Dotloc Onset Error

[ ]:

# Linear mixed models on onset error: fit one model.lmer model per dependent
# variable in `y` using the rows of error.csv.
effects = {}

# #----load config
# p = path_['output'] + "/analysis/config.json"
# with open(p) as f:
#     config_ = json.loads(f.read())
# The in-memory config is reused for now; the console message below is the
# reminder to switch to the saved config shown above.
config_ = config
console('fix config', 'red')

#----exclude
# Read the exclusion list here so this cell does not depend on another cell
# having been run first (notebook cells are often executed out of order).
exclude = config_['metadata']['subjects']['exclude']

#----load data
p = path_['output'] + "/analysis/error.csv"
df_ = pd.read_csv(p, float_precision='high')

#----parameters
# dependent variables; one model is built for each entry
y = ['diff_stim', 'diff_dotloc']
# fixed effects
effects['fixed'] = {
    'os': 'categorical',
    'trialType': 'categorical',
    'TrialNum': 'factorial',
}
# random effects
effects['random'] = {
    'TrialNum': 'factorial',
    'participant': 'factorial',
}

#----file name and output folder handed to model.lmer
# NOTE(review): both models share this csv name; if model.lmer writes
# DV-specific content to it, the second run overwrites the first -- confirm.
csv = "onset_data.csv"
# the output folder is the same for every model, so it is set once
p = path_['output'] + "/analysis/html/model/lmer/"

#----run model for each dependent variable
# lmer_, lmer_result, lmer_r and html are rebound on every pass, so after the
# loop they hold the results for the last entry of `y`.
for _y in y:
    # formula: the dependent variable is wrapped in sqrt()
    f = "sqrt(%s) ~ os + trialType + TrialNum + (1+TrialNum|participant)"%(_y)
    # run
    lmer_, lmer_result, lmer_r, html = model.lmer(
        config=config_, df=df_, y=_y, f=f, exclude=exclude, csv=csv, path=p, effects=effects
    )

#-----delete
# drop the short scratch names so they do not leak into later cells
del y, _y, f, csv, p

##### Analysis of Variance: Gaze and Dotprobe bias

[ ]:

# ANOVA on bias scores: run model.anova once per dependent variable in `y`
# on the rows of final_data.csv whose `nested` column equals 'trialType'.
effects = {}

# #----load config
# p = path_['output'] + "/analysis/config.json"
# with open(p) as f:
#     config_ = json.loads(f.read())
# The in-memory config is reused for now; the console message below is the
# reminder to switch to the saved config shown above.
config_ = config
console('fix config', 'red')

#----exclude
# Read the exclusion list here so this cell does not depend on another cell
# having been run first (notebook cells are often executed out of order).
exclude = config_['metadata']['subjects']['exclude']

#----load data
p = path_['output'] + "/analysis/final_data.csv"
df_ = pd.read_csv(p, float_precision='high')

#-----exclude participants, keep trialType-level rows
# exclude participants
df_ = df_[~df_['participant'].isin(exclude)]
# row filter (not a groupby): keep rows whose `nested` value is 'trialType'
df_ = df_.loc[df_['nested'] == 'trialType']

#----parameters
# dependent variables; one ANOVA is run for each entry
y = ['dp_bias', 'gaze_bias']
# main effects
effects['main'] = {
    'cesd_group': 'categorical',
    'trialType': 'categorical',
}
# random effects
effects['random'] = {
    'participant': 'categorical',
}

#-----path
# the output folder is the same for every model, so it is set once
p = path_['output'] + "/analysis/html/model/anova/"

#----run ANOVA for each dependent variable
# anova_, anova_result, anova_r and html are rebound on every pass, so after
# the loop they hold the results for the last entry of `y`.
for _y in y:
    console('Step: ANOVA (%s)'%(_y), 'red')
    #----csv file name handed to model.anova (one per dependent variable)
    csv = "%s.csv"%(_y)
    #-----formula
    f = "%s ~ cesd_group + trialType + (1|participant)"%(_y)
    #-----run
    # pass config_ (same object as config) for consistency with the other cells
    anova_, anova_result, anova_r, html = model.anova(config=config_, df=df_, y=_y, f=f, csv=csv, path=p, effects=effects)