{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ ".. _models:\n", " \n", ".. title:: Models\n", " \n", "#### Models" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " Created on Sat May 1 15:12:38 2019 \n", " @author: 'Semeon Risom' \n", " @email: 'semeon.risom@gmail.com' \n", " @url: 'https://semeon.io/d/mdl' \n", " @purpose: 'Build models for analysis.' " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " ##### Resources:\n", " - ANOVA: \n", " - https://m-clark.github.io/docs/mixedModels/anovamixed.html \n", " - http://dwoll.de/rexrepos/posts/anovaMixed.html \n", " - https://rpsychologist.com/r-guide-longitudinal-lme-lmer \n", " - https://stats.stackexchange.com/questions/247582/repeated-measures-anova-in-r-errorsubject-vs-errorsubject-day \n", " - https://cran.r-project.org/web/packages/afex/vignettes/afex_anova_example.html#post-hoc-contrasts-and-plotting \n", " - http://www.let.rug.nl/nerbonne/teach/rema-stats-meth-seminar/presentations/Wieling-MixedModels-2011.pdf \n", " - negative-binomial poisson: \n", " - https://stackoverflow.com/questions/47686227/poisson-regression-in-statsmodels-and-r\n", " - https://tsmatz.wordpress.com/2017/08/30/glm-regression-logistic-poisson-gaussian-gamma-tutorial-with-r/\n", " - https://stats.stackexchange.com/questions/311556/help-interpreting-count-data-glmm-using-lme4-glmer-and-glmer-nb-negative-binom\n", " - logistic regression:\n", " - https://stats.idre.ucla.edu/other/mult-pkg/whatstat/\n", " - https://stats.idre.ucla.edu/r/dae/mixed-effects-logistic-regression/\n", " - https://tsmatz.wordpress.com/2017/08/30/glm-regression-logistic-poisson-gaussian-gamma-tutorial-with-r/\n", " - https://data.library.virginia.edu/diagnostic-plots/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " ##### Imports" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#----local\n", "from imhr import plot, processing, raw, redcap\n", "import 
imhr.model as model\n",
    "import imhr.settings as settings\n",
    "\n",
    "#----third-party\n",
    "import pandas as pd\n",
    "\n",
    "#----check if required libraries are available\n",
    "is_library = False\n",
    "if is_library:\n",
    "    settings.library()\n",
    "pass"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    " ##### Start"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: `config`, `filters` and `path_` are not created in this notebook;\n",
    "# they must already exist in the session before this cell runs.\n",
    "# The instance gets its own name (`processing_`) so the imported `processing`\n",
    "# is not overwritten when this cell runs.\n",
    "processing_ = processing(config, filters)\n",
    "console = settings.console"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    " ##### Analysis of Variance: Gaze and Dwell Time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "effects = {}\n",
    "\n",
    "#----load config\n",
    "# p = path_['output'] + \"/analysis/config.json\"\n",
    "# with open(p) as f:\n",
    "#     config_ = json.loads(f.read())\n",
    "config_ = config\n",
    "console('fix config', 'red')\n",
    "\n",
    "#----exclude\n",
    "exclude = config_['metadata']['subjects']['exclude']\n",
    "\n",
    "#----load data\n",
    "p = path_['output'] + \"/analysis/final_data.csv\"\n",
    "df_ = pd.read_csv(p, float_precision='high')\n",
    "\n",
    "#-----parameters\n",
    "# dependent variable\n",
    "y = 'dwell_time'\n",
    "# main effects\n",
    "effects['main'] = {\n",
    "    'cesd_group': 'categorical',\n",
    "    'aoi': 'categorical',\n",
    "    'trialType': 'categorical'\n",
    "}\n",
    "# random effects\n",
    "effects['random'] = {\n",
    "    'participant': 'categorical'\n",
    "}\n",
    "# formula\n",
    "f = \"%s ~ cesd_group + aoi + trialType + (1|participant)\"%(y)\n",
    "\n",
    "#----save data for access by R and for calculating dwell time\n",
    "csv = \"dwell_data.csv\"\n",
    "p = path_['output'] + \"/analysis/html/model/anova/\"\n",
    "\n",
    "#-----calculate dwell time using multiprocessing\n",
    "# the multiprocessing step may only start from the main module; fail with a clear\n",
    "# message instead of trying to unpack None into two names\n",
    "if __name__ != '__main__':\n",
    "    raise RuntimeError('processing.dwell must be run from the main module')\n",
    "df_dwell, error_dwell = processing_.dwell(df=df_, cores=7)\n",
    "\n",
    "#----normalize dwell_time for comparison between iaps and pofa\n",
    "# iaps trials are divided by 4500, every other trialType by 3000\n",
    "# (presumably the stimulus duration in msec - TODO confirm)\n",
    "is_iaps = df_dwell['trialType'] == 'iaps'\n",
    "df_dwell['dwell_time'] = df_dwell['dwell_time'] / is_iaps.map({True: 4500, False: 3000})\n",
    "\n",
    "#-----exclude participants, group by subject:trialType:aoi\n",
    "# exclude participants\n",
    "df_dwell = df_dwell[~df_dwell['participant'].isin(exclude)]\n",
    "# groupby\n",
    "df_dwell = df_dwell.groupby(['participant','cesd_group','trialType','aoi'])['dwell_time'].mean().reset_index()\n",
    "\n",
    "#-----run\n",
    "anova_, anova_result, anova_r, html = model.anova(config=config_, df=df_dwell, y=y, f=f, csv=csv, path=p, effects=effects)\n",
    "\n",
    "#-----delete\n",
    "del y, f, csv, p"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    " ##### Linear Mixed Model Regression: Stimulus and Dotloc Onset Error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "effects = {}\n",
    "\n",
    "# #----load config\n",
    "# p = path_['output'] + \"/analysis/config.json\"\n",
    "# with open(p) as f:\n",
    "#     config_ = json.loads(f.read())\n",
    "config_ = config\n",
    "console('fix config', 'red')\n",
    "\n",
    "#----exclude\n",
    "# read here as well so this cell does not depend on the dwell-time cell having run\n",
    "exclude = config_['metadata']['subjects']['exclude']\n",
    "\n",
    "#----load data\n",
    "p = path_['output'] + \"/analysis/error.csv\"\n",
    "df_ = pd.read_csv(p, float_precision='high')\n",
    "\n",
    "#----parameters\n",
    "# dependent variables\n",
    "y = ['diff_stim','diff_dotloc'] # one model is built for each DV in the list\n",
    "# fixed effects\n",
    "effects['fixed'] = {\n",
    "    'os': 'categorical',\n",
    "    'trialType': 'categorical',\n",
    "    'TrialNum': 'factorial'\n",
    "}\n",
    "# random effects\n",
    "effects['random'] = {\n",
    "    'TrialNum': 'factorial',\n",
    "    'participant': 'factorial',\n",
    "}\n",
    "\n",
    "#----save data for access by R\n",
    "csv = \"onset_data.csv\"\n",
    "# output folder (the same for every model)\n",
    "p = path_['output'] + \"/analysis/html/model/lmer/\"\n",
    "\n",
    "#----run model for each DV\n",
    "for _y in y:\n",
    "    # formula\n",
    "    f = \"sqrt(%s) ~ os + trialType + TrialNum + (1+TrialNum|participant)\"%(_y)\n",
    "    # run\n",
    "    lmer_, lmer_result, lmer_r, html = model.lmer(config=config_, df=df_, y=_y, f=f, exclude=exclude, csv=csv, path=p, effects=effects)\n",
    "\n",
    "#-----delete\n",
    "del y, _y, f, csv, p"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    " ##### Analysis of Variance: Gaze and Dotprobe bias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "effects = {}\n",
    "\n",
    "# #----load config\n",
    "# p = path_['output'] + \"/analysis/config.json\"\n",
    "# with open(p) as f:\n",
    "#     config_ = json.loads(f.read())\n",
    "config_ = config\n",
    "console('fix config', 'red')\n",
    "\n",
    "#----exclude\n",
    "# read here as well so this cell does not depend on the dwell-time cell having run\n",
    "exclude = config_['metadata']['subjects']['exclude']\n",
    "\n",
    "#----load data\n",
    "p = path_['output'] + \"/analysis/final_data.csv\"\n",
    "df_ = pd.read_csv(p, float_precision='high')\n",
    "\n",
    "#-----exclude participants, keep trialType-level rows\n",
    "# exclude participants\n",
    "df_ = df_[~df_['participant'].isin(exclude)]\n",
    "# keep only rows where `nested` equals 'trialType'\n",
    "df_ = df_.loc[df_['nested'] == 'trialType']\n",
    "\n",
    "#----parameters\n",
    "# dependent variables\n",
    "y = ['dp_bias','gaze_bias']\n",
    "# main effects\n",
    "effects['main'] = {\n",
    "    'cesd_group': 'categorical',\n",
    "    'trialType': 'categorical'\n",
    "}\n",
    "# random effects\n",
    "effects['random'] = {\n",
    "    'participant': 'categorical'\n",
    "}\n",
    "# output folder (the same for every model)\n",
    "p = path_['output'] + \"/analysis/html/model/anova/\"\n",
    "\n",
    "#----run ANOVA for each DV\n",
    "for _y in y:\n",
    "    console('Step: ANOVA (%s)'%(_y), 'red')\n",
    "    #----save data for access by R\n",
    "    csv = \"%s.csv\"%(_y)\n",
    "    #-----formula\n",
    "    f = \"%s ~ cesd_group + trialType + (1|participant)\"%(_y)\n",
    "    #-----run (pass config_, the same object every other model call in this notebook uses)\n",
    "    anova_, anova_result, anova_r, html = model.anova(config=config_, df=df_, y=_y, f=f, csv=csv, path=p, effects=effects)"
   ]
  }
 ],
 "metadata": {
  "file_extension": ".py",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "version": "3.7.3" }, "mimetype": "text/x-python", "name": "python", "npconvert_exporter": "python", "pygments_lexer": "ipython3", "version": 3 }, "nbformat": 4, "nbformat_minor": 2 }