# Source code for imhr.Webgazer.metadata

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
| @purpose: Process participants metadata for analysis and export.  
| @date: Created on Sat May 1 15:12:38 2019  
| @author: Semeon Risom  
| @email: semeon.risom@gmail.com  
| @url: https://semeon.io/d/R33-analysis  
"""

# available classes and functions
__all__ = ['Metadata']

# required external libraries
__required__ = ['pandas','numpy','json']

# core
from pdb import set_trace as breakpoint
import os
import pandas as pd
import numpy as np
import json

# local libraries
from . import settings

class Metadata():
	"""Process participants metadata for analysis and export."""

	def __init__(self, isLibrary=False):
		"""Process participants metadata for analysis and export.

		Parameters
		----------
		isLibrary : :obj:`bool`
			Check if required libraries are available. Default `False`.
		"""
		# optionally hand the module-level list of required libraries to the settings helper
		if isLibrary:
			settings.library(__required__)

	@staticmethod
	def _split_size(sizes):
		"""Split 'WIDTHxHEIGHT' strings into two Series (width, height); non-strings give NaN."""
		parts = sizes.map(lambda v: v.split('x', 1) if isinstance(v, str) else [])
		width = parts.map(lambda p: p[0] if len(p) > 0 else np.nan)
		height = parts.map(lambda p: p[1] if len(p) > 1 else np.nan)
		return width, height

	@staticmethod
	def _strip_name(value, names):
		"""Remove the first matching product-name prefix from `value`, leaving the version text."""
		if not isinstance(value, str):
			# same placeholder the columns are initialised with
			return 'None'
		text = value.strip()
		for name in names:
			# prefix removal; str.lstrip(name) would strip a *character set* instead
			if text.startswith(name):
				text = text[len(name):]
				break
		# trailing letters a/b/c are trimmed exactly as the original code did
		return text.strip().rstrip('aAbBcC').strip()

	@classmethod
	def summary(cls, df, path):
		"""
		Preparing data for use in analysis.

		Parameters
		----------
		df : :class:`pandas.DataFrame`
			Pandas dataframe of raw data. The columns read here are `participant`, `browser`,
			`os`, `date`, `gpu`, `WebcamDevice`, `webcamSize.px`, `windowSize.px`,
			`monitorSize.px`, `devicePixelRatio` and `diagonalSize.in`.
		path : :obj:`str`
			File path the processed metadata is written to (csv).

		Returns
		-------
		df : :class:`pandas.DataFrame`
			Pandas dataframe of processed metadata. The input dataframe is not modified.

		Notes
		-----
		Rows of the test participants 111111 and 999999 are removed. Trial-level columns
		(samples, stimuli, responses) are dropped when present.

		Examples
		--------
		>>> df = Metadata.summary(df=raw, path='/path/to/metadata.csv')
		"""
		# drop test subjects 111111 and 999999; the copy keeps the caller's dataframe untouched
		is_test_id = (df['participant'] == 111111) | (df['participant'] == 999999)
		df = df.loc[~is_test_id].copy()
		df['participant'] = pd.to_numeric(df['participant'])

		# keep the raw browser/os/date strings under *_old; cleaned versions are rebuilt below
		df = df.rename(columns={'browser': 'browser_old', 'os': 'os_old', 'date': 'date_old'})

		# gpu type: flagged from the raw gpu string, before it is cleaned up further down
		df['gpu_type'] = 'integrated'
		for vendor in ('AMD', 'Nvidia', 'NVIDIA'):
			df.loc[df['gpu'].str.contains(vendor, na=False), 'gpu_type'] = 'dedicated'

		# webcam brand: first whitespace-separated word of the device label
		df['webcam_brand'] = df['WebcamDevice'].map(
			lambda v: (v.split() or [np.nan])[0] if isinstance(v, str) else np.nan)

		# webcam size: -1 and "0x0" become ".x.", so width and height both read "."
		df['webcamSize.px'] = df['webcamSize.px'].map(lambda x: '.x.' if x in (-1, '0x0') else x)
		df['webcamWidth'], df['webcamHeight'] = cls._split_size(df['webcamSize.px'])

		# window size
		df['windowWidth'], df['windowHeight'] = cls._split_size(df['windowSize.px'])

		# monitor size: divide by devicePixelRatio to undo the earlier manual multiplication
		df['monitorSize old'] = df['monitorSize.px']
		# the ratio must be numeric before it can be used as a divisor
		df['devicePixelRatio'] = pd.to_numeric(df['devicePixelRatio'])
		width, height = cls._split_size(df['monitorSize.px'])
		df['monitorWidth'] = pd.to_numeric(width) / df['devicePixelRatio']
		df['monitorHeight'] = pd.to_numeric(height) / df['devicePixelRatio']

		# browser family; later names override earlier ones when several match
		browsers = ('Chrome', 'Safari', 'Edge', 'Firefox', 'IE')
		df['browser'] = 'None'
		for family in browsers:
			df.loc[df['browser_old'].str.contains(family, na=False), 'browser'] = family
		# browser version: raw string without the leading browser name
		df['browser_version'] = df['browser_old'].map(lambda x: cls._strip_name(x, browsers))

		# operating system family
		df['os'] = 'None'
		for token, label in (('Windows', 'Microsoft Windows'), ('Mac', 'macOS'), ('Chrome', 'Chrome OS')):
			df.loc[df['os_old'].str.contains(token, na=False), 'os'] = label
		# os version: raw string without the leading os name (longest names first)
		os_names = ('Windows', 'Mac OS X', 'Mac OS', 'OS X', 'Chrome OS')
		df['os_version'] = df['os_old'].map(lambda x: cls._strip_name(x, os_names))

		# date: underscores to dashes, parse, keep the calendar date only, store as text
		df['date'] = [x.strip().replace('_', '-') for x in df['date_old']]
		df['date'] = pd.to_datetime(df['date'])
		df['date'] = df['date'].dt.date
		df['date'] = df['date'].astype(str)

		# rename
		df = df.rename(columns={'windowSize.px': 'windowSize', 'monitorSize.px': 'monitorSize',
								'webcamSize.px': 'webcamSize', 'lum': 'luminance'})

		# convert to cm
		df['diagonalSize.cm'] = df['diagonalSize.in'].map(lambda x: round(x * 2.54, 3))

		# rebuild monitorSize from the adjusted width/height, using the integer part only
		df['monitorSize'] = df['monitorWidth'].map(str).str.split('.').str[0] + 'x' + \
							df['monitorHeight'].map(str).str.split('.').str[0]

		# modify gpu column: remove driver/API decorations one after another (order matters)
		gpu_noise = (('ANGLE ', ''), ('(A', 'A'), ('(I', 'I'), ('(N', 'N'), ('vs_5_', ''),
					('ps_5_', ''), ('vs_3_', ''), ('ps_3_', ''), (' 0 0)', ''), ('(R)', ''),
					('(TM)', ''), ('OpenGL', ''), ('Engine', ''), ('Direct3D11', ''),
					('Direct3D9Ex', ''), ('Family', ''), ('Mesa DRI ', ''), (' (Skylake GT2)', ''))

		def _clean_gpu(value):
			# str() mirrors the original handling: missing values end up as the text 'nan'
			text = str(value)
			for old, new in gpu_noise:
				text = text.replace(old, new)
			# remove leading/trailing whitespace left behind by the removals
			return text.strip()

		df['gpu'] = df['gpu'].map(_clean_gpu)

		# webcam size: placeholders ('.x.', '0x0', -1) become missing values
		df['webcamSize'] = df['webcamSize'].map(lambda x: np.nan if x in ('.x.', '0x0', -1) else x)

		# webcam device: -1 becomes missing, '(Built-in) ' is removed from the label
		df['WebcamDevice'] = df['WebcamDevice'].map(
			lambda x: np.nan if x == -1 else str(x).replace('(Built-in) ', ''))
		# product id: text between the first pair of parentheses; missing when there is none
		df['WebcamDeviceProductID'] = df['WebcamDevice'].astype(object).str.split(r'\(|\)').str.get(1)

		# rename variables
		df = df.rename(columns={'trialNumTask': 'TrialNum', 'Key_Resp.resp': 'RT',
								'isWebcamUsed': 'is_eyetracking'})
		# drop trial-level columns; columns that are not present are skipped
		cols = ['sampleNum', 'x', 'y', 'duration.t', 'Stim_onset.t', 'DotLoc_onset.t', 'blockNum',
				'trialNum', 'TrialNum', 'trialID', 'Key_Resp.rt', 'Key_Resp.cresp', 'Key_Resp.acc',
				'DotLoc', 'LEmotion', 'LStim', 'LDescription', 'REmotion', 'RStim', 'RDescription',
				'trialType', 'isCongruent', 'event', 'trial_type', 'internal_node_id', 'RT']
		df = df.drop(columns=cols, errors='ignore')

		#------------------------------------------------save
		df.to_csv(path, index=False)
		print("demographics saved: %s"%(path))

		return df

	@staticmethod
	def _match_devices(df_target, df_reference):
		"""Return a copy of `df_target` with reference devices matched on each row's gpu string."""
		# output column -> reference column it is taken from
		outputs = (('gpu list', 'gpu list'), ('screen size (cm)', 'screen size (cm)'),
					('pixel density (ppi)', 'pixel density (ppi)'), ('devices', 'device'),
					('model id', 'model id'), ('resolution (px)', 'monitorSize'))

		# pre-compute, once per reference device, its lowercase gpu names and text values
		candidates = []
		for _, ref in df_reference.iterrows():
			gpu_names = [str(g).lower() for g in ref['gpu list']]
			values = {out: str(ref[src]) for out, src in outputs}
			candidates.append((gpu_names, values))

		results = {out: [] for out, _ in outputs}
		exact = []
		for gpu in df_target['gpu']:
			needle = gpu.strip().lower() if isinstance(gpu, str) else ''
			# a missing gpu ('' or 'nan') is substring-contained in other names; treat it as no match
			if needle and needle != 'nan':
				hits = [values for names, values in candidates if any(needle in g for g in names)]
			else:
				hits = []
			for out, _ in outputs:
				if not hits:
					results[out].append('nan')
				elif len(hits) == 1:
					results[out].append(hits[0][out])
				else:
					# several candidate devices: keep all of them as a list
					results[out].append([hit[out] for hit in hits])
			exact.append(len(hits) == 1)

		# whole-column assignment instead of chained df[col][idx] = value assignments
		matched = df_target.copy()
		for out, values in results.items():
			matched[out] = pd.Series(values, index=matched.index, dtype=object)
		matched['exact match'] = pd.Series(exact, index=matched.index, dtype=object)
		return matched

	@staticmethod
	def _export_summary(frame, filepath):
		"""Write the diagnostic columns of `frame` to the Excel file `filepath`."""
		columns = ['participant', 'session', 'subsession', 'os', 'os_version', 'gpu',
					'diagonalSize.cm', 'screen size (cm)', 'gpu list', 'exact match']
		export = frame[columns].copy()
		# list-valued cells cannot be written to a spreadsheet; store their text form
		for name in ('screen size (cm)', 'gpu list'):
			export[name] = export[name].map(lambda v: str(v) if isinstance(v, (list, tuple)) else v)
		export.to_excel(filepath, index=False)

	@classmethod
	def predict(cls, df, df_summary=None, cwd_save=None):
		"""
		Predicting screen size (cm), device (i.e. macbook 2018).

		Parameters
		----------
		df : :class:`pandas.DataFrame`
			Pandas dataframe of metadata. `os_version` is updated in place.
		df_summary : :class:`pandas.DataFrame`
			Dataframe with the columns `participant`, `session`, `subsession` and `location`.
			Required; a `ValueError` is raised when it is missing.
		cwd_save : :obj:`str`, optional
			Directory the exports are written to. Default is the current working directory.

		Returns
		-------
		df : :class:`pandas.DataFrame`
			The input dataframe. The merged predictions are only written to `cwd_save`
			(`lab_summary.xlsx`, `osx_summary.xlsx`, `chrome_summary.xlsx`, `summary.json`).

		Raises
		------
		ValueError
			If `df_summary` is not provided.
		"""
		if df_summary is None:
			raise ValueError("predict() requires `df_summary` with the columns "
							"'participant', 'session', 'subsession' and 'location'.")
		if cwd_save is None:
			cwd_save = os.getcwd()
		keys = ['participant', 'session', 'subsession']

		# clean up sub-version of data: drop the last two characters of OSX versions.
		# (The earlier str.replace(r'[^.]+', '') was a literal, non-regex replace and changed nothing.)
		# NOTE(review): summary() labels the os 'macOS' / 'Chrome OS', while this method compares
		# against 'OSX' / 'Chrome' -- confirm which labels the incoming data uses.
		df['os_version'] = np.where(df['os'] == 'OSX',
									df['os_version'].map(lambda x: x[:-2] if isinstance(x, str) else x),
									df['os_version'])

		# import reference screen sizes; the path normalises to the `info` folder two levels
		# above this file's directory entry
		screensize_path = os.path.abspath(__file__ + '../../../info')
		df_screensize = pd.read_excel(os.path.join(screensize_path, 'screensize.xlsx'))
		# rename for merge
		df_screensize = df_screensize.rename(columns={'resolution (px)': 'monitorSize', 'gpu': 'gpu list'})

		# remove excel non-breaking space \xa0
		for col in ('gpu list', 'device', 'model id'):
			df_screensize[col] = df_screensize[col].replace({'\\xa0': ' '}, regex=True)

		# convert inches to cm
		df_screensize['screen size (cm)'] = df_screensize['screen size (in)'].map(lambda x: round(x * 2.54, 3))

		# convert each comma-separated gpu cell into a list of stripped names
		df_screensize['gpu list'] = df_screensize['gpu list'].astype(str).map(
			lambda x: [name.strip() for name in x.strip().split(',')])

		# merge location (lab or home) from df_summary; duplicates would multiply rows
		locations = df_summary[['participant', 'session', 'location']].drop_duplicates()
		df_all = pd.merge(df, locations, on=['participant', 'session'], how='left')
		df_all = df_all.sort_values(by=keys).reset_index(drop=True)

		# lab computer (if: subject is in lab and using one of the lab machines)
		lab_reference = df_screensize.loc[df_screensize['is lab computer'].eq(True)].reset_index(drop=True).copy()
		lab_reference['location'] = 'lab'
		lab_reference['exact match'] = True
		lab_reference['devices'] = lab_reference['device']
		lab_reference['resolution (px)'] = lab_reference['monitorSize']
		df_lab = pd.merge(df_all, lab_reference[['os', 'gpu list', 'monitorSize', 'location', 'screen size (cm)',
												'pixel density (ppi)', 'exact match',
												'devices', 'model id', 'resolution (px)']],
							on=['os', 'monitorSize', 'location'], how='left')
		# keep only the rows that found a lab machine
		df_lab = df_lab.loc[df_lab['exact match'].eq(True)]
		df_lab = df_lab.sort_values(by=keys).reset_index(drop=True)
		cls._export_summary(df_lab, os.path.join(cwd_save, 'lab_summary.xlsx'))

		# osx and chromebook devices: match each row's gpu against the reference gpu lists
		matched = {}
		for os_name, filename in (('OSX', 'osx_summary.xlsx'), ('Chrome', 'chrome_summary.xlsx')):
			reference = df_screensize.loc[df_screensize['os'] == os_name].reset_index(drop=True)
			subset = df_all.loc[df_all['os'] == os_name].reset_index(drop=True)
			subset = cls._match_devices(subset, reference)
			cls._export_summary(subset, os.path.join(cwd_save, filename))
			matched[os_name] = subset
		df_osx, df_chrome = matched['OSX'], matched['Chrome']

		# combine data---------osx, chrome, and lab
		df_merge = pd.concat([df_lab, df_osx, df_chrome], ignore_index=True)
		df_merge = df_merge.sort_values(by=keys).reset_index(drop=True)
		# arrange columns
		df_merge = df_merge.reindex(list(df_osx.columns), axis=1)

		# combine data---------all ('gpu list' was dropped again right after this merge, so it is left out)
		df_merge_small = df_merge[keys + ['devices', 'exact match', 'resolution (px)',
											'screen size (cm)', 'pixel density (ppi)']]
		df_f = pd.merge(df, df_merge_small, on=keys, how='outer')

		# store possibly list-valued columns as text
		for col in ('devices', 'resolution (px)', 'screen size (cm)', 'pixel density (ppi)'):
			df_f[col] = df_f[col].astype(str)

		# merge location (lab or home) df and df_summary data
		##prepare
		df_sum = df_summary[keys + ['location']].drop_duplicates()
		df_sum = df_sum.sort_values(by=keys).reset_index(drop=True)
		df_f = df_f.sort_values(by=keys).reset_index(drop=True)
		##drop unusual participants
		df_f = df_f.loc[~((df_f['participant'] == 111111) | (df_f['participant'] == 999999))].copy()
		df_sum = df_sum.loc[~((df_sum['participant'] == 111111) | (df_sum['participant'] == 999999))].copy()
		##convert types so the merge keys compare equal
		for frame in (df_f, df_sum):
			frame['participant'] = frame['participant'].astype(float)
			frame['session'] = frame['session'].astype(float)
		##merge
		df_f = pd.merge(df_f, df_sum, on=keys, how='left')

		# export: the records are written as a javascript assignment. The payload is written as
		# produced by to_json; stripping backslashes (as before) corrupts escaped characters.
		finished_json = 'json_data =' + df_f.to_json(orient='records')
		with open(os.path.join(cwd_save, 'summary.json'), 'w') as f:
			f.write(finished_json)

		# NOTE(review): the merged predictions (df_f) are only exported; the input df is returned
		# as before -- confirm whether callers expect df_f instead.
		return df