# http://DataColada.org/76
#
#Code to reproduce MTurk study on Maluma vs Takiti using Dutch Baby names from 2016
#Written by Uri Simonsohn (urisohn@gmail.com)
#Please contact directly if you see any errors or have any comments.
#
#
#**Note from 2019 04 29** --- After posting, and following requests by Blake McShane,
# we added a dummy 1/0 to both datasets for whether the same MTurk ID
# was observed across waves, and we later added a masked MTurk id as well.
# When first posted, there was one observation that was miscoded as being a duplicate when it was not
# (as could be verified by the non-matching masked MTurk ids).
# Blake McShane spotted the error, whose origin we were unable to identify: re-running the
# code we had used to generate the file did not reproduce the error.
# The .csv files relied on by the code below, which loads the data from the Colada server, have that error corrected.
#
#Version: 2019 05 06 (typo fixed in dropping missing values)
#
###########################################################################################################
#Outline
# 0. Preliminaries
# 1. Functions (just two)
# 2. Get data ready to be analyzed
# 3. Analysis
#    3.1 Function that computes d and n for every study, outputs data.frame with three columns: name, d, n
#    3.2 Compute d & n for collected data
#    3.3 Correlation in observed effect size across waves within study
#    3.4 Meta-analysis of the first wave, the I2=78% for Wave 1 reported in the post
#    3.5 Figure 1
#    3.6 Figure 2 - Scatterplot
#    3.7 Bootstrapping individual data rows (original; see 3.9 for bootstrapping by participant)
#    3.8 Absolute mean error and Figure 3
#    3.9 Bootstrapping participants
#######################################################################

# 0. Preliminaries
rm(list = ls())
library(metafor)   #to run meta-analysis
library(stringr)   #to handle strings
library(rio)       #to import any data format

#1) Functions
#1.1 Function 1: like VLOOKUP in Excel, for dealing with the names
vlookup=function(x,table,k=2) table[match(x,table[,1]),k]

#1.2 Function 2: format two-decimal numbers without a leading 0
round.fun <- function(val) { sub("^(-?)0.", "\\1.", sprintf("%.2f", val)) }
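#Quick illustration of the two helpers above (not in the original script; the toy lookup table is made up):
vlookup(c("b","a"), data.frame(key=c("a","b"), val=c(10,20)))   #returns 20 10, like Excel's VLOOKUP
round.fun(c(-0.768, 0.05))                                      #returns "-.77" ".05"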
#2) Get data ready to be analyzed
#2.1) Load directly from the Colada server
#These files are the raw Qualtrics .csv files, except for the following modifications:
#IP, latitude, longitude and MTurk id have been deleted for privacy reasons

#Load from the server the datafiles which include masked MTurk ids
a1=import("https://datacolada.org/appendix/76/Data%201%20-%20Colada%2076%20-%202019%2004%2026%20(masked%20mturk%20id).csv")
a2=import("https://datacolada.org/appendix/76/Data%202%20-%20Colada%2076%20-%202019%2004%2026%20(masked%20mturk%20id).csv")

#2.1.5) Add a subject id, just a counter, as the true MTurk id is hidden for privacy reasons
#(this code was written before the masked id had been generated or used)
a1$id=1:nrow(a1)
a2$id=1:nrow(a2)

#Note: 2019 04 25
#Late (on European time) on the day of the post we received an email from Blake McShane asking some clarifying
#questions about the data, including whether there were any repeat participants.
#We went back to the original files, which had the MTurk id that we had dropped, and created new columns in both
#datasets for when the same MTurk ID was used. By un-commenting the following two lines, you can re-run the entire
#code dropping repeat participants; we added a footnote to the post with those results.
#If writing code from scratch we would have made it easier to carry out the exclusions without relying on
#commenting out, but we didn't.
#a1=a1[a1$duplicate.mturk.id==0,]   #drop participants from Wave 1 that also participated in Wave 2 (at least, same MTurk id)
#a2=a2[a2$duplicate.mturk.id==0,]   #drop participants from Wave 2 that also participated in Wave 1 (at least, same MTurk id)
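#Aside (not in the original script): a minimal sketch of the flag-based exclusion the note above alludes to,
#so the robustness check does not require commenting code in and out. The flag name is ours;
#duplicate.mturk.id is the 1/0 column added to the posted .csv files.
drop.repeat.participants = FALSE   #set to TRUE to reproduce the footnote dropping repeat participants
if (drop.repeat.participants) {
  a1 = a1[a1$duplicate.mturk.id == 0, ]   #keep only Wave 1 participants not seen in Wave 2
  a2 = a2[a2$duplicate.mturk.id == 0, ]   #keep only Wave 2 participants not seen in Wave 1
}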
#2.2 Reshape the data: grab the multiple columns with values and put them in a single vector, plus the name
#They come in the default Qualtrics format, where each question version is its own column, and the columns are mostly empty
#See the variable names
names(a1)
#Data start in column 5, so the loop below starts grabbing ys there
y1=y2=names1=names2=c()
for (k in 1:50)
{
  #Grab the ratings and the names being used
  y1=c(y1,a1[,4+k])   #4+k because, as noted above, the data start in column 5
  y2=c(y2,a2[,4+k])
  names1=c(names1,rep(names(a1)[4+k],nrow(a1)))
  names2=c(names2,rep(names(a2)[4+k],nrow(a2)))
}

#2.2.1 Create a participant id for every row (copy all the ids vertically 50 times);
#this will let us know which person we are drawing later on in the bootstrap
#Note 2019 04 29: this would no longer be relevant as we have the masked MTurk id, but it is the way it was coded originally
id1=rep(a1$id,50)
id2=rep(a2$id+1000,50)

#2.3 Delete empty rows
keep1=ifelse(is.na(y1),F,T)
keep2=ifelse(is.na(y2),F,T)
names1=names1[keep1]
names2=names2[keep2]
y1=y1[keep1]
y2=y2[keep2]
id1=id1[keep1]
id2=id2[keep2]

#2.4 Turn to numeric
y1=as.numeric(y1)
y2=as.numeric(y2)
#y2=as.numeric(y2)+runif(length(y2))   #use this to verify the process works: when run this way, adding noise,
                                       #the expected variation is indeed below what's observed

#2.5 Sort names so the girl name always appears first
#Create a table with the names and whether the pair is reverse-coded (i.e., girl name on the right)
reverse.raw=rep(c(0,1),25)             #Names in the original stimuli alternate girl-boy, boy-girl, etc.
names.raw=names(a1[5:54])              #grab the names
names_split=str_split(names.raw,"-")   #split the first and second name
#Turn the list into two arrays
first  = unlist(names_split)[2*(1:50)-1]   #the name appearing first
second = unlist(names_split)[2*(1:50)  ]   #the name appearing second
#Put back together, always girl-boy
names.adj=ifelse(reverse.raw==0,paste(first," - ",second),paste(second, " - ",first))
names.adj
#Now put the raw names, the coding, and the adjusted names in a table
reverse.table=data.frame(names.raw, reverse.raw, names.adj)

#3 Analysis
#3.1 Function that computes d and n for every study; outputs a data.frame with three columns: name, d, n
run.once=function(y,names)
  #y:     the dv
  #names: a vector with the name pair associated with each observation
{
  #M and SD by name
  m   =aggregate(y,list(names),mean)
  sd  =aggregate(y,list(names),sd)
  sum1=aggregate(y,list(names),FUN=sum)
  n=sum1$x/m$x   #sum/mean recovers the number of observations per name pair
  #Look up the names to know if they need to be reverse-coded
  reverse=vlookup(m$Group.1,reverse.table,2)   #uses custom Function 1, like VLOOKUP in Excel
  #Adjust
  m.adjusted=ifelse(reverse==1,6-m$x,m$x)      #reverse-coded when the girl name is on the right
  #Cohen's d
  d=(m.adjusted-3)/sd$x                        #standardize the effect size
  #Output the Cohen's d and the sample size for each name
  data.frame(m$Group.1, d, n)
}

#3.2 Compute d & n for collected data
df1.obs=run.once(y1,names1)   #a data.frame with the name pairs, Cohen's d, and sample size, in Wave 1
df2.obs=run.once(y2,names2)   #a data.frame with the name pairs, Cohen's d, and sample size, in Wave 2

#3.3 Correlation in observed effect size across waves
r.obs=cor(df1.obs$d,df2.obs$d)   #each row is a name pair; we compute the correlation between the waves
r.obs

#3.4 Meta-analysis of the first wave, the I2=78% for Wave 1 reported in the post
rma(df1.obs$d,1/df1.obs$n)   #This runs a meta-analysis with d as the effect size and 1/N as the variance;
                             #it's 1/N because d has Mean=0 and Var=1 by construction,
                             #so the mean of that variable has variance 1/N
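#Aside (not in the original script): a minimal simulation sketch of the variance claim above, assuming only
#that the standardized ratings have SD=1: the mean of N such draws has variance of approximately 1/N
set.seed(76)
N=100
means=replicate(10000, mean(rnorm(N)))   #10,000 means of N standardized observations
var(means)                               #~.01
1/N                                      #the variance used in rma() above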
#3.5 Figure 1
# setwd("c:/dropbox/DataColada/76 Heterogeneity")   #change to the folder you want to use, if you will be saving with png()

#Effect in Wave 1
d1=df1.obs$d
#Names re-coded in Wave 1, using vlookup (Function 1 above)
n1=vlookup(df1.obs$m.Group.1[order(d1)],reverse.table,3)

# png("F1 - Barplot for Maluma Takiti.png", width=1200,height=800,res=125)   #commented out; this saves the graph to a file
#Margin
par(mar=c(10.5,5.1,4.1,2.1))
#Plot
bp1=barplot(sort(d1),col='red3',las=1,ylim=c(-.1,.8))
#x-axis
axis(side=1,at=bp1,n1,las=2,cex=.9)
mtext(side=1,line=8.5,cex=1.5,font=2,"Girl - Boy name pair")
#y-axis
mtext(side=2,line=3,cex=1.5,font=2,"Cohen's d")
#mtext(side=2,line=3,cex=1.1,font=3,"(Rating of girl name to Maluma - 3)/SD")
#Value labels
dx1=df1.obs$d[order(df1.obs$d)]
text(bp1,dx1+.02*sign(dx1),round.fun(dx1),cex=.7)
mtext(side=3,font=2,cex=1.55,"Standardized rating assigning the girl name to the rounded shape")
mtext(side=3,font=3,cex=1.15,line=-1.25,"(Standardized: SDs of 1-5 rating above midpoint)")
#dev.off()   #commented out; this completes the .png file and closes it

#3.6 Figure 2 - Scatterplot
#Simple vectors to work with
d1=df1.obs$d   #effect size in Wave 1
d2=df2.obs$d   #effect size in Wave 2

#png("F2 - Scatterplot Maluma Takiti.png", width=1200,height=800,res=125)   #commented out; this saves the figure to a file
par(mar=c(5.1,6.1,4.1,2.1))
plot(d1, d2,pch=16,col='gray77', xlab="",ylab="",xlim=c(-.2,1.1),ylim=c(-.2,1.1))
abline(0,1,lty=3,lwd=3)
abline(lm(d2~d1),col='red3',lwd=3)
#Legend
# Perfect replicability (identical estimates in Waves 1 & 2)
# Observed replicability (OLS line)
legend(-.2,1.1,col=c("black","red3"),lty=c(3,1), lwd=c(3,3),
       legend=c("Perfect replicability line (45 degrees: Wave 2 = Wave 1)",
                "Observed replicability line (regression: Wave 2 = a + b * Wave 1)"),cex=1.15)
#x-axis
mtext(side=1,font=2,cex=1.4,"Effect Size in Wave 1",line=2.5)
mtext(side=1,font=3,cex=1.2,"(June 2017)",line=3.75)
#y-axis
mtext(side=2,font=2,cex=1.4,"Effect Size in Wave 2",line=3.75)
mtext(side=2,font=3,cex=1.2,"(April 2019)",line=2.55)
#Text for correlation
cor(d1,d2)
cor(d1,d2,method="spearman")   #robustness, not reported
text(.6,0,"corr(wave 1, wave 2) = .77",col='gray44',cex=1.25)
#Header
mtext(side=3,line=2,cex=2,font=2,"Observably Heterogeneous Effects Do Replicate")
mtext(side=3,line=.2,cex=1.35,font=3,col='gray20',"(Name pairs showing a larger Maluma effect in 2017 show a larger effect in 2019)")
#dev.off()   #commented out; this would save the .png and close the figure file

#3.7 Bootstrapping individual data rows (original; see 3.9 for bootstrapping by participant)
#Preliminaries
set.seed(190)
simtot=5000   #Number of bootstrap iterations
#Combine y1 and y2, and the names, to draw under the null that they all come from the same population (no heterogeneity)
y12=c(y1,y2)
names12=c(names1,names2)
#Start the loop, saving the correlations in rk1 (when drawing only from Wave 1) and rk12 (when drawing from Waves 1 and 2)
rk1=rk12=c()
d1.all=d2.all=matrix(nrow=simtot,ncol=50)
for (simk in 1:simtot)
{
  N1=length(y1)   #How many observations in Wave 1
  N2=length(y2)   #How many observations in Wave 2

  #3.7.1 Draw from Wave 1 only (in footnote)
  k1=sample(N1,size=N1,replace=T)   #Draw Wave 1 in the size that it was, from Wave 1
  k2=sample(N1,size=N2,replace=T)   #Draw Wave 2 in the size that it was, from Wave 1
  y1.k1=y1[k1]                      #This is bootstrapped Wave 1
  y1.k2=y1[k2]                      #This is bootstrapped Wave 2
  names1.k1=names1[k1]              #The corresponding names
  names1.k2=names1[k2]
  df1.bootk1=run.once(y1.k1,names1.k1)   #Compute the Cohen's d's
  df2.bootk2=run.once(y1.k2,names1.k2)
  rk1[simk]=cor(df1.bootk1$d, df2.bootk2$d)   #Store the correlation between d1 and d2 in this sample

  #3.7.2 Draw from Waves 1 and 2 combined (in main text)
  k1=sample(N1+N2,size=N1, replace=T)   #Draw these observations for Wave 1 from Waves 1 and 2
  k2=sample(N1+N2,size=N2, replace=T)   #Draw these observations for Wave 2 from Waves 1 and 2
  #DV (here is the actual drawing; above we just chose which observations are selected)
  y1.k12=y12[k1]
  y2.k12=y12[k2]
  #Names
  names1.k12=names12[k1]
  names2.k12=names12[k2]
  #Compute Cohen's ds
  df1.bootk12=run.once(y1.k12,names1.k12)
  df2.bootk12=run.once(y2.k12,names2.k12)
  #Compute correlations
  rk12[simk]=cor(df1.bootk12$d, df2.bootk12$d)
  #Save all ds
  d1.all[simk,]=df1.bootk12$d
  d2.all[simk,]=df2.bootk12$d
  #Counter every 500
  if (simk%%500==0) cat("...",simk)
}

#3.7.3 Results of the bootstrap
#3.7.3.1 Mean correlation, and p-value, bootstrapping from the merged waves
#See the result
mean(rk12)   #Average correlation under the null
#Make the histogram in the footnotes
hist(rk12,col='gray77',border=F,breaks=35,ylim=c(0,550),
     xlab='Correlation: r(Wave 1, Wave 2)',
     main="Distribution of Correlations between Wave 1 and Wave 2 under null")
#Vertical lines in histogram
abline(v=r.obs,col='red4',lwd=4)
abline(v=mean(rk12),col='gray44',lwd=3,lty=2)
text(.76,550,adj=1,"Observed correlation r=.77",col='red4')
text(.80,550,adj=0,"Expected correlation r=.79",col='gray44')

#3.7.3.2 Mean correlation, and p-value, bootstrapping from only Wave 1
mean(rk1)
hist(rk1)
abline(v=r.obs,col='red',lwd=4)

#p-values for both approaches
mean(abs(rk12) <= abs(r.obs))
mean(abs(rk1) <= abs(r.obs))
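#Aside (not in the original script): a minimal sketch of another way to read the null distribution,
#comparing the observed correlation to a 95% percentile interval of the bootstrapped correlations
quantile(rk12, c(.025, .975))   #range of correlations expected under zero heterogeneity
r.obs                           #the observed correlation, to compare against that interval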
#3.8 Absolute mean error
#3.8.1 Compute the absolute error in the bootstrapped samples
#Compute the average absolute deviation between waves in the bootstrapped data
dif.abs.null=abs(d1.all-d2.all)   #d1.all and d2.all are matrices generated in the bootstrapping loop above;
                                  #they contain simtot rows and 50 columns, with the estimates for each simulation
#Verify visually that it works as intended: the 3rd row is the difference of the first 2
rbind(d1.all[,1],d2.all[,1],dif.abs.null[,1])[,1:5]
#The expected mean absolute difference is the mean of all absolute differences in the simtot bootstraps
mean.dif.null=mean(dif.abs.null)

#3.8.2 Compute the absolute mean error in the observed sample
mean.dif.obs=mean(abs(d1-d2))

#3.8.3 Compute ranked absolute errors
#Sort each row in the matrix of differences in effects so that we can analyze ranks
sorted.rows=matrix(nrow=simtot, ncol=50)   #This matrix has simtot rows, with all studies in each row sorted by the absolute error
for (k in 1:simtot) sorted.rows[k,]=sort(dif.abs.null[k,])
null.sorted=colMeans(sorted.rows)

#3.8.4 Plot Figure 3
#png("F3 - Expected vs observed absolute errors.png", width=1400,height=800,res=125)   #commented out; this saves the graph to a file
par(mar=c(13.1,6.1,4.1,2.1))
plot(sort(abs(d1-d2)),xaxt="n",pch=16,col='red4',cex=1.25, xlab="",ylab="",las=1,xlim=c(0,55))
points(null.sorted,col='blue',type='l',lwd=2)
legend(2,.4,cex=1,lty=c(NA,1),pch=c(16,NA), col=c("red4","blue"),
       legend=c("Observed difference between waves for each name-pair",
                "Expected difference between waves under ZERO heterogeneity"))
axis(side=1,at=1:50,paste0(n1[order(abs(d1-d2))]," [",1:50,"]"),las=2,cex=.9)   #Names for bars
axis(side=1,at=53,"Observed average ",las=2,cex=1,font=2)
axis(side=1,at=55,"Expected average ",las=2,cex=1,font=2)
#Bars at the end for the averages
polygon(x=c(52,52,54,54),y=c(0,mean.dif.obs,mean.dif.obs,0),col='red4')
polygon(x=c(54,54,56,56),y=c(0,mean.dif.null,mean.dif.null,0),col='blue3')
#Value labels
text(52.5,mean.dif.obs+.01,round(mean.dif.obs,3),cex=.9,col='red4')
text(55.5,mean.dif.null+.01,round(mean.dif.null,3),cex=.9,col='blue3')
mtext(side=1,line=11,font=2,cex=1.75,"The 50 studies sorted by absolute difference in effects between waves")
#y-axis
mtext(side=2,font=2,cex=1.5,line=4,"Difference in effect size across waves")
mtext(side=2,font=3,cex=1.35,line=2.55,"(abs(Wave 2 - Wave 1))",col='gray60')
#Vertical line
abline(v=51,col='gray66')
#Header
mtext(side=3,font=2,cex=1.8,line=2, "Effect sizes between waves differ as expected with ZERO heterogeneity")
#dev.off()

##########################################################################
#3.9 Bootstrapping participants
#Note: In feedback provided upon seeing a draft of this post, Blake, Ulf and Karsten commented
#that the bootstrap above does not take into account the dependency across participants;
#to address that issue, here the bootstrap is done at the participant level.

#Preliminaries
set.seed(191)
simtot=250   #Number of bootstrap iterations
#Combine y1 and y2, and the names, to draw under the null that they all come from the same population (no heterogeneity)
y12=c(y1,y2)
names12=c(names1,names2)
id12=c(id1,id2)                    #Combine the participant ids for both waves
N1.subjects=length(unique(id1))    #How many participants in Wave 1
N2.subjects=length(unique(id2))    #How many participants in Wave 2
id12.subjects=unique(c(id1,id2))   #Unique identifier of each participant in both waves
rk.subject=c()                     #Will store the correlation obtained in each iteration
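#Aside (not in the original script): the note in 3.9.2 below says no easy vector-based alternative to the
#participant loop was found; a minimal sketch of one, using split() to index the observations by participant id
#(list indexing by name repeats elements for repeated draws, so bootstrap duplicates are handled correctly):
#  y.by.id     = split(y12, id12)
#  names.by.id = split(names12, id12)
#  y1.boot     = unlist(y.by.id[as.character(k1.subject)], use.names=F)
#  name1.boot  = unlist(names.by.id[as.character(k1.subject)], use.names=F)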
#Start the loop, saving the correlations in rk.subject
for (simk in 1:simtot)
{
  #3.9.1 Draw from both waves, entire subjects
  k1.subject=sample(id12.subjects,size=N1.subjects,replace=T)   #Draw the # of participants seen in Wave 1, from Waves 1 & 2
  k2.subject=sample(id12.subjects,size=N2.subjects,replace=T)   #Draw the # of participants seen in Wave 2, from Waves 1 & 2

  #3.9.2 Could not find an easy vector-based way to do this, so I loop over the participant IDs drawn at random
  #      and get all the values of y that match each id
  y1.boot=y2.boot=name1.boot=name2.boot=c()
  for (k in k1.subject)
  {
    y1.boot=c(y1.boot, y12[id12==k])
    name1.boot=c(name1.boot,names12[id12==k])
  }
  for (k in k2.subject)
  {
    y2.boot=c(y2.boot, y12[id12==k])
    name2.boot=c(name2.boot,names12[id12==k])
  }
  df1.bootk1=run.once(y1.boot,name1.boot)   #Compute the Cohen's d's
  df2.bootk2=run.once(y2.boot,name2.boot)   #Compute the Cohen's d's
  rk.subject[simk]=cor(df1.bootk1$d, df2.bootk2$d)   #Store the correlation between d1 and d2 in this sample
  #Counter every 10
  if (simk%%10==0) cat("...",simk)
}

#3.9.3 Results of the bootstrap
#Mean correlation, and p-value, bootstrapping from the merged waves
r.expected=mean(rk.subject)   #Average correlation under the null; very similar to before: r=.795 instead of r=.79
r.expected
hist(rk.subject,col='gray77',border=F,breaks=35,ylim=c(0,100),
     xlab='Correlation: r(Wave 1, Wave 2)',
     main="Distribution of Correlations between Wave 1 and Wave 2 under null")
abline(v=r.obs,col='red4',lwd=4)
abline(v=r.expected,col='gray44',lwd=3,lty=2)
text(.76,100,adj=1,"Observed correlation r=.77",col='red4')
text(r.expected,100,adj=0,"Expected correlation r=.795",col='gray44')

#p-value
mean(abs(rk.subject) <= abs(r.obs))
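#Aside (not in the original script): a quick side-by-side of the two null distributions,
#row-level (rk12, section 3.7) vs participant-level (rk.subject, section 3.9)
summary(rk12)
summary(rk.subject)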