#  This is R code to analyze independence in choice data for each subject
#  CITATION: Birnbaum, M. H. (2012). A statistical test of the assumption that repeated choices are independently and identically distributed. Judgment and Decision Making, 7, 97-109.
# ---- Study design and permutation settings ----
nchoices <- 6   # number of choices (columns)
nreps <- 20     # number of repetitions of the study (rows)
nsubs <- 1      # number of subjects
nruns <- 100    # number of random permutations (for accurate results = 10000)

# Input files, one per participant: the sample data file followed by the
# numbered files 02 through 20.
files1 <- c(
  "iid_test_sample_data.txt",
  sprintf("trans_10_rep_20_times_%02d.txt", 2:20)
)

# ---- Prepare the CSV output file ----
# The column-label line is appended to the output file, so anything already
# in that file is kept and the label line is added after it.
file_name <- "iid_test_output.csv"
zlabel <- "file,mean,var,pV,r,pr,nruns"
write.table(zlabel, file = file_name, append = TRUE, quote = FALSE,
            sep = ",", row.names = FALSE, col.names = FALSE)

# Single blank string written as the last field of every result row.
zend <- " "
 
# ---- Helpers used by the per-subject analysis ----

# Squared Euclidean distance between every pair of rows of the matrix `m`.
# Entry [i, j] is sum over columns k of (m[i, k] - m[j, k])^2, i.e. the
# disagreement between repetition i and repetition j. Columns are
# accumulated one at a time, in order.
pairwise_sq_dist <- function(m) {
  d <- matrix(0, nrow(m), nrow(m))
  for (k in seq_len(ncol(m))) {
    d <- d + outer(m[, k], m[, k], "-")^2
  }
  d
}

# Correlation between the gap separating two repetitions (1 .. n - 1) and
# the mean distance between repetitions that far apart, where `d` is an
# n x n distance matrix. The mean for a given gap is taken over the
# corresponding superdiagonal d[i, i + gap]. An undefined correlation
# (NA, e.g. when all the means are equal) is reported as 0.
lag_distance_cor <- function(d) {
  n <- nrow(d)
  gaps <- seq_len(n - 1)
  gap_means <- vapply(gaps, function(gap) {
    rows <- seq_len(n - gap)
    sum(d[cbind(rows, rows + gap)]) / (n - gap)
  }, numeric(1))
  r <- cor(gap_means, gaps)
  if (is.na(r)) 0 else r
}

# ---- Analysis: one pass per subject ----
# For each input file this computes the mean and variance of the pairwise
# distances between repetitions and the gap/distance correlation, estimates
# permutation p-values for the variance and for the correlation, and appends
# one result row to the CSV output file.
if (nreps < 2) {
  stop("nreps must be at least 2 to compare repetitions", call. = FALSE)
}

for (iii in seq_len(nsubs)) {
  file1 <- files1[iii]
  mm <- read.table(file1)   # read in the data for one subject

  # Fail early with a clear message instead of propagating NA values or an
  # "undefined columns" error from deep inside the calculations.
  if (nrow(mm) < nreps || ncol(mm) < nchoices) {
    stop("'", file1, "' must contain at least ", nreps, " rows and ",
         nchoices, " columns", call. = FALSE)
  }

  # x holds the original data as a matrix (nreps rows, nchoices columns);
  # matrix indexing is far cheaper than data-frame indexing.
  x <- as.matrix(mm[seq_len(nreps), seq_len(nchoices), drop = FALSE])
  if (!is.numeric(x) && !is.logical(x)) {
    stop("'", file1, "' contains non-numeric choice data", call. = FALSE)
  }

  # Statistics of the original data.
  # z is the matrix of disagreements between reps in the original data.
  z <- pairwise_sq_dist(x)
  a <- mean(z)
  b <- var(c(z))
  obs_cor <- lag_distance_cor(z)

  # Statistics of permuted data. Data are permuted across rows within
  # columns; this leads to tests of iid independence.
  # vardist / cordist collect the variance and the correlation obtained
  # from each permuted data set.
  vardist <- numeric(nruns)
  cordist <- numeric(nruns)
  xperm <- x
  for (kk in seq_len(nruns)) {
    # One independent row permutation per column is a complete permutation
    # of the data set; repeating it would only overwrite xperm again.
    for (jj in seq_len(nchoices)) {
      xperm[, jj] <- x[sample(nreps, nreps), jj]
    }
    zperm <- pairwise_sq_dist(xperm)
    vardist[kk] <- var(c(zperm))
    cordist[kk] <- lag_distance_cor(zperm)
  }

  # p:  proportion of permutations whose variance is at least the variance
  #     in the original data (p-value of the variance test of iid).
  # p2: proportion of permutations whose absolute correlation is at least
  #     the absolute correlation in the original data.
  # Both comparisons use the unrounded statistics.
  p <- sum(vardist >= b) / nruns
  p2 <- sum(abs(cordist) >= abs(obs_cor)) / nruns

  # Variance and correlation are reported rounded to 3 digits.
  b_out <- round(b, digits = 3)
  r_out <- round(obs_cor, digits = 3)

  # Fields of the result row, in the order file, mean, var, pV, r, pr, nruns.
  o1 <- c(file1, a, b_out, p, r_out, p2, nruns)

  # Append the row to the CSV file: every field is quoted and separated by
  # commas, and the row ends with the extra field `zend`.
  write.table(matrix(c(o1, zend), nrow = 1), file = file_name,
              append = TRUE, quote = TRUE, sep = ",",
              row.names = FALSE, col.names = FALSE)

  # hist(vardist)           would display the histogram of the sampling
  #                         distribution under H0
  # plot(density(vardist))  would display the density of that histogram
}