# -*- coding: utf-8 -*-

# Generates one folder per model in lis_model and per rep count in lis_reps, each containing the corresponding text files and an R program. The working directory must contain test_iid.txt and the Excel file with the models.


#  Here are the setup lines, which specify the input data file, the names of worksheets with the datasets, 
#    the number of lines of data to be sampled for each so-called "subject", and the number of "subjects" to sample



# Workbook that is read; it is expected to hold one worksheet per model name.
excel_file = 'MARTER_data.xlsx'

# Worksheet names, one per model / dataset.
lis_model = [
    'Trans1',
    'Trans2',
    'Trans3',
    'Intrans1',
    'Intrans2',
    'iid_data',
]

# Rows sampled per subject; kept as strings because they are spliced into
# folder and file names.
lis_reps = ['10', '20', '100']

# Number of subjects sampled for every model / rep-count combination.
n_subs = 20

# Start offset for each entry of lis_reps, per model (same offsets for all).
lis_start = {model: [5001, 5201, 5601] for model in lis_model}


#  Note that sampling starts at the offsets listed in lis_start: one start value per entry of lis_reps
#  (5001, 5201 and 5601 above). To start at a different line number, please see the guide




##import the packages
import numpy as np
import pandas as pd
import os
import shutil




def subsample_iid_test_setup(lis_model, lis_reps, n_subs, excel_file, lis_start):
    """Write per-subject text files and a filled-in R script for each model.

    For every model in ``lis_model`` and every entry of ``lis_reps`` a folder
    named ``<model>_rep_<reps>_subs_<n_subs>`` is created in the working
    directory.  It receives ``n_subs`` tab-separated text files (one block of
    ``reps`` consecutive rows each) and a copy of ``test_iid.txt`` in which
    the ``files1``, ``nsubs`` and ``nreps`` placeholders are filled in; the
    copy is renamed to ``test_iid.R``.

    Args:
        lis_model: Worksheet names to read from ``excel_file``.
        lis_reps: Rows per subject, one entry per output folder of a model.
            Each entry must be convertible with ``int()``.
        n_subs: Number of subjects (text files) per folder.
        excel_file: Path of the Excel workbook.
        lis_start: Maps each model name to a list of start offsets, one per
            entry of ``lis_reps``.

    Raises:
        KeyError / IndexError: if ``lis_start`` lacks a model or an offset.
        OSError: if an output folder already exists (``os.mkdir``) or
            ``test_iid.txt`` cannot be copied.
    """
    columns = ('v1', 'v2', 'v3', 'v4', 'v5', 'v6')

    def split_digits(df):
        """Split each response into its first three characters, as ints."""
        frame = pd.DataFrame()
        sources = (('response1', columns[:3]), ('response2', columns[3:]))
        for source, targets in sources:
            text = df[source].astype(str)
            for position, target in enumerate(targets):
                frame[target] = text.str[position].astype(int)
        return frame

    def subject_tables(frame, n_reps, start):
        """Cut ``frame`` into ``n_subs`` consecutive blocks of ``n_reps`` rows.

        Each returned table starts with a header row holding the column
        names and has a leading row-number column whose header cell is "".
        """
        header = pd.DataFrame({name: name for name in columns}, index=[0])
        tables = []
        for subject in range(n_subs):
            first = start + subject * n_reps
            # NOTE: integer slicing of the frame is positional, so `start` is
            # a 0-based row offset into the data read from the worksheet, not
            # an Excel row number.
            block = frame[first:first + n_reps]
            table = pd.concat([header, block]).reset_index(drop=True)
            # Build the row-number column as objects right away instead of
            # assigning "" into an integer column afterwards, which recent
            # pandas versions deprecate as an incompatible-dtype assignment.
            table.insert(0, 'index', [""] + list(range(1, len(table))))
            tables.append(table)
        return tables

    # Phase 1: read every worksheet before anything is written to disk.
    df_dict = {}
    for model in lis_model:
        df_dict[model] = pd.read_excel(
            excel_file, sheet_name=model, names=['response1', 'response2'])

    # Phase 2: create the folders and the per-subject text files.
    # folder name -> (reps label, names of the text files inside the folder)
    outputs = {}
    for model, sheet in df_dict.items():
        digits = split_digits(sheet)
        for rep_index, reps in enumerate(lis_reps):
            reps_label = str(reps)
            start = lis_start[model][rep_index]
            folder = model + "_rep_" + reps_label + "_subs_" + str(n_subs)
            os.mkdir(folder)
            file_names = []
            tables = subject_tables(digits, int(reps), start)
            for number, table in enumerate(tables, 1):
                # Subject numbers are zero-padded to at least two digits.
                text_name = folder + "_" + str(number).zfill(2) + '.txt'
                np.savetxt(os.path.join(folder, text_name), table.values,
                           fmt='%s', delimiter="\t")
                file_names.append(text_name)
            outputs[folder] = (reps_label, file_names)

    # Phase 3: copy the R template into each folder and fill in its
    # placeholders, then give it the .R extension.
    for folder, (reps_label, file_names) in outputs.items():
        shutil.copy2('test_iid.txt', folder)
        script_path = os.path.join(folder, 'test_iid.txt')
        with open(script_path) as handle:
            script = handle.read()
        r_vector = "(" + ", ".join(repr(name) for name in file_names) + ")"
        script = script.replace('files1<-c()', 'files1<-c' + r_vector)
        script = script.replace('nsubs<-3', 'nsubs<-' + str(n_subs))
        script = script.replace('nreps<-10', 'nreps<-' + reps_label)
        with open(script_path, 'w') as handle:
            handle.write(script)
        os.rename(script_path, os.path.join(folder, 'test_iid.R'))
            
    
# Run the generator with the settings defined at the top of this file.
subsample_iid_test_setup(
    lis_model=lis_model,
    lis_reps=lis_reps,
    n_subs=n_subs,
    excel_file=excel_file,
    lis_start=lis_start,
)