###################################################################################################################
# author: Jona Cederbaum
# date: 08.04.2015
###################################################################################################################
# description: function that is called in gendata.R to generate data sets
## which can be analyzed using the proposed FLMM. 
###################################################################################################################
gendata<-function(I=10,J=10,lamB_fun=function(k){1/k},lamC_fun=function(k){1/k},
                  lamU_fun=function(k){1/k},sigmasq=0.05,normal=TRUE,mu=function(t){t+sin(t)},
                  N_B=1,N_C=1,N_U=1,phiB_fun=phiB,phiC_fun=phiC,phiU_fun=phiU,minsize=10,
                  maxsize=20,min_grid=0,max_grid=1,min_visit=2,max_visit=3,use_RI_true=FALSE,covariate=FALSE,
                  center_scores=TRUE,decor_scores=TRUE){
  
  res<-list() # initialize output
  sigmasq_true<-sigmasq        
  
  
  # check which model type
  if(!use_RI_true){
    print("you are generating data using a crossed design")
  }else{
    print("you are generating data using a random intercept design")
    J<-NA
  }
  
  
  
  if(!use_RI_true){
    if(min_visit!=max_visit){
      Hvec<-sort(sample(x=(min_visit:max_visit),size=I*J,replace=TRUE,prob=sample(x=2:5,
                                                                                  size=(max_visit-min_visit+1),replace=TRUE))) 
    }else{
      Hvec<-rep(min_visit,length=I*J)
    }
  }else{
    if(min_visit!=max_visit){
      Hvec<-sort(sample(x=(min_visit:max_visit),size=I,replace=TRUE,prob=sample(x=2:5,
                                                                                size=(max_visit-min_visit+1),replace=TRUE))) 
    }else{
      Hvec<-rep(min_visit,length=I)
    }
  }
  n<-sum(Hvec)  # total number of curves
  
  ###########################################
  # make all combinations of subject and word
  ###########################################
  if(!use_RI_true){
    help<-expand.grid(1:J,1:I)
    subject_help<-help$Var2
    word_help<-help$Var1
    subject<-rep(subject_help,Hvec)
    word<-rep(word_help,Hvec)
    combi_help<-list()
    for(i in 1:(I*J)){
      combi_help[[i]]<-1:Hvec[i]
    }
    combi<-unlist(combi_help)
    
  }else{
    subject<-rep(1:I,times=Hvec)
  }
  
  ######################################
  # draw for each curve number of points
  ######################################
  if(minsize!=maxsize){
    number<-sample(x=rep(minsize:maxsize),size=n,replace=TRUE)   
  }else{
    number<-rep(minsize,length=n)     # number of time points per curve
  }
  
  number_long<-rep(number,number)
  
  n_long<-rep(1:n,number)  # long vector with 1:n each as many times as time points per curve
  
  subject_long<-rep(subject,number) # long vector with subject[1]:subject[n] each as many times as points per curve
  
  if(!use_RI_true){
    word_long<-rep(word,number)
    combi_long<-rep(combi,number)
    curve_info<-data.table(n_long,subject_long,word_long,combi_long,number_long)
  }else{
    curve_info<-data.table(n_long,subject_long,number_long)  
  }
  
  ###############################################################################
  # draw for each curve locations of the observations, given the number of points
  ###############################################################################
  
  # locations are randomly chosen from uniform distribution
  locs<-list()  # locations of measurements for each person as a list, because different numbers
  for(i in 1:n){ # for each curve i = 1,...,n
    locs[[i]]<-runif(n=number[i],min=min_grid,max=max_grid)
  }
  
  # now bring time points in order before functions are constructed
  t<-list()
  o<-lapply(locs,order)
  for(i in 1:n) {
    help<-o[[i]]
    t[[i]]<-locs[[i]][help]}  
  
  t_vec<-unlist(t)
  
  curve_info[,t:=t_vec]
  
  ###########################################################
  # draw scores of normal or mixure of normal distribution(s)
  ###########################################################
  if(N_B>0){
    lamB<-lamB_fun(1:N_B)  
  }else{
    lamB<-0
    warning("N_B is not positive")
  }        
  
  
  if(!use_RI_true){
    if(N_C>0){
      lamC<-lamC_fun(1:N_C)  
    }else{
      lamC<-0
      warning("N_C is not positive")
    }
  }
  
  if(N_U>0){
    lamU<-lamU_fun(1:N_U)  
  }else{
    lamU<-0
    warning("N_U is not positive")
  }     
  
  
  if(normal==TRUE){# in normal case: \xi is drawn from N(0,\lambda_k)        
    if(N_B>0){
      xiB<-rmvnorm(I,mean=rep(0,N_B),sigma=diag(N_B)*lamB)   
    }else{
      xiB<-rep(NA,length=I)
    }
    
    if(!use_RI_true){
      if(N_C>0){
        xiC<-rmvnorm(J,mean=rep(0,N_C),sigma=diag(N_C)*lamC)  
      }else{
        xiC<-rep(NA,length=J)
      }
    }
    
    if(N_U>0){
      xiU<-rmvnorm(n,mean=rep(0,N_U),sigma=diag(N_U)*lamU)   
    }else{
      xiU<-rep(NA,length=n)
    }
  } 
  
 
  # note to mvnorm:
  # note that one could also use matrix(rnorm(I*N_B),I,N_B)%*%diag(sqrt(lamB),N_B,N_B)
  # but the result is only the same when in rmvnorm pre0.9_9994 is set to TRUE
  
  
  if(normal==FALSE){ 
    if(N_B>1){
      xiB<-matrix(rnorm(I*N_B),I,N_B)%*%diag(sqrt(lamB/2))+matrix(2*rbinom(I*N_B,1,0.5)-1,I,N_B)%*%diag(sqrt(lamB/2))
    }else{
      if(N_B>0){
        xiB<-matrix(rnorm(I*N_B),I,N_B)%*%sqrt(lamB/2)+matrix(2*rbinom(I*N_B,1,0.5)-1,I,N_B)%*%sqrt(lamB/2)  
      } else{
        xiB<-rep(NA,length=I)
      }
    }
    
    if(!use_RI_true){
      if(N_C>1){
        xiC<-matrix(rnorm(J*N_C),J,N_C)%*%diag(sqrt(lamC/2))+matrix(2*rbinom(J*N_C,1,0.5)-1,J,N_C)%*%diag(sqrt(lamC/2))
        
      }else{
        if(N_C>0){
          xiC<-matrix(rnorm(J*N_C),J,N_C)%*%sqrt(lamC/2)+matrix(2*rbinom(J*N_C,1,0.5)-1,J,N_C)%*%sqrt(lamC/2)  
        }else{
          xiC<-rep(NA,length=J) 
        }
      }
    }
    if(N_U>1){
      xiU<-matrix(rnorm(n*N_U),n,N_U)%*%diag(sqrt(lamU/2))+matrix(2*rbinom(n*N_U,1,0.5)-1,n,N_U)%*%diag(sqrt(lamU/2))  
    }else{
      if(N_U>0){
        xiU<-matrix(rnorm(n*N_U),n,N_U)%*%sqrt(lamU/2)+matrix(2*rbinom(n*N_U,1,0.5)-1,n,N_U)%*%sqrt(lamU/2)  
      }else{
        xiU<-rep(NA,length=n) 
      }
    }
  }
  
  ###############
  # center scores
  ###############
  if(center_scores){
    if(N_B>0){
      for(k in 1:N_B){
        xiB[,k]<-xiB[,k]-mean(xiB[,k])
      }
    }
    
    
    if(!use_RI_true){
      if(N_C>0){
        for(k in 1:N_C){
          xiC[,k]<-xiC[,k]-mean(xiC[,k])
        }
      }
    }
    
    if(N_U>0){
      for(k in 1:N_U){
        xiU[,k]<-xiU[,k]-mean(xiU[,k])
      }
    }
  }
  
  
  ######################
  # decorrelate scores
  # and scale with right 
  # variance
  ######################
  if(decor_scores){
    if(N_B>1)
      xiB<-xiB%*%sqrtm(solve(cov(xiB)))%*%sqrtm(diag(lamB,N_B,N_B))
    if(!use_RI_true){
      if(N_C>1)
        xiC<-xiC%*%sqrtm(solve(cov(xiC)))%*%sqrtm(diag(lamB,N_C,N_C))
    }
    if(N_U>1)
      xiU<-xiU%*%sqrtm(solve(cov(xiU)))%*%sqrtm(diag(lamB,N_U,N_U))
  }
  
  
  
  ################
  # bring scores 
  # into format of
  # curve_info
  ################
  
  #####################################
  # for subject/first grouping variable
  #####################################
  if(N_B>0){
    xiB_long_help<-list()
    xiB_long_help2<-list()
    for(k in 1:N_B){
      xiB_long_help[[k]]<-xiB[,k][subject]
      xiB_long_help2[[k]]<-rep(xiB_long_help[[k]],number)
    }
    
    xiB_long<-xiB_long_help2[[1]]
    
    if(N_B>1){
      for(k in 2:N_B){
        xiB_long<-cbind(xiB_long,xiB_long_help2[[k]])
      }
    }else{
      xiB_long<-matrix(xiB_long,ncol=1) 
    }
    
    for(k in 1:N_B){
      curve_info[,paste0("xiB_long.",k):=xiB_long[,k]]
    }  
  }
  
  ###################################
  # for word/second grouping variable
  ###################################
  if(!use_RI_true){
    if(N_C>0){
      xiC_long_help<-list()
      xiC_long_help2<-list()
      for(k in 1:N_C){
        ###############################
        # attention: need to sort 
        # the same way as in curve_info
        ###############################
        xiC_long_help[[k]]<-xiC[,k][word]             
        xiC_long_help2[[k]]<-rep(xiC_long_help[[k]],number)
      }
      
      xiC_long<-xiC_long_help2[[1]]
      if(N_C>1){
        for(k in 2:N_C){
          xiC_long<-cbind(xiC_long,xiC_long_help2[[k]])
        }
      }else{
        xiC_long<-matrix(xiC_long,ncol=1) 
      }
      for(k in 1:N_C){
        curve_info[,paste0("xiC_long.",k):=xiC_long[,k]]
      }  
    }          
  } 
  
  
  #############
  # curve-level
  #############
  if(N_U>0){
    xiU_long_help<-list()
    for(k in 1:N_U){
      xiU_long_help[[k]]<-rep(xiU[,k],number)  
    }
    xiU_long<-xiU_long_help[[1]]
    if(N_U>1){
      for(k in 2:N_U){
        xiU_long<-cbind(xiU_long,xiU_long_help[[k]])
      }  
    }else{
      xiU_long<-matrix(xiU_long,ncol=1) 
    }
    for(k in 1:N_U){
      curve_info[,paste0("xiU_long.",k):=xiU_long[,k]]
    }  
  }
  
  ##################################################################################
  # construct true underlying functions B_i(t), C_j(t) and U_ijh(t) via KL-expansion
  ##################################################################################
  
  if(N_B>0){
    phiB<-sapply(1:N_B,phiB_fun,t=t_vec)
  }else{
    phiB<-rep(NA,length=length(t_vec))
  }
  
  if(!use_RI_true){
    if(N_C>0){
      phiC<-sapply(1:N_C,phiC_fun,t=t_vec)
    }else{
      phiC<-rep(NA,length=length(t_vec))
    }
  }
  
  if(N_U>0){
    phiU<-sapply(1:N_U,phiU_fun,t=t_vec)
  }else{
    phiU<-rep(NA,length=length(t_vec))
  }
  
  if(N_B>0){
    B_vec_help<-matrix(NA,ncol=N_B,nrow=length(n_long)) # matrix with columns the components of each k=1,..,N_B
    for(k in 1:N_B){
      B_vec_help[,k]<-curve_info[[paste("xiB_long.",k,sep="")]]*phiB[,k]
    }
    B_vec<-rowSums(B_vec_help)        
  }else{
    B_vec<-rep(0,length=length(t_vec))
  }
  
  
  if(!use_RI_true){
    if(N_C>0){
      C_vec_help<-matrix(NA,ncol=N_C,nrow=length(n_long)) # matrix with columns the components of each k=1,..,N_B
      for(k in 1:N_C){
        C_vec_help[,k]<-curve_info[[paste("xiC_long.",k,sep="")]]*phiC[,k]
      }       
      C_vec<-rowSums(C_vec_help)        
      
    }else{
      C_vec<-rep(0,length=length(t_vec))
    }
  }
  
  if(N_U>0){
    U_vec_help<-matrix(NA,ncol=N_U,nrow=length(n_long))  
    for(k in 1:N_U){
      U_vec_help[,k]<-curve_info[[paste("xiU_long.",k,sep="")]]*phiU[,k]
    }        
    U_vec<-rowSums(U_vec_help)
  }else{
    U_vec<-rep(0,length=length(t_vec))
  }
  
  if(N_B>0){
    curve_info[,"B_long":=B_vec]
  }
  
  if(!use_RI_true){
    if(N_C>0){
      curve_info[,"C_long":=C_vec]
    }
  }
  
  if(N_U>0){
    curve_info[,"U_long":=U_vec]
  }
  
  #########################################################
  # construct observations Y_ij, i.e. add measurement error 
  #########################################################
  # now construct observations but in list format and errors in list format
  epsilon<-list()  # list of length n
  mu_list<-list()  # list of length n
  
  
  for(i in 1:n){
    if(number[i]==1){
      epsilon[[i]]<-rmvnorm(1,mean=rep(0,number[i]),sigma=as.matrix(sigmasq_true,nrow=1,ncol=1))  
    }else{
      epsilon[[i]]<-rmvnorm(1,mean=rep(0,number[i]),sigma=diag(rep(sigmasq_true,length=number[i])))  
    }
  }
  
  epsilon_vec<-unlist(epsilon)
  
  for(i in 1:n){
    t_use<-subset(curve_info,select=t,n_long==i)
    mu_list[[i]]<-mu(t_use)
  }   
  
  mu_vec<-unlist(mu_list)
  
  ################
  # add covariates
  ################
  if(covariate){
    covariate.1_help<-rbinom(n=n,size=1,prob=0.2)   
    covariate.1<-rep(covariate.1_help,number)
    covariate.2_help<-rbinom(n=n,size=1,prob=0.6)   
    covariate.2<-rep(covariate.2_help,number)
    curve_info[,covariate.1:=covariate.1]
    curve_info[,covariate.2:=covariate.2]
    f.1<-function(t){cos(t)}
    f.1_vec<-sapply(t_vec,f.1)
    f.2<-function(t){2*t}
    f.2_vec<-sapply(t_vec,f.2)
    mu_vec<-mu_vec + f.1_vec*covariate.1+f.2_vec*covariate.2
  }
  
  
  if(!use_RI_true){
    y_vec<-mu_vec+B_vec+C_vec+U_vec+epsilon_vec   
  }else{
    y_vec<-mu_vec+B_vec+U_vec+epsilon_vec  
  }
  
  curve_info[,c("mu_vec","epsilon_vec","y_vec"):=list(mu_vec,epsilon_vec,y_vec)]
  
  ########
  # Output
  ########
  
  res[["lamB_true"]]<-lamB  
  if(!use_RI_true)
    res[["lamC_true"]]<-lamC  
  res[["lamU_true"]]<-lamU
  res[["mu_true"]]<-mu
  res[["phiB_true"]]<-phiB
  if(!use_RI_true)
    res[["phiC_true"]]<-phiC
  res[["phiU_true"]]<-phiU
  res[["phiB_fun_true"]]<-phiB_fun
  if(!use_RI_true)
    res[["phiC_fun_true"]]<-phiC_fun
  res[["phiU_fun_true"]]<-phiU_fun
  res[["minsize_true"]]<-minsize
  res[["maxsize_true"]]<-maxsize
  res[["min_grid_true"]]<-min_grid
  res[["max_grid_true"]]<-max_grid
  res[["min_visit_true"]]<-min_visit
  res[["max_visit_true"]]<-max_visit
  res[["sigmasq_true"]]<-sigmasq_true
  res[["I"]]<-I
  res[["J"]]<-J
  res[["number_true"]]<-number
  res[["curve_info_true"]]<-curve_info
  res[["normal"]]<-normal
  res[["covariate"]]<-covariate
  res[["xiB_true"]]<-xiB
  if(!use_RI_true)
    res[["xiC_true"]]<-xiC
  res[["xiU_true"]]<-xiU
  res[["center_scores"]]<-center_scores
  res[["decor_scores"]]<-decor_scores
  
  
  return(res)
}

######################################################################
