################################################################################
# author: Jona Cederbaum
# date: 08.04.2015
################################################################################
# description: specification of all input variables and call of FLMM estimation.
## The input parameters are here exemplarily given for the phonetics data. 
## All input and output variables are described here and the input is
## passed over to function call_functions in which the estimation takes place.
################################################################################
###############
# load packages
###############
require(devtools) # needed for installing refundDevel
require(refundDevel) # can be installed as follows:
# devtools::install_github("refunders/refund", ref="devel")
# (at least use commit: end of january 2015)
require(parallel) # for parallel estimation and prediction in bam
require(mgcv) # (at least version: 1.8-6)
require(MASS)
require(Matrix)
require(data.table) 

##################
# source functions
##################
# Read in the helper scripts of the FLMM pipeline, one after another in the
# order listed. source() evaluates each file in the global environment by
# default; invisible() keeps the list returned by lapply() from being printed.
invisible(lapply(
  c(
    "functions.R",
    "prep_covariance.R",
    "mean_estimation.R",
    "cov_estimation.R",
    "fpc_estimation.R",
    "fpc_famm_estimation.R",
    "call_all_functions.R"
  ),
  source
))

################################################################################
#########################
#load data
#########################
# exemplarily for the subset of the phonetics data
# Load the example data set; the file is expected to provide the object
# curve_info that is used further below.
load(file = "../data/phonetics_data_subset.Rdata")

##########################
# Input variables
##########################

# general smoothing:
####################
# Estimation method for gam ("REML"; the alternative is "ML").
method <- "REML"

# Basis used by gam.
bs <- "ps"

# Length of the equidistant grid on which the covariance plane is evaluated.
# The same grid is used later for the estimation of the eigenvalues,
# eigenfunctions etc.
# NOTE: the grid length can be important for computation time
# (approx. quadratic influence).
d_grid <- 100

# mean estimation:
##################
# Dimension of the basis used for the global mean estimation via gam.
bf_mean <- 8

# Dimension of the basis used for the covariates in the mean estimation via gam.
bf_covariates <- 8

# Order of the penalty for this term of gam in the mean estimation.
m_mean <- c(2, 3)

# TRUE if any covariate is in the formula (in the mean).
covariate <- TRUE

# Number of covariates.
num_covariates <- 4

# Form in which each covariate enters the model. "by": varying-coefficient;
# the other options are linear and semi (for semi-parametric).
covariate_form <- rep("by", num_covariates)

# TRUE if interactions of covariates should be modeled; which interactions are
# used is specified by which_interaction.
interaction <- TRUE

# Matrix that specifies which interactions should be considered.
# Entry [k,l] specifies if the interaction between covariate.k and covariate.l
# should be used. Entries are redundant: [l,k] should be set to the same value
# as [k,l]. Here covariate.1 interacts with covariate.2, covariate.3 and
# covariate.4.
# The data vector holds exactly num_covariates^2 = 16 values, one matrix row
# per line. The earlier version supplied 24 values for the 4 x 4 matrix; the
# surplus values were dropped by matrix() (recent R versions warn about such a
# length mismatch). The 16 values below are the ones that were actually used.
which_interaction <- matrix(
  c(
    FALSE, TRUE,  TRUE,  TRUE,
    TRUE,  FALSE, FALSE, FALSE,
    TRUE,  FALSE, FALSE, FALSE,
    TRUE,  FALSE, FALSE, FALSE
  ),
  byrow = TRUE, nrow = num_covariates, ncol = num_covariates
)

# Save the gam object or not (attention: can be large!).
save_gam_mean <- FALSE

# TRUE if the gam should be plotted as implemented in mgcv.
plot_gam_mean <- FALSE

# covariance estimation:
########################
# Dimension of the basis used for the covariance estimation via gam.
# NOTE(review): the original comment ends with "for each different" and is
# truncated; presumably one entry per auto-covariance -- confirm in
# cov_estimation.R.
bf_covs <- c(5, 5, 5)

# Marginal penalties for the gam of the covariance estimation.
# NOTE(review): presumably one list entry per margin of the bivariate smooth --
# confirm in cov_estimation.R.
m_covs <- list(c(2, 3), c(2, 3))

# TRUE if bam() shall be used instead of gam() (the syntax is the same).
use_bam <- TRUE

# If FALSE, only one smoothing parameter is used in the auto-covariance
# estimation.
mp <- FALSE

# Parallelization of the auto-covariance estimation (using bam).
para_estim <- TRUE

# Number of cores for the parallelization of the estimation (using bam).
para_estim_nc <- 5

# eigen decomposition:
######################
# Pre-specified level of variance.
var_level <- 0.95

# Number of components to keep for B, C and E (here U), respectively.
# Each value overrides var_level if it is not NA.
N_B <- NA
N_C <- NA
N_U <- NA

# FPC-FAMM:
###########
# TRUE if the pffr function is used.
use_pffr <- TRUE

# TRUE if bam is used in pffr.
use_bam_pffr <- TRUE

# TRUE if the pffr objects should be plotted.
plot_pffr <- TRUE

# Specification of the smooth of the global mean.
bs_int_pffr <- list(bs = "ps", k = 8, m = c(2, 3))

# Specification of the smooth (bs_y_pffr).
bs_y_pffr <- list(bs = "ps", k = 8, m = c(2, 3))


# data table:
#############
# curve_info          # a data.table with the number of rows corresponding to 
##                    the total number of observation points and with the following columns
##  n_long:           to which curve does the observation belong (must start with 1)
##  subject_long:     to which level of the first grouping variable does the observation belong
##  word_long:        to which level of the second grouping variable does the observation belong
###                   (only in case of crossed random intercepts)
##  combi_long:       to which repetition of the combination of first and second grouping variable does the observation belong
###                   (only in case of crossed random intercepts)
##  y_vec:            entries of Y as a long vector
##  t:                observation points as a long vector
##  number_long:      how many observations are available for the curve to which this observation belongs
##  covariate.k:      covariates named as covariate.1, covariate.2 etc.

## Note: curve_info can be generated by using gendata.R and extracting the columns of interest, e.g.
### curve_info<-subset(gen$curve_info_true,select=c("n_long","subject_long","word_long","number_long","combi_long",
#### "t","covariate.1","covariate.2","y_vec")) or without "word_long" and combi_long for random intercept design

## Note: It is important that subject_long, word_long, and n_long are numbered from 1 to the number of respective levels.



# which design
##############
# Design switch: TRUE selects the random intercept setting, FALSE the crossed
# random intercepts setting.
use_RI <- FALSE

########################
# Output variables 
########################
# results is a list with elements:
## nums_subject:        number of points per subject/level of first grouping variable
## nums_word:           number of points per word/level of second grouping variable (for crossed design)
## my_grid:             fine, regular grid used for evaluation of the auto-covariances
## mean_hat:            mean estimation (list)
## time_mean:           computation time for mean estimation
## cov_hat:             a list containing the error variance
## time_cov:            computation time for the estimation of the auto-covariances and the error variance
## fpc_hat:             a list containing the eigenfunctions (phi_..) and eigenvalues (lam_...), as well as the variance explained
###                     and the number of estimated FPCs per grouping variable, the total variance and predictions of the
###                     FPC weights (xi_..).
## time_fpc:            computation time for obtaining the FPCs and eigenvalues and for predicting the FPC weights
## fpc_famm_hat:       (if use_pffr=TRUE) a list containing 
### intercept
### predicted FPC weights (scores)
### error variance (sig2)
### residuals
### pffr_predict1/2/3 containing the predictions of B, C, U (E), respectively
### pffr_cb_mean containing the estimate (value) and standard errors (se) of the global mean
### pffr_cb_cov.1/2.. containing the estimate (value) and standard errors (se) of covariate.1/2..
### pffr_cb_inter_1_2... containing the estimate (value) and standard errors (se) of interaction 
#### between covariate 1 and 2 (for others named analogously)
### pffr_estim: model object (if wanted)
## time_fpc_famm:       computation time for FPC-FAMM estimation/prediction


##################
# automatically
# obtain number of 
# levels:
##################
# once the above input variables are specified, the number of levels per grouping variable can be obtained 

# Number of levels of the first grouping variable.
I <- length(unique(curve_info$subject_long))

# Number of levels of the second grouping variable. Under the random intercept
# design (use_RI is TRUE) it is set to NA instead.
J <- if (use_RI) {
  NA
} else {
  length(unique(curve_info$word_long))
}


###########
#Estimation 
###########
# Pass all inputs specified above to call_functions, in which the estimation
# takes place, and keep the returned list in results.
# NOTE(review): plot_gam_mean is set in the input section but is not among the
# arguments passed here -- confirm whether call_functions should receive it.
results <- call_functions(
  use_RI = use_RI,
  method = method,
  bs = bs,
  d_grid = d_grid,
  bf_mean = bf_mean,
  bf_covariates = bf_covariates,
  m_mean = m_mean,
  covariate = covariate,
  num_covariates = num_covariates,
  covariate_form = covariate_form,
  interaction = interaction,
  which_interaction = which_interaction,
  save_gam_mean = save_gam_mean,
  bf_covs = bf_covs,
  m_covs = m_covs,
  use_bam = use_bam,
  mp = mp,
  para_estim = para_estim,
  para_estim_nc = para_estim_nc,
  var_level = var_level,
  N_B = N_B,
  N_C = N_C,
  N_U = N_U,
  use_pffr = use_pffr,
  use_bam_pffr = use_bam_pffr,
  plot_pffr = plot_pffr,
  bs_int_pffr = bs_int_pffr,
  bs_y_pffr = bs_y_pffr,
  curve_info = curve_info,
  I = I,
  J = J
)


