## load data wc.data (different version for Group Lasso, called wc.data14)
load(file="Data/wc.data.02.14.rda")

## load all necessary packages
library(party)
library(glmnet)
library(grplasso)
library(mboost)
library(ranger)
library(ordinalForest)


## source methods, each method is saved in a separate file
source('Methods/gamboost_fun.R')
source('Methods/gamboost_nb.R')
source('Methods/RF_Result_party.R')
source('Methods/RF_Result_ordfor.R')
source('Methods/grp_lasso.R')


## source some additional help funcions we need
source('Methods/help_funs.R')


## number of results which are simulated per method
n.rep <- 100

## simple formula, needed for random forests and for lasso design matrix
form.rf <- as.formula(paste("Goals ~ ",paste(names(wc.data)[5:(ncol(wc.data))],collapse=" + ")))


## create alternative version of data frame, for gamboost models
wc.data2 <- model.matrix(form.rf, data = wc.data)[,-1]
which.f2 <- (1:ncol(wc.data2))[which(apply(wc.data2,2,function(x){length(unique(x))})==2)]
which.s2 <- (1:ncol(wc.data2))[which(apply(wc.data2,2,function(x){length(unique(x))})!=2)]
wc.data2 <- as.data.frame(wc.data2)

for(ii in which.f2){
  wc.data2[,ii] <- as.factor(wc.data2[,ii])
}
wc.data2 <- cbind(wc.data[,1:4],wc.data2)


## create alternative formula, for gamboost models
which.f <- (5:ncol(wc.data2))[sapply(wc.data2[,5:ncol(wc.data2)],is.factor)]
which.s <- (5:ncol(wc.data2))[!((5:ncol(wc.data2)) %in% which.f)]
bounds <- lapply(subset(wc.data2, select = which.s),function(x){range(x)*1.01})

form.gam <- as.formula(paste("Goals ~ bols(int,intercept = FALSE,df=1)+",
                                paste("bbs(",names(wc.data2)[which.s],",knots =20,df=1, 
                                              center = TRUE,boundary.knots =",
                                      bounds[names(wc.data2)[which.s]] ,")",collapse=" + "),"+",
                             paste("bols(",names(wc.data2)[which.s],",df=1,intercept = FALSE)",collapse=" + "),"+",
                             paste("bols(",names(wc.data2)[which.f],",df=1,intercept = FALSE)",collapse=" + ")))


## empty vectors for goals (results) and outcome probabilities
goals.rf <- goals.ranger <- goals.rfm <- goals.orf <- goals.lasso <- goals.lasso.strict <- 
  goals.grplasso <- goals.gam <- goals.gam2 <- c()

probs.rf <- probs.ranger <- probs.rfm <-  probs.orf <- probs.lasso <- probs.lasso.strict <- 
  probs.grplasso <- probs.gam <- probs.gam2 <- c()
  


## start iterating through world cups, each is used once as test data
for(wc in c("2002","2006","2010","2014")){
  cat("Test World Cup:",wc,"\n")
  
  ## create various versions of train and test data or design matrices
  d.train <- subset(wc.data, WM!=wc)
  d.test <- subset(wc.data, WM==wc)

  d.train2 <- subset(wc.data2, WM!=wc)
  d.test2 <- subset(wc.data2, WM==wc)
  
  des.train <- model.matrix(form.rf, data = d.train)[,-1]
  des.test <- model.matrix(form.rf, data = d.test)[,-1]
  
  
  ## RF Goals (party)
  print("RF Goals (party)")
  set.seed(1860)
  rf <- cforest(form.rf, data = d.train, controls = cforest_unbiased(
    mtry = 5, ntree = 5000))
  p.rf <- predict(rf, newdata = d.test)
  goals.rf <- c(goals.rf, pred_goals(p.rf, n.rep))
  probs.rf <- cbind(probs.rf, pred_probs(p.rf))

  
  ## RF Goals (ranger)
  print("RF Goals (ranger)")
  set.seed(1860)
  rangerf <- ranger(form.rf, data = d.train, num.trees = 5000, write.forest = TRUE)
  p.ranger <- predict(rangerf, data = d.test)$predictions
  goals.ranger <- c(goals.ranger, pred_goals(p.ranger, n.rep))
  probs.ranger <- cbind(probs.ranger,pred_probs(p.ranger))
  
  
  ## RF Result (party)
  print("RF Result (party)")
  set.seed(1860)
  p.rfm <- RF_Result_party(wc.data, wc, n.rep, permute = TRUE)
  goals.rfm <- c(goals.rfm, p.rfm$p.rf)
  probs.rfm <- cbind(probs.rfm, p.rfm$probs)
  
  
  ## RF Result (ordinalForest)
  print("RF Result (ordinalForest)")
  set.seed(1860)
  p.orf <- RF_Result_ordfor(wc.data, wc, n.rep, permute = TRUE)
  goals.orf <- c(goals.orf, p.orf$p.rf)
  probs.orf <- cbind(probs.orf, p.orf$probs)
  
  
  ## Lasso
  print("Lasso")
  set.seed(1860)
  lasso <- cv.glmnet(des.train, d.train$Goals, family="poisson")
  p.lasso <- exp(predict(lasso, des.test, s="lambda.min"))
  p.lasso.strict <- exp(predict(lasso, des.test))

  goals.lasso <- c(goals.lasso, pred_goals(p.lasso, n.rep))
  probs.lasso <- cbind(probs.lasso, pred_probs(p.lasso))
  
  print("Lasso (1se)")
  goals.lasso.strict <- c(goals.lasso.strict, pred_goals(p.lasso.strict, n.rep))
  probs.lasso.strict <- cbind(probs.lasso.strict, pred_probs(p.lasso.strict))
  
  
  ## Group Lasso
  print("Group Lasso")
  set.seed(1860)
  p.grplasso <- grp_lasso(wc.data14, wc)
  goals.grplasso <- c(goals.grplasso, pred_goals(p.grplasso, n.rep))
  probs.grplasso <- cbind(probs.grplasso, pred_probs(p.grplasso))
  

  ## Gamboost Poisson
  set.seed(1860)
  print("Gamboost Poisson")
  d.train2$int <- rep(1,nrow(d.train2))
  m.gam <- gamboost_fun(data = d.train2, form = form.gam)
  d.test2$int <- rep(1,nrow(d.test2))
  p.gam <- predict(m.gam, newdata = d.test2, type="response")

  goals.gam <- c(goals.gam, pred_goals(p.gam, n.rep))
  probs.gam <- cbind(probs.gam, pred_probs(p.gam))

  
  ## Gamboost Neg. Binomial
  print("Gamboost Neg. Binomial")
  set.seed(1860)
  m.gam2 <- gamboost_nb(data = d.train2, form = form.gam)
  p.gam2 <- predict(m.gam2, newdata = d.test2, type="response")

  goals.gam2 <- c(goals.gam2, pred_goals(p.gam2, n.rep))
  probs.gam2 <- cbind(probs.gam2, pred_probs(p.gam2))

}



## collect all predicted probabilities and goals
pred.goals <- cbind(goals.rf, goals.ranger, goals.rfm, goals.orf, goals.lasso, goals.lasso.strict, 
                    goals.grplasso, goals.gam, goals.gam2)

pred.probs <- list(probs.rf, probs.ranger, probs.rfm,  probs.orf, probs.lasso, probs.lasso.strict, 
                   probs.grplasso, probs.gam, probs.gam2)

colnames(pred.goals) <- names(pred.probs) <- 
  c("RF_Goals_party", "RF_Goals_ranger", "RF_Result_party", "RF_Result_ordinalForest", "Lasso", "Lasso_1se",
    "Grplasso", "Gamboost_Po", "Gamboost_NB")

## save results
save(pred.probs, pred.goals, file = "compare_methods.RData")



