
#' Hold out one tournament and predict its goals with a group-lasso
#' Poisson regression.
#'
#' The rows of the tournament `wm` are removed from `data`, a Poisson model
#' with group-lasso penalty is fitted on the remaining tournaments, the
#' penalty parameter is chosen by 10-fold cross-validation on the Poisson
#' deviance, and the expected goals for the held-out rows are returned.
#'
#' Relies on `lambdamax()`, `grplasso()`, `PoissReg()` and `grpl.control()`
#' being available (presumably via an attached `grplasso` package).
#'
#' @param data A data.frame with exactly 4 * 128 rows, ordered in blocks of
#'   128 rows for the tournaments 2002, 2006, 2010, 2014. Must contain the
#'   columns `Team`, `Opponent` and `Goals`. After the columns `confed` and
#'   `confed.oppo` are dropped, columns 5:19 are used as covariates of the
#'   team and columns 20:34 as the matching covariates of the opponent;
#'   the covariates `continent`, `Nation.Coach` and `host` are dummy coded.
#' @param wm The tournament to hold out: one of "2002", "2006", "2010",
#'   "2014" (a number such as 2014 is accepted as well).
#' @return A one-column matrix with the predicted expected number of goals
#'   for every row of the held-out tournament.
#' @note Calls `set.seed(13 + 7 + 2014)` to make the cross-validation folds
#'   reproducible; this changes the caller's random number stream.
grp_lasso <- function(data, wm){
  all.wm <- c("2002","2006","2010","2014")
  n.rows.wm <- 128

  if (length(wm) != 1L || !(wm %in% all.wm)) {
    stop("'wm' must be one of: ", paste(all.wm, collapse = ", "),
         call. = FALSE)
  }
  if (nrow(data) != length(all.wm) * n.rows.wm) {
    stop("'data' must have ", length(all.wm) * n.rows.wm, " rows (",
         n.rows.wm, " per tournament), not ", nrow(data), call. = FALSE)
  }

  data$confed <- NULL
  data$confed.oppo <- NULL

  # Team labels are overwritten with "Newcomer" below; that only works on
  # character vectors (assigning a new label to a factor would yield NA).
  data$Team <- as.character(data$Team)
  data$Opponent <- as.character(data$Opponent)

  # Split into the held-out tournament and the training tournaments.
  wm.index <- rep(all.wm, each = n.rows.wm)
  is.wm <- wm.index == wm
  teams.wm <- unique(data$Team[is.wm])

  data.predict <- data[is.wm, ]
  data <- data[!is.wm, ]

  # Teams that took part in exactly one training tournament are pooled into
  # a single "Newcomer" team.
  team.list <- split(data$Team,
                     rep(seq_len(length(all.wm) - 1L), each = n.rows.wm))
  team.list <- table(unlist(lapply(team.list, unique)))
  newcomer <- names(team.list)[which(team.list == 1)]

  # For the held-out tournament, teams never seen in training are pooled
  # into "Newcomer" as well.
  newcomer.wm <- teams.wm[!(teams.wm %in% names(team.list))]
  newcomer.wm <- unique(c(newcomer, newcomer.wm))

  data$Team[data$Team %in% newcomer] <- "Newcomer"
  data$Opponent[data$Opponent %in% newcomer] <- "Newcomer"

  # Columns 7, 16, 17 (team) and 22, 31, 32 (opponent) are positions 3, 12
  # and 13 of the two covariate blocks, i.e. the ones that are dummy coded
  # later. NOTE(review): the "- 1" presumably maps two-level factors to
  # 0/1 - confirm against the input data.
  to.numeric <- c(7,16,17,22,31,32)
  for (i in to.numeric) {
    data[, i] <- as.numeric(data[, i]) - 1
    data.predict[, i] <- as.numeric(data.predict[, i]) - 1
  }

  # Builds the unstandardised covariate matrix for a set of rows: covariate
  # differences (team minus opponent), dummy-coded categorical differences
  # and +1/-1 indicators for the attack / defence ability of every team.
  # `dummy.levels` fixes the factor levels used for dummy coding so that
  # the held-out rows are coded exactly like the training rows.
  build.features <- function(d, dummy.levels = NULL) {
    diffs <- d[, 5:19] - d[, 20:34]

    dummy.vars <- c("continent", "Nation.Coach", "host")
    used.levels <- list()
    for (v in dummy.vars) {
      raw <- diffs[[v]]
      if (is.null(dummy.levels)) {
        diffs[[v]] <- as.factor(raw)
      } else {
        diffs[[v]] <- factor(raw, levels = dummy.levels[[v]])
        if (any(is.na(diffs[[v]]) & !is.na(raw))) {
          stop("'", v, "' has values in the held-out tournament that do ",
               "not occur in the training data", call. = FALSE)
        }
      }
      used.levels[[v]] <- levels(diffs[[v]])
    }

    dummys <- model.matrix(~continent+Nation.Coach+host, data = diffs)[, -1]
    diffs <- cbind(diffs[, -c(3,12,13)], dummys)

    teams <- levels(as.factor(d$Team))
    W <- matrix(0, nrow = nrow(d), ncol = 2 * length(teams))
    colnames(W) <- paste(rep(teams, each = 2), c("att","def"), sep = ".")

    att.col <- match(paste(d$Team, "att", sep = "."), colnames(W))
    def.col <- match(paste(d$Opponent, "def", sep = "."), colnames(W))
    if (anyNA(def.col)) {
      stop("every 'Opponent' must also occur as 'Team'", call. = FALSE)
    }
    rows <- seq_len(nrow(d))
    W[cbind(rows, att.col)] <- 1
    W[cbind(rows, def.col)] <- -1

    list(X = as.matrix(cbind(diffs, W)),
         n.teams = length(teams),
         dummy.levels = used.levels)
  }

  ## training design
  train <- build.features(data)
  n.teams <- train$n.teams

  X.scaled <- scale(train$X)
  # Centre and scale actually applied to the training columns; reused
  # unchanged for the held-out rows.
  nu <- attr(X.scaled, "scaled:center")
  sigma <- attr(X.scaled, "scaled:scale")

  Design <- cbind(1, X.scaled)
  Y <- data$Goals

  # Penalty groups: NA = unpenalised intercept, 12 single covariates, three
  # dummy-coded covariates with two columns each, then one group per team
  # holding its attack and defence column.
  index <- c(NA,1:12,13,13,14,14,15,15,rep(16:(16+n.teams-1),each=2))
  if (length(index) != ncol(Design)) {
    stop("design has ", ncol(Design), " columns but the group index ",
         "expects ", length(index), call. = FALSE)
  }

  # Log-spaced grid from the smallest fully-penalising lambda down to 0.01.
  lambda <- lambdamax(x=Design,y=Y,model=PoissReg(),standardize=FALSE,
                      center=FALSE,index=index)
  lambda <- exp(seq(log(lambda),log(1e-2),length=100))

  lasso <- grplasso(x=Design,y=Y,model=PoissReg(),standardize=FALSE,
                    center=FALSE,index=index,lambda=lambda,
                    control = grpl.control(trace=0))
  coefs <- lasso$coef

  ## 10-fold cross-validation
  # Folds are drawn over consecutive row pairs (presumably the two rows of
  # one match), so both rows of a pair always land in the same fold.
  N <- nrow(Design)/2
  k <- 10

  n.cv <- rep(floor(N/k),k)
  rest <- N%%k
  if (rest > 0) {
    n.cv[1:rest] <- n.cv[1:rest] + 1
  }
  which.k <- rep(1:k,n.cv)

  set.seed(13+7+2014)
  id.k <- sample(which.k,N,replace=FALSE)
  id.k <- rep(id.k,each=2)

  dev.resids <- poisson(link = log)$dev.resids
  deviances <- numeric(length(lambda))

  for (i in seq_len(k)) {
    # Logical masks instead of -which(): the latter would select nothing
    # for an empty fold.
    in.fold <- id.k == i

    Design.train <- Design[!in.fold, , drop = FALSE]
    Design.test <- Design[in.fold, , drop = FALSE]
    y.train <- Y[!in.fold]
    y.test <- Y[in.fold]

    lasso.train <- grplasso(x=Design.train,y=y.train,model=PoissReg(),
                            standardize=FALSE,center=FALSE,index=index,
                            lambda=lambda,control=grpl.control(trace=0))

    mu.hat <- predict(lasso.train, newdata=Design.test, type="response")

    # Out-of-fold Poisson deviance for every lambda on the grid.
    wt <- rep(1, nrow(mu.hat))
    dev.i <- vapply(
      seq_len(ncol(mu.hat)),
      function(j) sum(dev.resids(y.test, mu.hat[, j], wt = wt)),
      numeric(1)
    )

    deviances <- deviances + dev.i
  }

  coef.opt <- coefs[,which.min(deviances)]

  ## prediction
  data.predict$Team[data.predict$Team %in% newcomer.wm] <- "Newcomer"
  data.predict$Opponent[data.predict$Opponent %in% newcomer.wm] <- "Newcomer"

  X.pred <- build.features(data.predict,
                           dummy.levels = train$dummy.levels)$X

  # The model was fitted on standardised columns, so every training column
  # has to be present for prediction. Columns that do not occur in the
  # held-out tournament (e.g. teams that did not take part) have the raw
  # value 0, which is NOT 0 after standardisation; dropping them would
  # shift all predictions by a constant.
  train.cols <- colnames(train$X)
  unseen <- setdiff(colnames(X.pred), train.cols)
  if (length(unseen) > 0) {
    warning("columns without a fitted coefficient are ignored: ",
            paste(unseen, collapse = ", "), call. = FALSE)
  }
  shared <- intersect(colnames(X.pred), train.cols)

  X.new <- matrix(0, nrow = nrow(X.pred), ncol = length(train.cols),
                  dimnames = list(rownames(X.pred), train.cols))
  X.new[, shared] <- X.pred[, shared, drop = FALSE]
  X.new <- scale(X.new, center = nu, scale = sigma)

  # Same column order as the training design, so the coefficients line up
  # by position.
  Design.new <- cbind(1, X.new)

  exp(Design.new %*% coef.opt)
}