#### Analysis
library(ggplot2)
library(zoo)
library(gridExtra)

df <- read.table("LaTeX/StatMod/ex3_3.txt", header=T)
x2  <- as.numeric(df$Cases)
n <- length(x2)

loglik4 <- function(b)
{
  loglik<-0
  for (i in 2:n)
  {
    c <- 0
    m <- min(x2[i-1], x2[i])
    for (k in 0:m)
    {
      c <- c + (b[2]^k*(1-b[2])^(x2[i-1]-k)*(b[1]+b[3]*IND[i])^(x2[i]-k))/
        (factorial(k)*factorial(x2[i]-k)*factorial(x2[i-1]-k))
    }
    loglik <- loglik - log(c) + b[1]+b[3]*IND[i]
  }
  return(loglik)
}

loglik6 <- function(b)
{
  loglik<-0
  for (i in 2:n)
  {
    c <- 0
    m <- min(x2[i-1], x2[i])
    for (k in 0:m)
    {
      c <- c + (b[2]^k*(1-b[2])^(x2[i-1]-k)*(b[1])^(x2[i]-k))/
        (factorial(k)*factorial(x2[i]-k)*factorial(x2[i-1]-k))
    }
    loglik <- loglik - log(c) + b[1]
  }
  return(loglik)
}

### Any Festival
IND <- df$Festival
MLE.inar.festival <- nlm(loglik4,p=c(1, 0.5, 1), hessian=TRUE,iterlim=1000)
sigma.festival <- solve(MLE.inar.festival$hessian)
low95.festival <- MLE.inar.festival$estimate - 1.96*sqrt(diag(sigma.festival))
upp95.festival <- MLE.inar.festival$estimate + 1.96*sqrt(diag(sigma.festival))

### AIC standard INAR(1)
4+2*nlm(loglik6,p=c(1, 0.5), hessian=TRUE,iterlim=1000)$minimum ## STANDARD INAR(1): 827.7453
6+2*MLE.inar.festival$minimum ## PROPOSED INAR(1): 820.544

### figure 4
Times <- as.yearmon(df$Time)
df2 <- data.frame(Observed=x2, Time=Times, Ind=IND)
pdates <- as.numeric(df2$Time[df2$Ind==1])
ggplot(df2, aes(Time)) + geom_line(aes(y = Observed)) + 
  scale_x_yearmon(breaks = seq(from = min(df2$Time), to = max(df2$Time), by = 0.3), format = "%Y-%m-%d") + xlab("") + ylab("Number of LGV cases") + 
  scale_y_continuous(limits=c(0, 12)) +geom_vline(xintercept=pdates,linetype="dotted",size=1.5)+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

### mid-pseudo-residuals
probs <- function(x, n)
{
  m <- min(x, x2[n-1])
  suma <- 0
  for (k in 0:m)
  {
    suma <- suma + ((factorial(x2[n-1])/(factorial(k)*factorial(x2[n-1]-k)))*(MLE.inar.festival$estimate[2]^k)*(1-MLE.inar.festival$estimate[2])^(x2[n-1]-k))*
      (exp(-(MLE.inar.festival$estimate[1]+MLE.inar.festival$estimate[3]*IND[n]))*(MLE.inar.festival$estimate[1]+MLE.inar.festival$estimate[3]*IND[n])^(x-k))/
      factorial(x-k)
  }
  return(suma)
}

un.neg <- function(x, n)
{
  if (x==0) return(0)
  res <- 0
  for (i in 0:(x-1))
  {
    res <- res + probs(i, n)
  }
  return(res)
}

un.pos <- function(x, n)
{
  res <- 0
  for (i in 0:x)
  {
    res <- res + probs(i, n)
  }
  return(res)
}

### mid pseudo-residuals
mpr_neg <- vector()
mpr_neg[1] <- 0
for (i in 2:length(x2))
{
  mpr_neg[i] <- un.neg(x2[i], i)
}

mpr_pos <- vector()
mpr_pos[1] <- 0
for (i in 2:length(x2))
{
  mpr_pos[i] <- un.pos(x2[i], i)
}

mpr <- qnorm((mpr_neg+mpr_pos)/2)
mpr[1] <- 0
hist(mpr)
shapiro.test(mpr)

### figure 5
bacf   <- acf(mpr, lag.max = 20, plot = FALSE)
bacfdf <- with(bacf[2:20], data.frame(lag, acf))
conf.level <- 0.95
ciline <- qnorm((1 - conf.level)/2)/sqrt(length(x2))
q1 <- ggplot(data = bacfdf, mapping = aes(x = lag, y = acf)) +
  geom_hline(aes(yintercept = 0)) + geom_hline(aes(yintercept = ciline), linetype=2) + geom_hline(aes(yintercept = -ciline), linetype=2)+
  geom_segment(mapping = aes(xend = lag, yend = 0)) + ylab("") + xlab("Lag") + ggtitle("ACF") + theme(plot.title = element_text(hjust = 0.5))
bacf   <- pacf(mpr, lag.max = 20, plot = FALSE)
bacfdf <- with(bacf, data.frame(lag, acf))
conf.level <- 0.95
ciline <- qnorm((1 - conf.level)/2)/sqrt(length(x2))
q2 <- ggplot(data = bacfdf, mapping = aes(x = lag, y = acf)) +
  geom_hline(aes(yintercept = 0)) + geom_hline(aes(yintercept = ciline), linetype=2) + geom_hline(aes(yintercept = -ciline), linetype=2)+
  geom_segment(mapping = aes(xend = lag, yend = 0)) + ylab("") + xlab("Lag") + ggtitle("PACF") + theme(plot.title = element_text(hjust = 0.5))
grid.arrange(q1, q2, ncol=2)

### Hudecová S., Husková M. and Meintanis S. (2015)
loglik3 <- function(b)
{
  loglik<-0
  for (i in 2:35) ### We use the first 35 observations to estimate the parameters (training data)
  {
    c <- 0
    m <- min(x2[i-1], x2[i])
    for (k in 0:m)
    {
      c <- c + (b[2]^k*(1-b[2])^(x2[i-1]-k)*(b[1])^(x2[i]-k))/
        (factorial(k)*factorial(x2[i]-k)*factorial(x2[i-1]-k))
    }
    loglik <- loglik - log(c) + b[1]
  }
  loglik
}
  
MLE.inar3 <- nlm(loglik3,p=c(1, 0.5), hessian=TRUE,iterlim=1000)
sigma3 <- solve(MLE.inar3$hessian)
MLE.inar3$estimate
MLE.inar3$estimate - 1.96*sqrt(diag(sigma3))
MLE.inar3$estimate + 1.96*sqrt(diag(sigma3))

S_mt <- function(t)
{
  m <- 35
  Q <- function(m, t, u)
  {
    sum <- 0
    for (i in (m+2):length(x2))
    {
      sum <- sum + u^x2[i]-((1+MLE.inar3$estimate[2]*(u-1))^x2[i-1])*(exp(-MLE.inar3$estimate[1] + MLE.inar3$estimate[1]*u))
    }
    resQ <- (1/sqrt(35))*sum
    return(resQ)
  }
  integrand <- function(u)
  {
    return((Q(m, t, u)-(t/35)*Q(0, 35, u))^2)
  }
  res <- integrate(integrand, 0, 1)$value
  return(res)
}
  
resultat <- vector()
for (i in 1:49)
{
  resultat[i] <- S_mt(i+35)
}

gamma <- 0
q_gamma <- function(s)
{
  return((1+s)*((s/(s+1))^gamma))
}  

def <- vector()
for (i in 1:49)
{
  def[i] <- resultat[i]/q_gamma((i+35)/35)
}
