###Regensburg data
###Monthly number of food inspections in food stores, restaurants and food companies in the city of Regensburg, Germany, that lead to the reporting of a (criminal or administrative) offence. The time series covers the period January 2001 to December 2010 (length: 120) as in Wei et al. (2016). The data are available from the statistics information system (SiS) of the city of Regensburg in Germany (http://www.statistik.regensburg.de/menue/informationen_u_zahlen.php).
###Reference: Wei, C.H., Homburg, A., Puig, P. (2016) Testing for zero inflation and overdispersion in INAR(1) models. Statistical Papers, forthcoming. 

##read data
X <- c(0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 1, 2, 1, 0, 4, 0, 0, 0, 1, 2, 0, 0, 1, 2, 1, 2, 2, 1, 1, 0, 1, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0)

#length of time series
n <- length(X)

##Descriptive statistics
plot(seq(2001,2011-1/12,1/12),X,type="b",pch=19,cex=0.5,xlab="",ylab="Offences")
(H <- tabulate(X+1)) #absolute frequencies
(mu <- mean(X))
#0.3833333
(sigma2 <- var(X))
#0.5240896
(p0 <- sum(X==0)/n) #zero frequency
#0.7333333
(U <- max(X)) #maximum of observations

#check for autocorrelation 
acf(X)
# -> not significant, assume i.i.d. data

##Estimation
#Table for Results
Results <- matrix(0,0,8)
colnames(Results) <- c("lambda","omega; tau",paste0("np_",0:U),"BIC")
#Information about data
Results <- rbind(Results,c(NA,NA,H,NA))
rownames(Results) <- "data"

###Poisson distribution
lambda_est_Poi <- mean(X) #ML estimator is the mean of the data
se_est_Poi <- (lambda_est_Poi/n)^0.5 #standard error of ML estimator is approximated by (mean/n)^0.5

neglmax_Poi <- -n*lambda_est_Poi - sum(log(factorial(X))) + log(lambda_est_Poi)*sum(X) #value of maximized likelihood
BIC_Poi <- -2*neglmax_Poi + log(n)*1 #BIC

Results <- rbind(Results, c(lambda_est_Poi,NA,n*dpois(0:U,lambda_est_Poi),BIC_Poi))
Results <- rbind(Results, c(se_est_Poi,rep(NA,7)))
rownames(Results)[2:3] <- c(c("Poi","s.e."))

###Zero-Inflated Poisson (ZIP) distribution
#likelihood function
llh_ZIP <- function(param){
  mu <- param[1]
  om <- param[2]
  
  P <- c(om+(1-om)*exp(-mu),(1-om)*dpois(1:U,mu))
  
  llh <- sum(H*log(P))
  
  return(-llh)
}

#starting values for optimization 
omega_start_ZIP <- max(min(((var(X)-mean(X))/(mean(X)^2))/(1+(var(X)-mean(X))/(mean(X)^2)),0.99),0.01)
lambda_start_ZIP <- mean(X)/(1-omega_start_ZIP)

#ML estimation
estim_ZIP <- optim(c(lambda_start_ZIP,omega_start_ZIP),llh_ZIP,method="L-BFGS-B",lower=c(0.0001,0.0001),upper=c(2*max(X),p0),hessian=TRUE)

lambda_ZIP <- estim_ZIP$par[1]
omega_ZIP <- estim_ZIP$par[2]

ofiest_ZIP <- estim_ZIP$hessian #inverse covariance
neglmax_ZIP <- estim_ZIP$value #negative maximized log-likelihood
estcov_ZIP <- solve(ofiest_ZIP)

stadflr_ZIP <- rep(0,2)
stadflr_ZIP[1] <- sqrt(estcov_ZIP[[1,1]])
stadflr_ZIP[2] <- sqrt(estcov_ZIP[[2,2]])

BIC_ZIP <- 2*neglmax_ZIP+log(n)*2

Results <- rbind(Results, c(c(lambda_ZIP,omega_ZIP),n*(omega_ZIP*c(1,rep(0,U))+(1-omega_ZIP)*dpois(0:U,lambda_ZIP)),BIC_ZIP))
Results <- rbind(Results, c(stadflr_ZIP,rep(NA,6)))

rownames(Results)[(nrow(Results)-1):(nrow(Results))] <- c("ZIP","s.e.")

###zero-inflated poisson caused by underreporting (UIP) distribution
#likelihood function
llh_UIP <- function(param){
  mu <- param[1]
  tau <- param[2]
  
  P <- c(exp(mu*(exp(-tau)-1)),exp(-mu)*((1-exp(-tau*(1:U)))*mu^(1:U))/factorial(1:U))
  
  llh <- sum(H*log(P))
  
  return(-llh)
}

#starting values for optimization 
lambda_start_UIP <- (lambda_ZIP+lambda_est_Poi)/2
tau_start_UIP <- max((log((1-omega_ZIP)+omega_ZIP*exp(lambda_start_UIP)))/lambda_start_UIP, 0.01)

#ML estimation
esti_UIP <- optim(c(lambda_start_UIP,tau_start_UIP),llh_UIP,method="L-BFGS-B",lower=c(0.001,0.0001),upper=c(99.99,99.99),hessian=TRUE)

lambda_UIP <- esti_UIP$par[1]
tau_UIP <- esti_UIP$par[2]
ofiest_UIP <- esti_UIP$hessian #inverse covariance
neglmax_UIP <- esti_UIP$value #negative maximized log-likelihood
estcov_UIP <- solve(ofiest_UIP)

stadflr_UIP <- rep(0,2)
stadflr_UIP[1] <- sqrt(estcov_UIP[[1,1]])
stadflr_UIP[2] <- sqrt(estcov_UIP[[2,2]])

BIC_UIP <- 2*neglmax_UIP+log(n)*2

Results <- rbind(Results, c(c(lambda_UIP,tau_UIP),n*c(exp(lambda_UIP*(exp(-tau_UIP)-1)),exp(-lambda_UIP)*(1-exp(-tau_UIP*1:U))*lambda_UIP^(1:U)/factorial(1:U)),BIC_UIP))
Results <- rbind(Results, c(stadflr_UIP,rep(NA,ncol(Results)-2)))

rownames(Results)[(nrow(Results)-1):(nrow(Results))] <- c("UIP","s.e.")

View(cbind(round(Results[,1:2],4),round(Results[,3:8],1)))
