#########FUNCTIONS TO ESTIMATE THE LM_gMTD MODELS SECTION 3.2


#Function to estimate a  mixture logistic model give weiht w


flogistic_mix=function(x,k,w,par)
{
  
  l_id=matrix(0,1,ncol(x))
  
  for (i in 1:ncol(x))
  {
    l=rep(0,nrow(x))
    p=matrix(0,nrow(x),k)
    
    for(t in (k+1):nrow(x))
    {
      for (s in 1:k)
      {
        p[t,s]=exp(par[s]+par[s+k]*x[t-s,i])/(1+exp(par[s]+par[s+k]*x[t-s,i]))
        l[t]=l[t] + w[t,s]*(x[t,i]*log(p[t,s])+(1-x[t,i])*log(1-p[t,s]))
      }
    }
    
    l=l[(k+1):nrow(x)]
    l_id[i]=sum(l)
  }
  lik=sum(l_id)
  return(-lik)  
}


est_logistic_mix=function(ini,x,k,w)
{
  optim(par=ini, fn=flogistic_mix,x=x,k=k,w=w)
}


#####LM-gMTD MODEL 

fithmmtd=function(x,k,tol=5e-4) 
{

# k: number of latent states
# x: binary series

T=nrow(x)

# output:
# rho: vector of initial probabilities

rho = c(1-k*1e-3,rep(1e-3,k-1))

# phi: latent transition matrix

phi = matrix(0.15/(k-1),k,k)
diag(phi)=0.85

## E-step 

dk=matrix(NA,T-k,k)
p=matrix(NA,T-k,k)

#Initial values betas

coef=rep(-0.50,2*k)

for(i in 1:k) 
{
  dk[,i]=1
    for(j in 1:ncol(x)) 
    {
      p[,i]=exp(coef[i]+coef[i+k]*x[((k+1):T)-i,j])/(1+exp(coef[i]+coef[i+k]*x[((k+1):T)-i,j]))
      dk[,i]=dk[,i]*(p[,i]^x[((k+1):T),j])*((1-p[,i])^(1-x[((k+1):T),j]))
    }
}

alpha=matrix(0,T-k,k)
beta=matrix(0,T-k,k)
alphascaled=alpha
s=rep(0,T-k)
betascaled=beta
pdpost=array(0,dim=c(T-k-1,k,k))

alpha[1,]=rho*dk[1,]
alphascaled[1,]=alpha[1,]/sum(alpha[1,])
s[1]=sum(alpha[1,])

for(t in 1:(T-k-1)) 
{

  for(h in 1:k) 
  {
    alpha[t+1,h]=sum(alphascaled[t,]*phi[,h]*dk[t+1,h])
  }

  s[t+1]=sum(alpha[t+1,])

  alphascaled[t+1,]=alpha[t+1,]/s[t+1]

}

lk=sum(log(s))

beta[T-k,]=1

for(t in (T-k-1):1) 
{
  for(h in 1:k) 
  {
    beta[t,h]=sum(beta[t+1,]*phi[h,]*dk[t+1,])/s[t+1]
  }
}

lko=-Inf
it=0

###Number of parameters
np=(k-1)+k*(k-1)+2*k

while(abs(lk-lko)>tol) 
{
it=it+1
if(it>1) 
{
print(c(it,lk,lk-lko,-2*lk+log((T-k)*ncol(x))*np))
}
lko=lk

# M-step & posterior probs

ppost=alphascaled*beta

for(h in 1:k) 
{
  for(m in 1:k) 
  {
    pdpost[,h,m]=alphascaled[1:(T-k-1),h]*phi[h,m]*beta[2:(T-k),m]*dk[2:(T-k),m]/s[2:(T-k)]
  }
}

for(h in 1:k) 
{
  for(m in 1:k) 
  {
    phi[h,m]=sum(pdpost[,h,m])/sum(pdpost[,h,])
  }
}

rho=ppost[1,]


mod=est_logistic_mix(coef,x,k,rbind(matrix(0,k,k),ppost))
coef=mod$par

# E-step 
p=matrix(NA,T-k,k)

for(i in 1:k) 
{
  dk[,i]=1
  for(j in 1:ncol(x)) 
  {
    p[,i]=exp(coef[i]+coef[i+k]*x[((k+1):T)-i,j])/(1+exp(coef[i]+coef[i+k]*x[((k+1):T)-i,j]))
    dk[,i]=dk[,i]*(p[,i]^x[((k+1):T),j])*((1-p[,i])^(1-x[((k+1):T),j]))  }
}

alpha[1,]=rho*dk[1,]
alphascaled[1,]=alpha[1,]/sum(alpha[1,])
s[1]=sum(alpha[1,])

for(t in 1:(T-k-1)) 
{

  for(h in 1:k) 
  {
    alpha[t+1,h]=sum(alphascaled[t,]*phi[,h]*dk[t+1,h])
  }

s[t+1]=sum(alpha[t+1,])

alphascaled[t+1,]=alpha[t+1,]/s[t+1]

}

lk=sum(log(s))

beta[T-k,]=1

for(t in (T-k-1):1) 
{
  for(h in 1:k) 
  {
    beta[t,h]=sum(beta[t+1,]*phi[h,]*dk[t+1,])/s[t+1]
  }
}

lk=sum(log(s))

beta[T-k,]=1

for(t in (T-k-1):1) 
{
  for(h in 1:k) 
  {
    beta[t,h]=sum(beta[t+1,]*phi[h,]*dk[t+1,])/s[t+1]
  }
}

}

print(paste(k,": BIC=",-2*lk+log((T-k)*ncol(x))*np))

return(list(lk=lk,rho=rho,phi=phi,coef=coef,bic=-2*lk+log((T-k)*ncol(x))*np))
}



#Import dataset Data_models_section3.2.txt

bdd=read.table("Data_models_section3.2.txt", header = TRUE, row.names = NULL,sep="\t")

bdd[1:50,]


x=bdd[,3:264]

x[1:50,]

##Run LM_gMTD models

##Remove the first 6 observations of each head-tail sequences to have the same number of components in the log-likelihood of each model,and obtain suitable BIC comparisons between Markov chains up to order k=6

#Computationally-intensive execution: reduce tolerance (e.g: tol=5e-3) to decrease time execution (similar results)  

b2=fithmmtd(x[-(1:4),],2,tol=5e-4)

b3=fithmmtd(x[-(1:3),],3,tol=5e-4)

b4=fithmmtd(x[-(1:2),],4,tol=5e-4)

b5=fithmmtd(x[-(1:1),],5,tol=5e-4)

b6=fithmmtd(x,6,tol=5e-4)


