source("SPSR.dump") 
#It contains the vowel data "Xaaao", "Yaaao"
#and mixture data "Ethanol", "Isopropanol", "Water", "iTemp30", "iTemp40", "iTemp50", "iTemp60", "iTemp70", "S.", "pure"
#and PSR functions (developed by Brian Marx) "poly.signal.fit","pspline.checker","pspline.fitter","signal.fit"
#These codes are also available on Brian Marx's webpage: http://www.stat.lsu.edu/faculty/marx/ 

#Loading support R packages
library(splines) #Generate B-splines basis
library(pls)     #Partial least square
library(gpls)    #Generalized partial least square
library(svcm)    #Clever search for the tuning parameters

########################### Mixture Example #######################

#Fit a sparse penalized signal regression (SPSR) model.
#Signals are projected onto a B-spline basis; PLS coefficients on that
#projection give per-basis importance weights, then cleversearch tunes the
#P-spline penalty lambda by cross validation.
#Args:
#  q          - B-spline degree
#  ps.        - number of B-spline knots (segments)
#  ord.       - order of the difference penalty matrix D
#  lower, upper, ngrid, startvalue - cleversearch settings (log10 scale)
#  x.index    - index (e.g. wavelength) axis of the signal
#  x.train, y.train, x.test, y.test - training/test signals and responses
#Returns a list: test.err (RMSEP on the test set), para (optimal lambda),
#  fit (final signal.fit object), loocv (CV error at the optimum),
#  wt (importance weights used in the penalty)
SPSR.regression<-function(q, ps., ord., lower, upper, ngrid, startvalue, x.index, x.train, y.train, x.test, y.test){
  #Build the B-spline basis over a slightly padded index range
  xl<-min(x.index)
  xr<-max(x.index)
  xmax<-xr + 0.01 * (xr - xl)
  xmin<-xl - 0.01 * (xr - xl)
  dx<-(xmax - xmin)/ps.
  knots<-seq(xmin - q * dx, xmax + q * dx, by = dx)
  b<-spline.des(knots, x.index, q + 1, 0 * x.index)$design

  #Project training signals onto the basis and fit PLS to get weights
  u.train<-x.train%*%b
  fit.pls<-plsr(y.train~u.train, ncomp=3, validation="CV", x=TRUE)
  #NOTE(review): 3 components are fitted but only 1 is used for the
  #importance weights - confirm this is intended
  coef.pls<-coef(fit.pls, ncomp=1)
  tmp1<-coef.pls[,,1]
  v<-abs(tmp1)/mean(abs(tmp1))  #normalized absolute PLS coefficients

  #CV error as a function of log10(lambda), minimized by cleversearch
  cleverwrapper<-function(vec){
    signal.fit(y.train, x.index, x.train, ps.int=ps., lambda=vec,
               coef.plot=FALSE, order=ord., se=FALSE, importance=v)$cv
  }
  optimal.cv<-cleversearch(cleverwrapper, lower, upper, ngrid, startvalue, logscale=TRUE,
                           clever=TRUE, verbose=FALSE)

  #Refit at the optimal lambda and evaluate on the test set
  opt.fit<-signal.fit(y.train, x.index, x.train, ps.int=ps., lambda=optimal.cv$par,
                      coef.plot=FALSE, order=ord., se=FALSE,
                      importance=v, x.predicted=x.test, y.predicted=y.test)
  list(test.err=opt.fit$cv.predicted, para=optimal.cv$par, fit=opt.fit,
       loocv=optimal.cv$value, wt=v)
}

#Fit an additive polynomial signal regression (APSR) model.
#cleversearch tunes the (2-d) penalty vector of poly.signal.fit by CV,
#then the model is refit at the optimum and evaluated on the test set.
#Args:
#  q          - B-spline degree
#                NOTE(review): accepted for interface symmetry with
#                SPSR.regression but not used in this body - confirm
#  ps.        - number of B-spline knots (segments)
#  ord.       - order of the difference penalty matrix D
#  lower, upper, ngrid, startvalue - cleversearch settings (log10 scale)
#  p.order    - polynomial order of the additive signal terms
#  x.index    - index (e.g. wavelength) axis of the signal
#  x.train, y.train, x.test, y.test - training/test signals and responses
#Returns a list: test.err (RMSEP on the test set), para (optimal penalties),
#  fit (final poly.signal.fit object), loocv (CV error at the optimum)
APSR.regression<-function(q, ps., ord., lower, upper, ngrid, startvalue, p.order, x.index, x.train, y.train, x.test, y.test){
  #CV error as a function of the penalty vector (log10 scale)
  #NOTE(review): the wrapper passes the penalty as 'lamb=' while the final
  #fit uses 'lambda='; both presumably resolve to the same formal via
  #partial matching - confirm against poly.signal.fit's signature
  cleverwrapper<-function(lam.){
    poly.signal.fit(y.train, x.index=x.index, x.signal=x.train, poly.order=p.order,
                    lamb=lam., int=FALSE, ps.int=ps., order=ord.)$cv
  }
  optimal.cv<-cleversearch(cleverwrapper, lower, upper, ngrid, startvalue, logscale=TRUE,
                           clever=TRUE, verbose=FALSE)
  #Refit at the optimal penalties and evaluate on the test set
  opt.fit<-poly.signal.fit(y.train, x.index, x.train, ps.int=ps., poly.order=p.order,
                           lambda=optimal.cv$par, coef.plot=FALSE, order=ord.,
                           x.predicted=x.test, y.predicted=y.test, int=FALSE)
  list(test.err=opt.fit$cv.predicted, para=optimal.cv$par, fit=opt.fit,
       loocv=optimal.cv$value)
}


#Index matrix: row i (i = 1..5) holds the sample indices for temperature
#30 + 10*(i-1); each temperature's pattern is the 30C pattern shifted by 22.
#First 13 columns are training index, last 6 are test index
index<-outer(seq(0, 88, by=22),
             c(2:5, 8, 9, 11, 13, 14, 17, 19:21, 6, 7, 10, 12, 15, 16),
             `+`)


#Response (Molar concentration) matrix
#Row 1: Ethanol, row 2: Water, row 3: 2-propanol (Isopropanol)
#First 13 columns are training, last 6 are test
resp<-rbind(Ethanol[index[1,]],
            Water[index[1,]],
            Isopropanol[index[1,]])
dimnames(resp)<-NULL  #plain 3 x 19 matrix, matching the original layout

#First order differencing along each spectrum (i.e. across columns of S.)
S..<-t(diff(t(S.))) #dim(S..) is 110 by 199


#For local model i, i from 1 to 5 corresponds to Temp. 30 to 70
#raw x matrix is S.[index[i,],] and response is resp[j,] (j 1~3)
#For global model, raw x matrix is S.[as.vector(t(index)),]
#response is rep(resp[j,],5)


#Global model on S..

Sol.index<-3  #1: Ethanol; 2: Water; 3: Isopropanol


n<-65 #training sample size (5 temperatures x 13 training mixtures)
N<-95 #training + testing size (5 temperatures x 19 mixtures)


#Training rows: the 13 training columns of every temperature row, read
#row by row (temperature blocks stacked). Reuse tmp1 instead of computing
#the same expression twice (original recomputed it for x.train).
tmp1<-as.vector(t(index[1:5,1:13]))
x.train<-S..[tmp1,]
y.train<-rep(resp[Sol.index,1:13],5)  #13 concentrations repeated per temperature
x.test<-S..[as.vector(t(index[1:5,14:19])),]
y.test<-rep(resp[Sol.index,14:19],5)
x.index<-seq_len(ncol(x.train))  #position index along the signal axis


#Fit SPSR model on the mixture data
q<-3            #B-spline degree
ps.<-150        #number of knots
ord.<-2         #order of the difference penalty matrix D
ngrid<-100      #grid resolution for cleversearch
lower<- -11     #log10(lambda) search bounds
upper<-2
startvalue<-lower
fit.spsr<-SPSR.regression(q=q, ps.=ps., ord.=ord., lower=lower, upper=upper,
                          ngrid=ngrid, startvalue=startvalue, x.index=x.index,
                          x.train=x.train, y.train=y.train,
                          x.test=x.test, y.test=y.test)
fit.spsr$test.err  #Prediction error in RMSEP
fit.spsr$para      #lambda
fit.spsr$loocv     #Leave-one-out cross validation error


#Fit APSR model on the mixture data
q<-3             #B-spline degree
ps.<-150         #number of knots
ord.<-3          #order of the difference penalty matrix D
p.order<-2       #APSR polynomial order
ngrid<-100       #grid resolution for cleversearch
lower<-c(-8,-8)  #2-d log10 penalty search bounds
upper<-c(3,3)
startvalue<-upper
fit.apsr<-APSR.regression(q=q, ps.=ps., ord.=ord., lower=lower, upper=upper,
                          ngrid=ngrid, startvalue=startvalue, p.order=p.order,
                          x.index=x.index, x.train=x.train, y.train=y.train,
                          x.test=x.test, y.test=y.test)
fit.apsr$test.err  #Prediction error in RMSEP
fit.apsr$para      #lambda
fit.apsr$loocv     #Leave-one-out cross validation error


######################## vowel example ##############################


x<-Xaaao  #vowel predictors from SPSR.dump - presumably one signal per sample (row); confirm
y<-Yaaao  #class labels for the two vowels - assumed binary 0/1 given the binomial fits below; confirm
#Fixed train/test split of the 160 vowel samples: 128 for training,
#the remaining 32 for testing.
train.index<-c(40,82,7,27,115,103,151,59,85,49,143,61,136,72,28,80,22,11,
               118,155,135,75,137,10,24,33,57,81,158,130,146,127,1,125,4,
               91,78,145,142,98,15,36,108,83,35,8,9,134,62,154,160,159,56,
               156,60,77,141,132,88,30,54,123,25,17,148,18,105,133,121,109,
               94,110,51,128,117,71,5,32,87,114,37,84,97,14,106,95,3,102,
               131,153,101,50,39,58,45,113,63,68,44,129,73,89,34,152,147,
               107,69,74,100,119,92,93,76,86,64,138,21,157,70,31,140,48,79,
               2,47,111,126,29)
#The test set is exactly the complement of the training set (in ascending
#order, as in the original explicit listing).
test.index<-setdiff(1:160, train.index)


#Apply the fixed split to predictors and labels
x.train<-x[train.index,]
x.test<-x[test.index,]
y.train<-y[train.index]
y.test<-y[test.index]
x.index<-seq_len(ncol(x))  #position index along the signal axis


#Tuning settings shared by the vowel-example fits below
ps.    <- 13        #number of knots
q      <- 3         #degree of B-splines
ord.   <- 3         #order of difference penalty
lower  <- c(-5)     #SPSR: lower bound for the penalty search
upper  <- c(2)      #SPSR: upper bound for the penalty search
lower2 <- c(-3,-3)  #APSR: lower bounds for the 2-d penalty search
upper2 <- c(5,5)    #APSR: upper bounds for the 2-d penalty search
ngrid  <- 50        #number of values for cleversearch grid
p.order<- 2         #APSR polynomial order


#SPSR fit for the vowel classification task (binomial family, logit link)

#Build the padded B-spline basis over the signal index (same recipe as in
#SPSR.regression above)
xl<-min(x.index)
xr<-max(x.index)
xmax<-xr + 0.01 * (xr - xl)
xmin<-xl - 0.01 * (xr - xl)
dx<-(xmax - xmin)/ps.
knots<-seq(xmin - q * dx, xmax + q * dx, by = dx)
b<-spline.des(knots, x.index, q + 1, 0 * x.index)$design
u.train<-x.train%*%b
u.test<-x.test%*%b  #NOTE(review): computed but never used below - dead code?

#Generalized PLS with one component gives per-basis importance weights
fit.gpls<-gpls(u.train, y.train, K=1)
v<-abs(fit.gpls$coef[-1])/mean(abs(fit.gpls$coef[-1])) #eliminate the first term (intercept)

#AIC as a function of log10(lambda), minimized by cleversearch
cleverwrapper<-function(vec){
  signal.fit(y.train, x.index, x.train, ps.int=ps., lambda=vec, degree=q,
             coef.plot=FALSE, order=ord., se=FALSE, importance=v,
             link="logit", family="binomial")$aic
}

optimal.search<-cleversearch(cleverwrapper, lower, upper, ngrid, logscale=TRUE)

#Refit at the optimal lambda and classify the test set
opt.fit<- signal.fit(y.train, x.index, x.train, ps.int=ps., lambda=optimal.search$par,
                     coef.plot=FALSE, order=ord., se=FALSE, importance=v,
                     x.predicted=x.test, y.predicted=y.test,
                     link="logit", family="binomial")
#(sign(p - 0.5) + 1)/2 maps a fitted probability to a 0/1 label
tmp<-table((sign(opt.fit$summary.predicted[,2]-0.5)+1)/2,y.test)
predict.spsr<-(tmp[1,1]+tmp[2,2])/length(y.test)  #classification accuracy; 0.9375 reported

1-predict.spsr        #Misclassification error rate
optimal.search$par    #Lambda
optimal.search$value  #AIC


#APSR fit for the vowel classification task (binomial family, logit link)

#AIC as a function of the 2-d log10 penalty vector
cleverwrapper<-function(vec){
  #NOTE(review): order=q here vs order=ord. in the SPSR calls; both are 3
  #in this script so behavior is unchanged, but confirm which is intended
  poly.signal.fit(y.train, x.index, x.train, poly.order=p.order, link="logit",
                  coef.plot=FALSE, family="binomial", lamb=vec, int=FALSE,
                  ps.int=ps., order=q)$aic
}

#BUG FIX: startvalue must match the 2-d bounds lower2/upper2; the original
#passed the 1-d SPSR bound 'upper' (c(2)) here, unlike the mixture example
#which starts APSR at its own (matching-dimension) upper bound.
optimal.search<-cleversearch(cleverwrapper, lower2, upper2, ngrid, logscale=TRUE,
                             verbose=FALSE, startvalue=upper2)
opt.fit3<- poly.signal.fit(y.train, x.index, x.train, poly.order=p.order, link="logit",
                           coef.plot=FALSE, family="binomial", lamb=optimal.search$par,
                           int=FALSE, ps.int=ps., order=q,
                           x.predicted=x.test, y.predicted=y.test)
#(sign(p - 0.5) + 1)/2 maps a fitted probability to a 0/1 label
tmp<-table((sign(opt.fit3$summary.predicted-0.5)+1)/2, y.test)
predict.apsr<-(tmp[1,1]+tmp[2,2])/length(y.test)  #classification accuracy

#BUG FIX: report the APSR error rate (the original reused predict.spsr here)
1-predict.apsr        #Misclassification error rate
optimal.search$par    #Lambda
optimal.search$value  #AIC

