options(error = dump.frames)
require(glmnet)
require(Hmisc)
source('code/npm.r')

train = read.csv('data/vowel.train')
test = read.csv('data/vowel.test')
X = as.matrix(train[, 3:12])
y = train$y
Y = model.matrix(~ as.factor(y) - 1)

getloss = function(pred, y) {
  Y = model.matrix(~ as.factor(y) - 1)
  -mean(log(rowSums(Y*pred)))
}

getse = function(pred, y) {
  Y = model.matrix(~ as.factor(y) - 1)
  sd(log(rowSums(Y*pred)))/sqrt(nrow(Y))
}

pred.null = matrix(1 / 11, nrow = nrow(test), ncol = 11)
loss.null = getloss(pred.null, y = test$y)

fit.npmr = npmr(X, Y, exp(seq(6, -1, length = 40)), eps = 1e-6)
yhat.npmr = predict(fit.npmr, X)
pred.npmr = predict(fit.npmr, as.matrix(test[, 3:12]))
df.npmr = 100 *
  (loss.null - apply(yhat.npmr, 3, getloss, y = train$y)) / loss.null
loss.npmr = 100 *
  (loss.null - apply(pred.npmr, 3, getloss, y = test$y)) / loss.null
se.npmr = apply(pred.npmr, 3, getse, y = test$y) / loss.null * 100

fit.ridge = glmnet(X, y, family = 'multinomial', alpha = 0,
  lambda.min.ratio = 1e-7)
yhat.ridge = predict(fit.ridge, X, type = 'response')
pred.ridge = predict(fit.ridge, as.matrix(test[, 3:12]), type = 'response')
df.ridge = 100 *
  (loss.null - apply(yhat.ridge, 3, getloss, y = train$y)) / loss.null
loss.ridge = 100 *
  (loss.null - apply(pred.ridge, 3, getloss, y = test$y)) / loss.null
se.ridge = apply(pred.ridge, 3, getse, y = test$y) / loss.null * 100

pdf('figs/vowel.pdf', height = 5)
plot(df.npmr, loss.npmr, type = 'l', col = 'dodgerblue', lwd = 2,
  xlim = c(0, 70), xlab = 'Training % deviance explained',
  ylim = c(0, 50), ylab = 'Test % deviance explained')
errbar(df.npmr, loss.npmr, loss.npmr + se.npmr, loss.npmr - se.npmr,
  add = TRUE, type = 'n', col = 'dodgerblue', errbar.col = 'dodgerblue')
lines(df.ridge, loss.ridge, lty = 4, lwd = 2, col = 'darkorange')
errbar(df.ridge, loss.ridge, loss.ridge + se.ridge, loss.ridge - se.ridge,
  add = TRUE, type = 'n', col = 'darkorange', errbar.col = 'darkorange',
  lty = 4)
abline(h = 0, lty = 2, lwd = 2, col = 'forestgreen')
legend('topleft', c('NPMR', 'Ridge', 'Null'),
  lty = c(1, 4, 2), lwd = 2,
  col = c('dodgerblue', 'darkorange', 'forestgreen'))
dev.off()

SVD = svd(fit.npmr$B[, , which.min(loss.npmr)])
V = SVD$v
D = SVD$d
vowel.table = rbind(V, D, rep(NA, 10))
rownames(vowel.table) = c('i (heed)', 'I (hid)', 'E (head)', 'A (had)',
  'a: (hard)', 'Y (hud)', 'O (hod)', 'C: (hoard)', 'U (hood)', 'u: (who\'d)',
  '3: (heard)', 'Corresponding', 'diagonal')
print(xtable::xtable(vowel.table, digits = 2), file = 'tabs/B-vowel.tex',
  include.colnames = FALSE, only.contents = TRUE, hline.after = 11)
pdf('figs/B-vowel.pdf', width = 10)
barplot(D, axes = FALSE, col = 'lightgreen', border = NA)
dev.off()


