require(glmnet)
source('code/baseball.r')
source('code/npm.r')

load('results/baseball1.rda')

B.bat = fit.npmr$fit$B[group == 2, , 1]
SVD.bat = svd(B.bat)
V.bat = SVD.bat$v
rownames(V.bat) = sort(events)
D.bat = SVD.bat$d
B.bat.table = rbind(V.bat, D.bat, rep(NA, 9))
rownames(B.bat.table) = c(sort(events), 'Corresponding', 'diagonal')
print(xtable::xtable(B.bat.table, digits = 2), file = 'tabs/B-bat.tex',
    include.colnames = FALSE, only.contents = TRUE, hline.after = 9)
pdf('figs/B-bat.pdf', width = 10)
barplot(D.bat, axes = FALSE, col = 'lightgreen', border = NA, ylim = c(0, 4))
dev.off()

B.pit = fit.npmr$fit$B[group == 3, , 1]
SVD.pit = svd(B.pit)
V.pit = SVD.pit$v
rownames(V.pit) = sort(events)
D.pit = SVD.pit$d
B.pit.table = rbind(V.pit, D.pit, rep(NA, 9))
rownames(B.pit.table) = c(sort(events), 'Corresponding', 'diagonal')
print(xtable::xtable(B.pit.table, digits = 2), file = 'tabs/B-pit.tex',
    include.colnames = FALSE, only.contents = TRUE, hline.after = 9)
pdf('figs/B-pit.pdf', width = 10)
barplot(D.pit, axes = FALSE, col = 'lightgreen', border = NA, ylim = c(0, 4))
dev.off()


U.bat = SVD.bat$u
names.bat = sort(unique(batterID[subset]))
head(names.bat[order(-U.bat[, 1])])
tail(names.bat[order(-U.bat[, 1])])
head(names.bat[order(U.bat[, 2])])
tail(names.bat[order(U.bat[, 2])])
head(names.bat[order(U.bat[, 3])])
tail(names.bat[order(U.bat[, 3])])

U.pit = SVD.pit$u
names.pit = sort(unique(pitcherID[subset]))
head(names.pit[order(-U.pit[, 1])])
tail(names.pit[order(-U.pit[, 1])])
head(names.pit[order(U.pit[, 2])])
tail(names.pit[order(U.pit[, 2])])
head(names.pit[order(-U.pit[, 3])])
tail(names.pit[order(-U.pit[, 3])])


# get naive rate estimates
counts = rbind(aggregate(y, by = list(hand), table),
  aggregate(y, by = list(home), table),
  aggregate(y, by = list(as.character(data$HOME_TEAM_ID[subset])), table),
  aggregate(y, by = list(batter), table),
  aggregate(y, by = list(pitcher), table))[, 2]
rates = counts/rowSums(counts)
rownames(rates) = c('samehand', 'oppohand', 'away', 'home',
  sort(unique(as.character(data$HOME_TEAM)[subset])),
  sort(unique(batterID[subset])), sort(unique(pitcherID[subset])))

pdf('figs/biplot-bat.pdf', height = 5, width = 5)
par(mar = c(4, 4, 0, 0))
c = 20; s1 = 'G'; s2 = 'F'
plot(rates[group == 2, s1], rates[group == 2, s2], axes = FALSE, cex = 0.6,
    xlab = paste(s1, 'rate'), ylab = paste(s2, 'rate'), col = 'dodgerblue')
axis(1); axis(2)
mean1 = mean(y == s1); mean2 = mean(y == s2)
text(mean1 - V.bat[s1, 1]*D.bat[1]/c, mean2 - V.bat[s2, 1]*D.bat[1]/c - .012,
  'Patience', col = 'black', cex = 1.5)
arrows(mean1, mean2, mean1 - V.bat[s1, 1]*D.bat[1]/c,
  mean2 - V.bat[s2, 1]*D.bat[1]/c, length = .1,
  col = 'black', lwd = 2)
text(mean1 + V.bat[s1, 2]*D.bat[2]/c, mean2 + V.bat[s2, 2]*D.bat[2]/c + .01,
  'Trajectory', col = 'black', cex = 1.5)
arrows(mean1, mean2, mean1 + V.bat[s1, 2]*D.bat[2]/c,
  mean2 + V.bat[s2, 2]*D.bat[2]/c, length = .1,
  col = 'black', lwd = 2)
text(mean1 - V.bat[s1, 3]*D.bat[3]/c, mean2 - V.bat[s2, 3]*D.bat[3]/c + .012,
  'Speed', col = 'black', cex = 1.5)
arrows(mean1, mean2, mean1 - V.bat[s1, 3]*D.bat[3]/c,
  mean2 - V.bat[s2, 3]*D.bat[3]/c, length = .1,
  col = 'black', lwd = 2)
dev.off()

pdf('figs/biplot-pit.pdf', height = 5, width = 5)
par(mar = c(4, 4, 0, 0))
c = 20; s1 = 'G'; s2 = 'K'
mean1 = mean(y == s1); mean2 = mean(y == s2)
plot(rates[group == 3, s1], rates[group == 3, s2], axes = FALSE, cex = 0.6,
  xlab = paste(s1, 'rate'), ylab = paste(s2, 'rate'), col = 'dodgerblue')
axis(1); axis(2)
mean1 = mean(y == s1); mean2 = mean(y == s2)
text(mean1 - V.pit[s1, 1]*D.pit[1]/c, mean2 - V.pit[s2, 1]*D.pit[1]/c + .015,
  'Power', col = 'black', cex = 1.5)
arrows(mean1, mean2, mean1 - V.pit[s1, 1]*D.pit[1]/c,
  mean2 - V.pit[s2, 1]*D.pit[1]/c, length = .1,
  col = 'black', lwd = 2)
text(mean1 - V.pit[s1, 2]*D.pit[2]/c-.01, mean2 - V.pit[s2, 2]*D.pit[2]/c+.015,
  'Trajectory', col = 'black', cex = 1.5)
arrows(mean1, mean2, mean1 - V.pit[s1, 2]*D.pit[2]/c,
  mean2 - V.pit[s2, 2]*D.pit[2]/c, length = .1,
  col = 'black', lwd = 2)
text(mean1 + V.pit[s1, 3]*D.pit[3]/c+.05, mean2 + V.pit[s2, 3]*D.pit[3]/c,
  'Command', col = 'black', cex = 1.5)
arrows(mean1, mean2, mean1 + V.pit[s1, 3]*D.pit[3]/c,
  mean2 + V.pit[s2, 3]*D.pit[3]/c, length = .1,
  col = 'black', lwd = 2)
dev.off()


# baseball results
glmnet.mean = glmnet.sd = npmr.mean = npmr.sd = rep(NA, 5)
index = 0
for (block in c(1, 3, 6, 10, 15)) {
  index = index + 1
  load(paste('results/baseball', block, '.rda', sep = ''))
  print(index)

  deviance.glmnet = -2 * log(rowSums(Y[test, ] * pred.glmnet.test))
  glmnet.mean[index] = mean(deviance.glmnet)
  glmnet.sd[index] = sd(deviance.glmnet)/sqrt(length(deviance.glmnet))

  deviance.npmr = -2 * log(rowSums(Y[test, ] * pred.npmr.test))
  npmr.mean[index] = mean(deviance.npmr)
  npmr.sd[index] = sd(deviance.npmr)/sqrt(length(deviance.npmr))
}

pred.null = matrix(colMeans(Y), nrow(Y), ncol(Y), byrow = TRUE)
null.mean = mean(-2 * log(rowSums(Y * pred.null)))

glmnet.exp = (null.mean - glmnet.mean) / null.mean * 100
glmnet.exp.sd = glmnet.sd / null.mean * 100

npmr.exp = (null.mean - npmr.mean) / null.mean * 100
npmr.exp.sd = npmr.sd / null.mean * 100

pdf('figs/baseball-test.pdf', height = 6, width = 8)
x = c(9, 27, 54, 90, 135)
Hmisc::errbar(x+1, glmnet.exp, glmnet.exp - glmnet.exp.sd,
  glmnet.exp + glmnet.exp.sd,
  col = 'darkorange', errbar.col = 'darkorange', lwd = 2, pch = 15,
  axes = FALSE, xlab = 'Number of training plate appearances (thousands)',
  ylab = 'Test % deviance explained', ylim = c(0, 2), cex = 1.4)
Hmisc::errbar(x-1, npmr.exp, npmr.exp - npmr.exp.sd, npmr.exp + npmr.exp.sd,
  add = TRUE, col = 'dodgerblue', errbar.col = 'dodgerblue', lwd = 2, pch = 17,
  cex = 1.4)
abline(h = 0, col = 'forestgreen', lty = 2, lwd = 2)
axis(1, at = x)
axis(2)
legend('topleft', c('NPMR', 'Ridge', 'Null'),
  col = c('dodgerblue', 'darkorange', 'forestgreen'),
  pch = c(17, 15, NA), lty = c(0, 0, 2), bty = 'n', pt.cex = 1.4)
dev.off()

