library(RCPS)
library(RColorBrewer)
library(parallel)
library(ggplot2)
library(gridExtra)

### Simulations

#########################################
## Variety of recruitment trees
#########################################

# Perfect recruitment, no preferential recruitment
aa <- simrec(2, 2000, as.matrix(data.frame(char1=sample(0:1, N, replace=TRUE))), 
	0, 0, c(-2,-2,-2), -2, 4)
rtpbox(aa$rds, "char1", title="Simulation: Perfect Recruitment, No Preferential Recruitment")

# Perfect recruitment, homophilous
bb <- simrec(2, 2000, as.matrix(data.frame(char1=sample(0:1, N, replace=TRUE))), 
	2, 2, c(-2,-2,-2), -2, 4)
rtpbox(bb$rds, "char1", title="Simulation: Perfect Recruitment, Perfect Homophilous Recruitment")

# Perfect recruitment, heterophilous
cc <- simrec(2, 2000, as.matrix(data.frame(char1=sample(0:1, N, replace=TRUE))), 
	-2, -2, c(-2,-2,-2), -2, 4)
rtpbox(cc$rds, "char1", title="Simulation: Perfect Recruitment, Perfect Heterophilous Recruitment")

# Stringy, no preferential recruitment
dd <- simrec(12, 2000, as.matrix(data.frame(char1=sample(0:1, N, replace=TRUE))), 
	0, 0, c(6,6,-2), -2, 4)
rtpbox(dd$rds, "char1", title="Simulation: Stringy Trees, No Preferential Recruitment")

# Realistic trees, strong homophilous recruitment
ee <- simrec(4, 2000, as.matrix(data.frame(char1=sample(0:1, N, replace=TRUE))), 
	0.5, 0.5, c(2,2,2), 3.5, 6)
rtpbox(ee$rds, "char1", title="Simulation: Realistic Trees, Strong Homophilous Recruitment")

# Realistic trees, weak homophilous recruitment
ff <- simrec(3, 2000, as.matrix(data.frame(char1=sample(0:1, N, replace=TRUE))), 
	0.01, 0.01, c(1.5,1.5,1.5), 3.5, 10)
rtpbox(ff$rds, "char1", title="Simulation: Realistic Trees, Weak Homophilous Recruitment")

#########################################
## EXAMPLE 1: No preferential recruitment
#########################################

st.t <- Sys.time()
# Set up simulation
nseeds <- 3
N <- 500
char1 <- sample(0:1, N, replace=TRUE)
ch <- as.matrix(data.frame(char1))
waves <- 5
alpha <- 0
beta <- 0
ksi <- c(2, 1, 0)
zeta <- 1
ncoup <- 3

# Begin parallelization
no_cores <- detectCores() - 1
cl <- makeCluster(no_cores, outfile="progress_0.txt")

fn <- function(chn, nseeds, N, ch, alpha, beta, ksi, zeta, waves) {
  library(RCPS)

  a <- simrec(nseeds, N, ch, alpha, beta, ksi, zeta, waves)
  # Set up inference
  rdf <- a$rdsfull
  covnames <- c("char1")
  ma0 <- c(0, 0, 0, 0)
  sa0 <- diag(4)
  mb0 <- c(0, 0)
  sb0 <- diag(2)
  Kcalc <- 10000
  tallowed <- 100
  restrict.local <- FALSE
  
  out <- urInfR(rdf, covnames, ma0, sa0, mb0, sb0, Kcalc, tallowed, restrict.local)
  
  return(list(a=a, out=out))
  
}

out <- clusterApply(cl, 1:no_cores, fn, nseeds, N, ch, alpha, beta, ksi, zeta, waves)

#save(out, file="simout_0.RData")

stopCluster(cl)

et.t <- Sys.time()
et.t - st.t

## Gather/format results

Kcalc <- 10000
no_cores <- 7

# BURN and THIN
al1b <- c()
al2b <- c()
al3b <- c()
al4b <- c()
be1b <- c()
be2b <- c()
for (att in 1:no_cores) {
  
  almat <- rbind(out[[att]]$out$alpha[[1001]], out[[att]]$out$alpha[[1002]])
  bemat <- rbind(out[[att]]$out$beta[[1001]], out[[att]]$out$beta[[1002]])
  lim <- Kcalc
  if (att == 3) {lim <- 9180}
  for (ite in 1003:lim) {
    almat <- rbind(almat, out[[att]]$out$alpha[[ite]])
    bemat <- rbind(bemat, out[[att]]$out$beta[[ite]])
  }
  
  al1b <- c(al1b, almat[,1])
  al2b <- c(al2b, almat[,2])
  al3b <- c(al3b, almat[,3])
  al4b <- c(al4b, almat[,4])
  be1b <- c(be1b, bemat[,1])
  be2b <- c(be2b, bemat[,2])
  
}
al1bt <- al1b[seq(1, length(al1b), 20)]
al2bt <- al2b[seq(1, length(al2b), 20)]
al3bt <- al3b[seq(1, length(al3b), 20)]
al4bt <- al4b[seq(1, length(al4b), 20)]
be1bt <- be1b[seq(1, length(be1b), 20)]
be2bt <- be2b[seq(1, length(be2b), 20)]

# Plot MCMC and density
Value <- c(al1bt, al2bt, al3bt, al4bt, be1bt, be2bt)
Parameter <- factor(rep(c("alpha", "xi[1]", "xi[2]", "xi[3]", "beta", "zeta"), 
                        each=length(al1bt)))
Iteration <- c(rep(1:2250,12), rep(1:2045, 6), rep(1:2250,24))
TrueValue <- rep(c(alpha,ksi,beta,zeta), each=length(al1bt))
Sim <- rep(c(rep(1:2,each=2250), rep(3,2045), rep(4:7, each=2250)), times=6)
Median <-  rep(c(median(al1bt), median(al2bt), median(al3bt), median(al4bt), median(be1bt), median(be2bt)), each=length(al1bt))
zl <- rep(0, length(Sim))

df <- data.frame(Value, Parameter, Iteration, TrueValue, Sim, Median, zl)

## Parameter plots (main paper)
ggplot(subset(df, Parameter=="alpha" | Parameter=="beta"), aes(x=Value)) +
  geom_density(fill="gray60", adjust=2) +
  facet_grid(Parameter ~ ., labeller=label_parsed) +
  geom_vline(aes(xintercept=zl), color="black", lty=3) +
  geom_vline(aes(xintercept=TrueValue), color="red") +
  geom_vline(aes(xintercept=Median), color="blue", lty=2) +
  ggtitle("No Preferential Recruitment") + 
  theme_bw()

## MCMC plots
ggplot(data=df, aes(x=Iteration, y=Value)) +
  geom_line() +
  facet_grid(Sim ~ Parameter, labeller=label_parsed) +
  geom_hline(aes(yintercept=TrueValue), color="red") +
  ggtitle("MCMC Posterior Draws, No Preferential Recruitment") +
  theme_bw()

## Parameter plots (supplement - all reps)
ggplot(subset(df, Parameter=="alpha" | Parameter=="beta"), aes(x=Value)) +
  geom_density(fill="gray60", adjust=2) +
  facet_grid(Sim ~ Parameter, labeller=label_parsed) +
  geom_vline(aes(xintercept=TrueValue), color="red") +
  ggtitle("Posterior Distributions of Preference Parameters for Each Replicate,\nNo Preferential Recruitment") +
  theme_bw()


## Set up simulation using parameters from previous inference run
anew <- median(al1bt)
bnew <- median(be1bt)
xnew <- c(median(al2bt), median(al3bt), median(al4bt))
znew <- median(be2bt)

nseeds <- 3
N <- 500
char1 <- sample(0:1, N, replace=TRUE)
ch <- as.matrix(data.frame(char1))
waves <- 5

num0 <- c()
num1 <- c()
num2 <- c()
num3 <- c()
for (newatt in 1:200) {
  atmp <- simrec(nseeds, N, ch, anew, bnew, xnew, znew, waves)
  snr <- get.number.of.recruits(atmp$rds)
  num0[newatt] <- length(which(snr == 0)) - length(which(atmp$rds$wave == waves))
  num1[newatt] <- length(which(snr == 1))
  num2[newatt] <- length(which(snr == 2))
  num3[newatt] <- length(which(snr == 3))
}

NumRecruitsVal <- c(num0, num1, num2, num3)
NumRecruits <- rep(0:3, each=length(num0))
OrigVal1 <- rep(c(table(get.number.of.recruits(out[[1]]$a$rds))[1]-length(which(out[[1]]$a$rds$wave == waves)), 
                 table(get.number.of.recruits(out[[1]]$a$rds))[2],
                 table(get.number.of.recruits(out[[1]]$a$rds))[3],
                 table(get.number.of.recruits(out[[1]]$a$rds))[4]), each=length(num0))
OrigVal2 <- rep(c(table(get.number.of.recruits(out[[2]]$a$rds))[1]-length(which(out[[2]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[2]]$a$rds))[2],
                  table(get.number.of.recruits(out[[2]]$a$rds))[3],
                  table(get.number.of.recruits(out[[2]]$a$rds))[4]), each=length(num0))
OrigVal3 <- rep(c(table(get.number.of.recruits(out[[3]]$a$rds))[1]-length(which(out[[3]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[3]]$a$rds))[2],
                  table(get.number.of.recruits(out[[3]]$a$rds))[3],
                  table(get.number.of.recruits(out[[3]]$a$rds))[4]), each=length(num0))
OrigVal4 <- rep(c(table(get.number.of.recruits(out[[4]]$a$rds))[1]-length(which(out[[4]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[4]]$a$rds))[2],
                  table(get.number.of.recruits(out[[4]]$a$rds))[3],
                  table(get.number.of.recruits(out[[4]]$a$rds))[4]), each=length(num0))
OrigVal5 <- rep(c(table(get.number.of.recruits(out[[5]]$a$rds))[1]-length(which(out[[5]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[5]]$a$rds))[2],
                  table(get.number.of.recruits(out[[5]]$a$rds))[3],
                  table(get.number.of.recruits(out[[5]]$a$rds))[4]), each=length(num0))
OrigVal6 <- rep(c(table(get.number.of.recruits(out[[6]]$a$rds))[1]-length(which(out[[6]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[6]]$a$rds))[2],
                  table(get.number.of.recruits(out[[6]]$a$rds))[3],
                  table(get.number.of.recruits(out[[6]]$a$rds))[4]), each=length(num0))
OrigVal7 <- rep(c(table(get.number.of.recruits(out[[7]]$a$rds))[1]-length(which(out[[7]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[7]]$a$rds))[2],
                  table(get.number.of.recruits(out[[7]]$a$rds))[3],
                  table(get.number.of.recruits(out[[7]]$a$rds))[4]), each=length(num0))

dfsim <- data.frame(NumRecruits, NumRecruitsVal, OrigVal1, OrigVal2, OrigVal3, OrigVal4, OrigVal5, OrigVal6, OrigVal7)
dfsim$OrigValmed<- apply(dfsim[,3:9], 1, median)

## Plot of re-sampled number of recruits distribution
ggplot(dfsim, aes(x=NumRecruitsVal)) +
  geom_histogram(aes(y=..density..), fill="gray40", colour="black", binwidth=2, boundary=0.5) +
  facet_wrap( ~ NumRecruits, ncol=4) +
  ggtitle("Distribution of Re-Sampled Number of Recruits") +
  geom_vline(aes(xintercept=OrigVal1), color="red") +
  geom_vline(aes(xintercept=OrigVal2), color="red") +
  geom_vline(aes(xintercept=OrigVal3), color="red") +
  geom_vline(aes(xintercept=OrigVal4), color="red") +
  geom_vline(aes(xintercept=OrigVal5), color="red") +
  geom_vline(aes(xintercept=OrigVal6), color="red") +
  geom_vline(aes(xintercept=OrigVal7), color="red") +
  theme(plot.title = element_text(face="bold")) +
  xlab("Number of Recruiters") +
  theme_bw()

#########################################
## EXAMPLE 2: Homophilous recruitment
#########################################

st.t <- Sys.time()
# Set up simulation
nseeds <- 3
N <- 500
char1 <- sample(0:1, N, replace=TRUE)
ch <- as.matrix(data.frame(char1))
waves <- 5
alpha <- 0.5
beta <- 0.5
ksi <- c(2, 1, 1)
zeta <- 2
ncoup <- 3

# Begin parallelization
no_cores <- detectCores() - 1
cl <- makeCluster(no_cores, outfile="progress_p.txt")

fn <- function(chn, nseeds, N, ch, alpha, beta, ksi, zeta, waves) {
  library(RCPS)
  
  a <- simrec(nseeds, N, ch, alpha, beta, ksi, zeta, waves)
  # Set up inference
  rdf <- a$rdsfull
  covnames <- c("char1")
  ma0 <- c(0, 0, 0, 0)
  sa0 <- diag(4)
  mb0 <- c(0, 0)
  sb0 <- diag(2)
  Kcalc <- 5000
  tallowed <- 100
  restrict.local <- FALSE
  
  out <- urInfR(rdf, covnames, ma0, sa0, mb0, sb0, Kcalc, tallowed, restrict.local)
  
  return(list(a=a, out=out))
  
}

out <- clusterApply(cl, 1:no_cores, fn, nseeds, N, ch, alpha, beta, ksi, zeta, waves)

#save(out, file="simout_p.RData")

stopCluster(cl)

et.t <- Sys.time()
et.t - st.t

## Gather/format results

Kcalc <- 5000
no_cores <- 7

al1b <- c()
al2b <- c()
al3b <- c()
al4b <- c()
be1b <- c()
be2b <- c()
for (att in 1:no_cores) {
  
  almat <- rbind(out[[att]]$out$alpha[[1001]], out[[att]]$out$alpha[[1002]])
  bemat <- rbind(out[[att]]$out$beta[[1001]], out[[att]]$out$beta[[1002]])
  for (ite in 1003:Kcalc) {
    almat <- rbind(almat, out[[att]]$out$alpha[[ite]])
    bemat <- rbind(bemat, out[[att]]$out$beta[[ite]])
  }
  
  al1b <- c(al1b, almat[,1])
  al2b <- c(al2b, almat[,2])
  al3b <- c(al3b, almat[,3])
  al4b <- c(al4b, almat[,4])
  be1b <- c(be1b, bemat[,1])
  be2b <- c(be2b, bemat[,2])
  
}
al1bt <- al1b[seq(1, length(al1b), 20)]
al2bt <- al2b[seq(1, length(al2b), 20)]
al3bt <- al3b[seq(1, length(al3b), 20)]
al4bt <- al4b[seq(1, length(al4b), 20)]
be1bt <- be1b[seq(1, length(be1b), 20)]
be2bt <- be2b[seq(1, length(be2b), 20)]

# Plot MCMC and density
Value <- c(al1bt, al2bt, al3bt, al4bt, be1bt, be2bt)
Parameter <- factor(rep(c("alpha", "xi[1]", "xi[2]", "xi[3]", "beta", "zeta"), 
                        each=length(al1bt)))
Iteration <- rep(1:(length(al1bt)/no_cores), 6*no_cores)
TrueValue <- rep(c(alpha,ksi,beta,zeta), each=length(al1bt))
Median <-  rep(c(median(al1bt), median(al2bt), median(al3bt), median(al4bt), median(be1bt), median(be2bt)), each=length(al1bt))
Sim <- rep(rep(1:no_cores, each=length(al1bt)/7), times=6)
zl <- rep(0, length(Sim))

df <- data.frame(Value, Parameter, Iteration, TrueValue, Sim, Median,zl)

## Parameter plots (main paper)
ggplot(subset(df, Parameter=="alpha" | Parameter=="beta"), aes(x=Value)) +
  geom_density(fill="gray60", adjust=2) +
  facet_grid(Parameter ~ ., labeller=label_parsed) +
  geom_vline(aes(xintercept=TrueValue), color="red") +
  geom_vline(aes(xintercept=Median), color="blue", lty=2) +
  geom_vline(aes(xintercept=zl), color="black", lty=3) +
  ggtitle("Homophilous Recruitment") +
  theme_bw()

## MCMC plots
ggplot(data=df, aes(x=Iteration, y=Value)) +
  geom_line() +
  facet_grid(Sim ~ Parameter, labeller=label_parsed) +
  geom_hline(aes(yintercept=TrueValue), color="red") +
  ggtitle("MCMC Posterior Draws, Homophilous Recruitment") +
  theme_bw()

## Parameter plots (supplement - all reps)
ggplot(subset(df, Parameter=="alpha" | Parameter=="beta"), aes(x=Value)) +
  geom_density(fill="gray60", adjust=2) +
  facet_grid(Sim ~ Parameter, labeller=label_parsed) +
  geom_vline(aes(xintercept=TrueValue), color="red") +
  ggtitle("Posterior Distributions of Preference Parameters for Each Replicate,\nHomophilous Recruitment") +
  theme_bw()

## Set up simulation using parameters from previous inference run
anew <- median(al1bt)
bnew <- median(be1bt)
xnew <- c(median(al2bt), median(al3bt), median(al4bt))
znew <- median(be2bt)

nseeds <- 3
N <- 500
char1 <- sample(0:1, N, replace=TRUE)
ch <- as.matrix(data.frame(char1))
waves <- 5

num0 <- c()
num1 <- c()
num2 <- c()
num3 <- c()
for (newatt in 1:200) {
  print(newatt)
  atmp <- simrec(nseeds, N, ch, anew, bnew, xnew, znew, waves)
  snr <- get.number.of.recruits(atmp$rds)
  num0[newatt] <- length(which(snr == 0)) - length(which(atmp$rds$wave == waves))
  num1[newatt] <- length(which(snr == 1))
  num2[newatt] <- length(which(snr == 2))
  num3[newatt] <- length(which(snr == 3))
}

NumRecruitsVal <- c(num0, num1, num2, num3)
NumRecruits <- rep(0:3, each=length(num0))
OrigVal1 <- rep(c(table(get.number.of.recruits(out[[1]]$a$rds))[1]-length(which(out[[1]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[1]]$a$rds))[2],
                  table(get.number.of.recruits(out[[1]]$a$rds))[3],
                  table(get.number.of.recruits(out[[1]]$a$rds))[4]), each=length(num0))
OrigVal2 <- rep(c(table(get.number.of.recruits(out[[2]]$a$rds))[1]-length(which(out[[2]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[2]]$a$rds))[2],
                  table(get.number.of.recruits(out[[2]]$a$rds))[3],
                  table(get.number.of.recruits(out[[2]]$a$rds))[4]), each=length(num0))
OrigVal3 <- rep(c(table(get.number.of.recruits(out[[3]]$a$rds))[1]-length(which(out[[3]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[3]]$a$rds))[2],
                  table(get.number.of.recruits(out[[3]]$a$rds))[3],
                  table(get.number.of.recruits(out[[3]]$a$rds))[4]), each=length(num0))
OrigVal4 <- rep(c(table(get.number.of.recruits(out[[4]]$a$rds))[1]-length(which(out[[4]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[4]]$a$rds))[2],
                  table(get.number.of.recruits(out[[4]]$a$rds))[3],
                  table(get.number.of.recruits(out[[4]]$a$rds))[4]), each=length(num0))
OrigVal5 <- rep(c(table(get.number.of.recruits(out[[5]]$a$rds))[1]-length(which(out[[5]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[5]]$a$rds))[2],
                  table(get.number.of.recruits(out[[5]]$a$rds))[3],
                  table(get.number.of.recruits(out[[5]]$a$rds))[4]), each=length(num0))
OrigVal6 <- rep(c(table(get.number.of.recruits(out[[6]]$a$rds))[1]-length(which(out[[6]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[6]]$a$rds))[2],
                  table(get.number.of.recruits(out[[6]]$a$rds))[3],
                  table(get.number.of.recruits(out[[6]]$a$rds))[4]), each=length(num0))
OrigVal7 <- rep(c(table(get.number.of.recruits(out[[7]]$a$rds))[1]-length(which(out[[7]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[7]]$a$rds))[2],
                  table(get.number.of.recruits(out[[7]]$a$rds))[3],
                  table(get.number.of.recruits(out[[7]]$a$rds))[4]), each=length(num0))

dfsim <- data.frame(NumRecruits, NumRecruitsVal, OrigVal1, OrigVal2, OrigVal3, OrigVal4, OrigVal5, OrigVal6, OrigVal7)
dfsim$OrigValmed<- apply(dfsim[,3:9], 1, median)

## Plot of re-sampled number of recruits distribution
ggplot(dfsim, aes(x=NumRecruitsVal)) +
  geom_histogram(aes(y=..density..), fill="gray40", colour="black", binwidth=2, boundary=0.5) +
  facet_wrap( ~ NumRecruits, ncol=4) +
  ggtitle("Distribution of Re-Sampled Number of Recruits") +
  geom_vline(aes(xintercept=OrigVal1), color="red") +
  geom_vline(aes(xintercept=OrigVal2), color="red") +
  geom_vline(aes(xintercept=OrigVal3), color="red") +
  geom_vline(aes(xintercept=OrigVal4), color="red") +
  geom_vline(aes(xintercept=OrigVal5), color="red") +
  geom_vline(aes(xintercept=OrigVal6), color="red") +
  geom_vline(aes(xintercept=OrigVal7), color="red") +
  theme(plot.title = element_text(face="bold")) +
  xlab("Number of Recruiters") +
  theme_bw()

#########################################
## EXAMPLE 3: Heterophilous recruitment
#########################################

st.t <- Sys.time()
# Set up simulation
nseeds <- 3
N <- 500
char1 <- sample(0:1, N, replace=TRUE)
ch <- as.matrix(data.frame(char1))
waves <- 5
alpha <- -0.5
beta <- -0.5
ksi <- c(1, 1, 0)
zeta <- 2
ncoup <- 3

# Begin parallelization
no_cores <- detectCores() - 1
cl <- makeCluster(no_cores, outfile="progress_n.txt")

fn <- function(chn, nseeds, N, ch, alpha, beta, ksi, zeta, waves) {
  library(RCPS)
  
  a <- simrec(nseeds, N, ch, alpha, beta, ksi, zeta, waves)
  # Set up inference
  rdf <- a$rdsfull
  covnames <- c("char1")
  ma0 <- c(0, 0, 0, 0)
  sa0 <- diag(4)
  mb0 <- c(0, 0)
  sb0 <- diag(2)
  Kcalc <- 20000
  tallowed <- 500
  restrict.local <- FALSE
  
  out <- urInfR(rdf, covnames, ma0, sa0, mb0, sb0, Kcalc, tallowed, restrict.local)
  
  return(list(a=a, out=out))
  
}

out <- clusterApply(cl, 1:no_cores, fn, nseeds, N, ch, alpha, beta, ksi, zeta, waves)

#save(out, file="simout_n.RData")

stopCluster(cl)

et.t <- Sys.time()
et.t - st.t

## Gather/format results

no_cores <- 7
Kcalc <- 20000

al1b <- c()
al2b <- c()
al3b <- c()
al4b <- c()
be1b <- c()
be2b <- c()
for (att in 1:no_cores) {
  
  almat <- rbind(out[[att]]$out$alpha[[1001]], out[[att]]$out$alpha[[1002]])
  bemat <- rbind(out[[att]]$out$beta[[1001]], out[[att]]$out$beta[[1002]])
  for (ite in 1003:Kcalc) {
    almat <- rbind(almat, out[[att]]$out$alpha[[ite]])
    bemat <- rbind(bemat, out[[att]]$out$beta[[ite]])
  }
  
  al1b <- c(al1b, almat[,1])
  al2b <- c(al2b, almat[,2])
  al3b <- c(al3b, almat[,3])
  al4b <- c(al4b, almat[,4])
  be1b <- c(be1b, bemat[,1])
  be2b <- c(be2b, bemat[,2])
  
}
al1bt <- al1b[seq(1, length(al1b), 20)]
al2bt <- al2b[seq(1, length(al2b), 20)]
al3bt <- al3b[seq(1, length(al3b), 20)]
al4bt <- al4b[seq(1, length(al4b), 20)]
be1bt <- be1b[seq(1, length(be1b), 20)]
be2bt <- be2b[seq(1, length(be2b), 20)]

# Plot MCMC and density
Value <- c(al1bt, al2bt, al3bt, al4bt, be1bt, be2bt)
Parameter <- factor(rep(c("alpha", "xi[1]", "xi[2]", "xi[3]", "beta", "zeta"), 
                        each=length(al1bt)))
Iteration <- rep(1:(length(al1bt)/no_cores), 6*no_cores)
TrueValue <- rep(c(alpha,ksi,beta,zeta), each=length(al1bt))
Median <-  rep(c(median(al1bt), median(al2bt), median(al3bt), median(al4bt), median(be1bt), median(be2bt)), each=length(al1bt))
Sim <- rep(rep(1:no_cores, each=length(al1bt)/7), times=6)
zl <- rep(0, length(Sim))

df <- data.frame(Value, Parameter, Iteration, TrueValue, Sim, Median,zl)

## Parameter plots (main paper)
ggplot(subset(df, Parameter=="alpha" | Parameter=="beta"), aes(x=Value)) +
  geom_density(fill="gray60", adjust=2) +
  facet_grid(Parameter ~ ., labeller=label_parsed) +
  geom_vline(aes(xintercept=TrueValue), color="red") +
  geom_vline(aes(xintercept=Median), color="blue", lty=2) +
  geom_vline(aes(xintercept=zl), color="black", lty=3) +
  ggtitle("Heterophilous Recruitment") +
  theme_bw()

## MCMC plots
ggplot(data=df, aes(x=Iteration, y=Value)) +
  geom_line() +
  facet_grid(Sim ~ Parameter, labeller=label_parsed) +
  geom_hline(aes(yintercept=TrueValue), color="red") +
  ggtitle("MCMC Posterior Draws, Heterophilous Recruitment") +
  theme_bw()

## Parameter plots (supplement - all reps)
ggplot(subset(df, Parameter=="alpha" | Parameter=="beta"), aes(x=Value)) +
  geom_density(fill="gray60", adjust=2) +
  facet_grid(Sim ~ Parameter, labeller=label_parsed) +
  geom_vline(aes(xintercept=TrueValue), color="red") +
  ggtitle("Posterior Distributions of Preference Parameters for Each Replicate,\nHeterophilous Recruitment") +
  theme_bw()

## Set up simulation using parameters from previous inference run
anew <- median(al1bt)
bnew <- median(be1bt)
xnew <- c(median(al2bt), median(al3bt), median(al4bt))
znew <- median(be2bt)

nseeds <- 3
N <- 500
char1 <- sample(0:1, N, replace=TRUE)
ch <- as.matrix(data.frame(char1))
waves <- 5

num0 <- c()
num1 <- c()
num2 <- c()
num3 <- c()
for (newatt in 1:200) {
  print(newatt)
  atmp <- simrec(nseeds, N, ch, anew, bnew, xnew, znew, waves)
  snr <- get.number.of.recruits(atmp$rds)
  num0[newatt] <- length(which(snr == 0)) - length(which(atmp$rds$wave == waves))
  num1[newatt] <- length(which(snr == 1))
  num2[newatt] <- length(which(snr == 2))
  num3[newatt] <- length(which(snr == 3))
}

NumRecruitsVal <- c(num0, num1, num2, num3)
NumRecruits <- rep(0:3, each=length(num0))
OrigVal1 <- rep(c(table(get.number.of.recruits(out[[1]]$a$rds))[1]-length(which(out[[1]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[1]]$a$rds))[2],
                  table(get.number.of.recruits(out[[1]]$a$rds))[3],
                  table(get.number.of.recruits(out[[1]]$a$rds))[4]), each=length(num0))
OrigVal2 <- rep(c(table(get.number.of.recruits(out[[2]]$a$rds))[1]-length(which(out[[2]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[2]]$a$rds))[2],
                  table(get.number.of.recruits(out[[2]]$a$rds))[3],
                  table(get.number.of.recruits(out[[2]]$a$rds))[4]), each=length(num0))
OrigVal3 <- rep(c(table(get.number.of.recruits(out[[3]]$a$rds))[1]-length(which(out[[3]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[3]]$a$rds))[2],
                  table(get.number.of.recruits(out[[3]]$a$rds))[3],
                  table(get.number.of.recruits(out[[3]]$a$rds))[4]), each=length(num0))
OrigVal4 <- rep(c(table(get.number.of.recruits(out[[4]]$a$rds))[1]-length(which(out[[4]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[4]]$a$rds))[2],
                  table(get.number.of.recruits(out[[4]]$a$rds))[3],
                  table(get.number.of.recruits(out[[4]]$a$rds))[4]), each=length(num0))
OrigVal5 <- rep(c(table(get.number.of.recruits(out[[5]]$a$rds))[1]-length(which(out[[5]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[5]]$a$rds))[2],
                  table(get.number.of.recruits(out[[5]]$a$rds))[3],
                  table(get.number.of.recruits(out[[5]]$a$rds))[4]), each=length(num0))
OrigVal6 <- rep(c(table(get.number.of.recruits(out[[6]]$a$rds))[1]-length(which(out[[6]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[6]]$a$rds))[2],
                  table(get.number.of.recruits(out[[6]]$a$rds))[3],
                  table(get.number.of.recruits(out[[6]]$a$rds))[4]), each=length(num0))
OrigVal7 <- rep(c(table(get.number.of.recruits(out[[7]]$a$rds))[1]-length(which(out[[7]]$a$rds$wave == waves)), 
                  table(get.number.of.recruits(out[[7]]$a$rds))[2],
                  table(get.number.of.recruits(out[[7]]$a$rds))[3],
                  table(get.number.of.recruits(out[[7]]$a$rds))[4]), each=length(num0))

dfsim <- data.frame(NumRecruits, NumRecruitsVal, OrigVal1, OrigVal2, OrigVal3, OrigVal4, OrigVal5, OrigVal6, OrigVal7)
dfsim$OrigValmed<- apply(dfsim[,3:9], 1, median)

## Plot of re-sampled number of recruits distribution
ggplot(dfsim, aes(x=NumRecruitsVal)) +
  geom_histogram(aes(y=..density..), fill="gray40", colour="black", binwidth=2, boundary=0.5) +
  facet_wrap( ~ NumRecruits, ncol=4) +
  ggtitle("Distribution of Re-Sampled Number of Recruits") +
  geom_vline(aes(xintercept=OrigVal1), color="red") +
  geom_vline(aes(xintercept=OrigVal2), color="red") +
  geom_vline(aes(xintercept=OrigVal3), color="red") +
  geom_vline(aes(xintercept=OrigVal4), color="red") +
  geom_vline(aes(xintercept=OrigVal5), color="red") +
  geom_vline(aes(xintercept=OrigVal6), color="red") +
  geom_vline(aes(xintercept=OrigVal7), color="red") +
  theme(plot.title = element_text(face="bold")) +
  xlab("Number of Recruiters") +
  theme_bw()

#########################################
## Pseudo-code for Francophone migrants 
## (unable to share original data due to confidentiality)
#########################################

rdsdf <- read.rdsobj("MigrantsFranco2013Rabat.rdsobj")
rdsdf$wherelive <- rdsdf$Q209wherelive

# Recruitment plot
rtpbox(rdsdf, "wherelive")

homophily.estimates(rdsdf, "wherelive", recruitment=TRUE) # 2.39

# Set up model
rdsdf$peer <- get.id(rdsdf)
covnames <- c("wherelive")
ma0 <- c(0, 0, 1, 1, 1)
sa0 <- diag(5)
mb0 <- c(0, 0, 0)
sb0 <- diag(3)
Kcalc <- 2000
tallowed <- 24

# Run model
#set.seed(40706)
out <- urInfR(rdsdf, covnames, ma0, sa0, mb0, sb0, Kcalc, tallowed, 
	restrict.local=FALSE, usenetsize=TRUE)

## Extract values of parameters
nwav <- 6
noutiter <- Kcalc #set differently if sampler times out

aout1 <- c()
aout2 <- c()
ksiout1 <- c()
ksiout2 <- c()
ksiout3 <- c()
bout1 <- c()
bout2 <- c()
zetaout <- c()
for (i in 1:noutiter) {
  wav <- sample(1:nwav, 1)
  aout1 <- c(aout1, out$alpha[[i]][wav, 1])
  aout2 <- c(aout2, out$alpha[[i]][wav, 2])
  ksiout1 <- c(ksiout1, out$alpha[[i]][wav, 3])
  ksiout2 <- c(ksiout2, out$alpha[[i]][wav, 4])
  ksiout3 <- c(ksiout3, out$alpha[[i]][wav, 5])
  bout1 <- c(bout1, out$beta[[i]][wav, 1])
  bout2 <- c(bout2, out$beta[[i]][wav, 2])
  zetaout <- c(zetaout, out$beta[[i]][wav, 3])
}

burn <- 500
int <- 5

aout1bi <- aout1[seq(burn+1, length(aout1), int)]
aout2bi <- aout2[seq(burn+1, length(aout2), int)]
ksiout1bi <- ksiout1[seq(burn+1, length(ksiout1), int)]
ksiout2bi <- ksiout2[seq(burn+1, length(ksiout2), int)]
ksiout3bi <- ksiout3[seq(burn+1, length(ksiout3), int)]
bout1bi <- bout1[seq(burn+1, length(bout1), int)]
bout2bi <- bout2[seq(burn+1, length(bout2), int)]
zetaoutbi <- zetaout[seq(burn+1, length(zetaout), int)]

alphamod <- c(median(aout1bi), median(aout2bi))
betamod <- c(median(bout1bi), median(bout2bi))
ksimod <- c(median(ksiout1bi), median(ksiout2bi), median(ksiout3bi))
zetamod <- median(zetaoutbi)

## Here are the real values
alphaout <- c(0.417, 0.083)
betaout <- c(0.552, -0.339)
xiout <- c(0.351, -0.182, -0.866)
zetaout <- 2.184

## Re-simulation with model parameters (two examples)
rdsdf$wherelive <- factor(rdsdf$Q209wherelive)

resim1 <- gennewchain(alphaout, betaout, usenet = TRUE, xiout, zetaout, rdsdf, "wherelive")
homophily.estimates(resim1$rds, "char", recruitment=TRUE)

rtpbox(resim1$rds, "char")

resim2 <- gennewchain(alphaout, betaout, usenet = TRUE, xiout, zetaout, rdsdf, "wherelive")
homophily.estimates(resim2$rds, "char", recruitment=TRUE)

rtpbox(resim2$rds, "char")


## Re-simulation with model parameters (many)
rh <- c()
nr <- matrix(0, nrow=500, ncol=4)
nw <- matrix(0, nrow=500, ncol=11)
difn.sum <- c()
difn.abs <- c()

ctr <- 1

while (length(rh) < 500) {
  
  while (ctr < 500) {
  tmp <- gennewchain(alphaout, betaout, usenet = TRUE, xiout, zetaout, rdsdf, "wherelive")
  
  ht <- chisq.test(homophily.estimates(tmp$rds, "char", recruitment=TRUE)@estimate)
  rh[ctr] <- sum(diag(ht$observed)) / sum(diag(ht$expected))
  nr[ctr, ] <- table(get.number.of.recruits(tmp$rds))
  nw[ctr, 1:(max(get.wave(tmp$rds))+1)] <- table(get.wave(tmp$rds))
  ds <- get.net.size(tmp$rds)
  r.ds <- ds[as.numeric(get.rid(tmp$rds))]
  difn.sum[ctr] <- sum(r.ds - ds[-c(1:6)], na.rm=TRUE)
  difn.abs[ctr] <- sum(abs(r.ds - ds[-c(1:6)]), na.rm=TRUE)
  
  ctr <- ctr + 1
  
  }
  
}

rh1=rh
nr1=nr
nw1=nw
difn.sum1=difn.sum
difn.abs1=difn.abs

rh2=c(rh, rh1)
nr2=c(nr[1:26,], nr1[1:72,])
nw2=c(nw[1:26,], nw1[1:72,])
difn.sum2=c(difn.sum, difn.sum1)
difn.abs2=c(difn.abs, difn.abs1)

nr2.m <- rbind(matrix(nr2[1:(26*4)],ncol=4), nr1[1:72,])
nw2.m <- rbind(matrix(nw2[1:(26*11)],ncol=11), nw1[1:72,])

rh3=c(rh, rh2)
nr3=rbind(nr[1:107,], nr2.m)
nw3=rbind(nw[1:107,], nw2.m)
difn.sum3=c(difn.sum, difn.sum2)
difn.abs3=c(difn.abs, difn.abs2)

ds <- get.net.size(rdsdf)
r.ds <- ds[as.numeric(get.rid(rdsdf))]

sum(r.ds - ds, na.rm=TRUE)


nr3.m <- matrix(nr3,ncol=4)
nw3.m <- matrix(nw3,ncol=11)

rh.u <- rh3[6:205]
nr.u <- nr3[6:205, ]
nw.u <- nw3[6:205, ]
difn.sum.u <- difn.sum3[6:205]
difn.abs.u <- difn.abs3[6:205]

dfp <- data.frame(rh=rh.u, difn.sum=difn.sum.u)

# Plots of re-simulated model parameters
ggplot(dfp, aes(x=rh.u)) + 
  geom_density(fill="gray80") + 
  theme_bw() + 
  geom_vline(xintercept=2.399, col="red") + 
  labs(x="Recruitment Homophily on Living Location", 
  	title="Simulated Assessment of Living Location")

ggplot(dfp, aes(x=difn.sum)) + 
  geom_density(fill="gray80") + 
  theme_bw() + 
  geom_vline(xintercept=1141, col="red") + 
  labs(x="Sum of Recruiter-Peer Difference in Network Size", 
	title="Simulated Assessment of Network Size")

# Plot of number of people per wave
df.nw <- data.frame(count=c(nw.u), wave=rep(0:10, each=200))
p1 <- ggplot(df.nw[df.nw$wave==1,], aes(x=count)) + 
  geom_histogram() + 
  theme_bw() + 
  labs(x="Number of People", title="Wave 1") + 
  geom_vline(xintercept=17, col="red")
p2 <- ggplot(df.nw[df.nw$wave==2,], aes(x=count)) + 
  geom_histogram() + 
  theme_bw() + 
  labs(x="Number of People", title="Wave 2") + 
  geom_vline(xintercept=44, col="red")
p3 <- ggplot(df.nw[df.nw$wave==3,], aes(x=count)) + 
  geom_histogram() + theme_bw() + 
  labs(x="Number of People", title="Wave 3") + 
  geom_vline(xintercept=99, col="red")
p4 <- ggplot(df.nw[df.nw$wave==4,], aes(x=count)) + 
  geom_histogram() + 
  theme_bw() + 
  labs(x="Number of People", title="Wave 4") + 
  geom_vline(xintercept=127, col="red")
p5 <- ggplot(df.nw[df.nw$wave==5,], aes(x=count)) + 
  geom_histogram() + 
  theme_bw() + 
  labs(x="Number of People", title="Wave 5") + 
  geom_vline(xintercept=81, col="red")
p6 <- ggplot(df.nw[df.nw$wave==6,], aes(x=count)) + 
  geom_histogram() + 
  theme_bw() + 
  labs(x="Number of People", title="Wave 6") + 
  geom_vline(xintercept=35, col="red")
p7 <- ggplot(df.nw[df.nw$wave==7,], aes(x=count)) + 
  geom_histogram() + 
  theme_bw() + 
  labs(x="Number of People", title="Wave 7") + 
  geom_vline(xintercept=1, col="red")
p8 <- ggplot(df.nw[df.nw$wave==8,], aes(x=count)) + 
  geom_histogram() + 
  theme_bw() + 
  labs(x="Number of People", title="Wave 8") + 
  geom_vline(xintercept=0, col="red")
p9 <- ggplot(df.nw[df.nw$wave==9,], aes(x=count)) + 
  geom_histogram() + 
  theme_bw() + 
  labs(x="Number of People", title="Wave 9") + 
  geom_vline(xintercept=0, col="red")
p10 <- ggplot(df.nw[df.nw$wave==10,], aes(x=count)) + 
  geom_histogram() + 
  theme_bw() + 
  labs(x="Number of People", title="Wave 10") + 
  geom_vline(xintercept=0, col="red")
grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, ncol=5)

df.nr <- data.frame(count=c(nr.u), 
	num=rep(0:3, each=200), 
	int=rep(c(233,43,41,93), each=200))

# Plot of number of recruits 
ggplot(df.nr, aes(x=count)) + 
  geom_histogram() + 
  facet_grid(.~num) + 
  theme_bw() + 
  labs(x="Number of recruits") + 
  geom_vline(aes(xintercept=int), col="red")

