######
#Computation Section
####

##1: A Normal Example: optim and simulation
##2: Apply()
##3: Challenge problems for apply()
##4: Challenge problem for a normal example

################
## 1. A Normal Example: optim and simulation
################

library(datasets)
library(xtable)

#We are going to use a dataset of chicks' weights on different diets
data(ChickWeight)
head(ChickWeight)

#Create a log-likelihood function
ll.normal <- function(par,y,X){
	beta <- par[1:ncol(X)]
	sigma2 <- exp(par[ncol(X)+1])
	-1/2 * (sum(log(sigma2) + (y -(X%*%beta))^2/sigma2)) 
	}

# testing the function:
ll.normal(par=c(1,2,3), y=ChickWeight$weight, X=cbind(1,ChickWeight$Time))

#create a 1/0 variable for three out of four diets
ChickWeight$diet1 <- as.numeric(ChickWeight$Diet==1)
ChickWeight$diet2 <- as.numeric(ChickWeight$Diet==2)
ChickWeight$diet3 <- as.numeric(ChickWeight$Diet==3)

#create the model matrix and dependent variable
X <- cbind(1,ChickWeight$Time, ChickWeight$diet1, ChickWeight$diet2, ChickWeight$diet3)
y <- ChickWeight$weight

#optimize to get the MLEs
my.opt1 <- optim(par = rep(0, ncol(X) + 1), fn = ll.normal, y = y, X = X, control = list(fnscale = -1), method = "BFGS", hessian = TRUE)

coefficients1 <- my.opt1$par

#Check with the linear model
lm.1 <- lm(weight ~ Time + diet1 + diet2 + diet3, data=ChickWeight)

## Make a pretty table output
ses1 <- sqrt(diag(solve(-my.opt1$hessian)))
tabl1 <- cbind(coefficients1, ses1)
row.names(tabl1) <- c("Intercept","Time", "Diet 1", "Diet 2", "Diet 3", "sigma")
xtable(tabl1, digits=3)

#Simulate:
#We want to compare a chick on diet 1, 2, 3, and 4

#Create sets of covariates for chicks on each diet
diet1 <- c(1, median(ChickWeight$Time), 1, 0, 0)
diet2 <- c(1, median(ChickWeight$Time), 0, 1, 0)
diet3 <- c(1, median(ChickWeight$Time), 0, 0, 1)
diet4 <- c(1, median(ChickWeight$Time), 0, 0, 0)

#Simulate betas to get estimation uncertainty
library(mvtnorm)
set.seed(12345) 
simbetas <- rmvnorm(1000, mean=my.opt1$par, sigma=(solve(-my.opt1$hessian)))

#We are going to use a for loop to be clear here.
#First, create empty vectors for the y's for each diet
simydiet1 <- NULL
simydiet2 <- NULL
simydiet3 <- NULL
simydiet4 <- NULL

#For each row of simbetas, simulate a normal random variable for each diet
for(i in 1:nrow(simbetas)){
  betas <- simbetas[i,1:(ncol(simbetas)-1)]
  sigma2 <- exp(simbetas[i,ncol(simbetas)])
  simydiet1[i] <- rnorm(1,diet1%*%betas, sqrt(sigma2))
  simydiet2[i] <- rnorm(1,diet2%*%betas, sqrt(sigma2))
  simydiet3[i] <- rnorm(1,diet3%*%betas, sqrt(sigma2))
  simydiet4[i] <- rnorm(1,diet4%*%betas, sqrt(sigma2))
}

#Plot the predicted weights for the different diets. 
plot(density(simydiet1), main="Predicted Weight", xlab="Weight", xlim=c(-50, 250), ylim=c(0,.015))
lines(density(simydiet2), col="blue")
lines(density(simydiet3), col="orange")
lines(density(simydiet4), col="red")
legend(-40, .01, c("Diet 1", "Diet 2", "Diet 3", "Diet 4"), col=c("black", "blue", "orange", "red"), lty=1)

############
# Part 2:  Apply
###########

#Make some random numbers
set.seed(01238)
mat <- matrix(rnorm(20,5,4), nrow=5, ncol=4)

#Basic apply use
apply(mat,1,mean)
apply(mat,2,median)

###############
# Part 3: Apply Challenge problems
##############

#Challenge: only find the mean of the first three data points in each row











































apply(mat,1, function (x) mean(x[1:3]))

#Challenge: create a new matrix where all columns sum to 1































mat2 <- apply(mat, 1, function (x) x/sum(x))

#Use apply to check to see if you did it right






































apply(mat2,2,sum)

#Challenge: use apply for the simulated data






































simydiet1 <- apply(simbetas, 1, function(beta) rnorm(1, diet1%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)]))))              
simydiet2 <- apply(simbetas, 1, function(beta) rnorm(1, diet2%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)]))))              
simydiet3 <- apply(simbetas, 1, function(beta) rnorm(1, diet3%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)]))))              
simydiet4 <- apply(simbetas, 1, function(beta) rnorm(1, diet4%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)]))))              

##############
# Simulation and Optim Challenge: Put an interaction term on time and diet1 and optimize.
#############








































X <- cbind(1,ChickWeight$Time, ChickWeight$diet1, ChickWeight$diet2, ChickWeight$diet3, ChickWeight$diet1*ChickWeight$Time)
y <- ChickWeight$weight

my.opt1 <- optim(par = rep(0, ncol(X) + 1), fn = ll.normal, y = y, 
                 X = X, control = list(fnscale = -1), method = "BFGS", hessian = TRUE)

coefficients1 <- my.opt1$par


#Check with the linear model
lm.1 <- lm(weight ~ Time*diet1 + diet2 + diet3, data=ChickWeight)

## Make a pretty output
ses1 <- sqrt(diag(solve(-my.opt1$hessian)))
tabl1 <- cbind(coefficients1, ses1)
row.names(tabl1) <- c("Intercept","Time", "Diet 1", "Diet 2", "Diet 3", "Diet1*Time", "sigma")
xtable(tabl1, digits=3)

#Challenge:  Use simlulation to plot how time influences Chickweight for diet 1 and diet 2 on average




















































library(mvtnorm)

set.seed(12345) 
simbetas <- rmvnorm(1000, mean=my.opt1$par, sigma=(solve(-my.opt1$hessian)))
time <- seq(0,21)
simydiet1 <- matrix(nrow=1000,ncol=22)
simydiet2 <- matrix(nrow=1000,ncol=22)
diet2 <- c(1, median(ChickWeight$Time), 0, 1, 0, 0)
for(i in 1:length(time)){
  diet1 <- c(1, median(ChickWeight$Time), 1, 0, 0, time[i])
  simydiet1[,i] <- apply(simbetas, 1, function(beta) rnorm(1, diet1%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)]))))              
  simydiet2[,i] <- apply(simbetas, 1, function(beta) rnorm(1, diet2%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)]))))              
}

diet1mean <- apply(simydiet1,2,mean)
diet2mean <- apply(simydiet2,2,mean)
plot(time, diet1mean, type="l", col="red", ylab="Average Weight", xlab="Time")
lines(time, diet2mean)
text(10,90, "Diet 1", col="red")
text(16, 110, "Diet 2", col="black")