###### #Computation Section #### ##1: A Normal Example: optim and simulation ##2: Apply() ##3: Challenge problems for apply() ##4: Challenge problem for a normal example ################ ## 1. A Normal Example: optim and simulation ################ library(datasets) library(xtable) #We are going to use a dataset of chicks' weights on different diets data(ChickWeight) head(ChickWeight) #Create a log-likelihood function ll.normal <- function(par,y,X){ beta <- par[1:ncol(X)] sigma2 <- exp(par[ncol(X)+1]) -1/2 * (sum(log(sigma2) + (y -(X%*%beta))^2/sigma2)) } # testing the function: ll.normal(par=c(1,2,3), y=ChickWeight\$weight, X=cbind(1,ChickWeight\$Time)) #create a 1/0 variable for three out of four diets ChickWeight\$diet1 <- as.numeric(ChickWeight\$Diet==1) ChickWeight\$diet2 <- as.numeric(ChickWeight\$Diet==2) ChickWeight\$diet3 <- as.numeric(ChickWeight\$Diet==3) #create the model matrix and dependent variable X <- cbind(1,ChickWeight\$Time, ChickWeight\$diet1, ChickWeight\$diet2, ChickWeight\$diet3) y <- ChickWeight\$weight #optimize to get the MLEs my.opt1 <- optim(par = rep(0, ncol(X) + 1), fn = ll.normal, y = y, X = X, control = list(fnscale = -1), method = "BFGS", hessian = TRUE) coefficients1 <- my.opt1\$par #Check with the linear model lm.1 <- lm(weight ~ Time + diet1 + diet2 + diet3, data=ChickWeight) ## Make a pretty table output ses1 <- sqrt(diag(solve(-my.opt1\$hessian))) tabl1 <- cbind(coefficients1, ses1) row.names(tabl1) <- c("Intercept","Time", "Diet 1", "Diet 2", "Diet 3", "sigma") xtable(tabl1, digits=3) #Simulate: #We want to compare a chick on diet 1, 2, 3, and 4 #Create sets of covariates for chicks on each diet diet1 <- c(1, median(ChickWeight\$Time), 1, 0, 0) diet2 <- c(1, median(ChickWeight\$Time), 0, 1, 0) diet3 <- c(1, median(ChickWeight\$Time), 0, 0, 1) diet4 <- c(1, median(ChickWeight\$Time), 0, 0, 0) #Simulate betas to get estimation uncertainty library(mvtnorm) set.seed(12345) simbetas <- rmvnorm(1000, mean=my.opt1\$par, sigma=(solve(-my.opt1\$hessian))) #We are going to use a for loop to be clear here. #First, create empty vectors for the y's for each diet simydiet1 <- NULL simydiet2 <- NULL simydiet3 <- NULL simydiet4 <- NULL #For each row of simbetas, simulate a normal random variable for each diet for(i in 1:nrow(simbetas)){ betas <- simbetas[i,1:(ncol(simbetas)-1)] sigma2 <- exp(simbetas[i,ncol(simbetas)]) simydiet1[i] <- rnorm(1,diet1%*%betas, sqrt(sigma2)) simydiet2[i] <- rnorm(1,diet2%*%betas, sqrt(sigma2)) simydiet3[i] <- rnorm(1,diet3%*%betas, sqrt(sigma2)) simydiet4[i] <- rnorm(1,diet4%*%betas, sqrt(sigma2)) } #Plot the predicted weights for the different diets. plot(density(simydiet1), main="Predicted Weight", xlab="Weight", xlim=c(-50, 250), ylim=c(0,.015)) lines(density(simydiet2), col="blue") lines(density(simydiet3), col="orange") lines(density(simydiet4), col="red") legend(-40, .01, c("Diet 1", "Diet 2", "Diet 3", "Diet 4"), col=c("black", "blue", "orange", "red"), lty=1) ############ # Part 2: Apply ########### #Make some random numbers set.seed(01238) mat <- matrix(rnorm(20,5,4), nrow=5, ncol=4) #Basic apply use apply(mat,1,mean) apply(mat,2,median) ############### # Part 3: Apply Challenge problems ############## #Challenge: only find the mean of the first three data points in each row apply(mat,1, function (x) mean(x[1:3])) #Challenge: create a new matrix where all columns sum to 1 mat2 <- apply(mat, 1, function (x) x/sum(x)) #Use apply to check to see if you did it right apply(mat2,2,sum) #Challenge: use apply for the simulated data simydiet1 <- apply(simbetas, 1, function(beta) rnorm(1, diet1%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)])))) simydiet2 <- apply(simbetas, 1, function(beta) rnorm(1, diet2%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)])))) simydiet3 <- apply(simbetas, 1, function(beta) rnorm(1, diet3%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)])))) simydiet4 <- apply(simbetas, 1, function(beta) rnorm(1, diet4%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)])))) ############## # Simulation and Optim Challenge: Put an interaction term on time and diet1 and optimize. ############# X <- cbind(1,ChickWeight\$Time, ChickWeight\$diet1, ChickWeight\$diet2, ChickWeight\$diet3, ChickWeight\$diet1*ChickWeight\$Time) y <- ChickWeight\$weight my.opt1 <- optim(par = rep(0, ncol(X) + 1), fn = ll.normal, y = y, X = X, control = list(fnscale = -1), method = "BFGS", hessian = TRUE) coefficients1 <- my.opt1\$par #Check with the linear model lm.1 <- lm(weight ~ Time*diet1 + diet2 + diet3, data=ChickWeight) ## Make a pretty output ses1 <- sqrt(diag(solve(-my.opt1\$hessian))) tabl1 <- cbind(coefficients1, ses1) row.names(tabl1) <- c("Intercept","Time", "Diet 1", "Diet 2", "Diet 3", "Diet1*Time", "sigma") xtable(tabl1, digits=3) #Challenge: Use simlulation to plot how time influences Chickweight for diet 1 and diet 2 on average library(mvtnorm) set.seed(12345) simbetas <- rmvnorm(1000, mean=my.opt1\$par, sigma=(solve(-my.opt1\$hessian))) time <- seq(0,21) simydiet1 <- matrix(nrow=1000,ncol=22) simydiet2 <- matrix(nrow=1000,ncol=22) diet2 <- c(1, median(ChickWeight\$Time), 0, 1, 0, 0) for(i in 1:length(time)){ diet1 <- c(1, median(ChickWeight\$Time), 1, 0, 0, time[i]) simydiet1[,i] <- apply(simbetas, 1, function(beta) rnorm(1, diet1%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)])))) simydiet2[,i] <- apply(simbetas, 1, function(beta) rnorm(1, diet2%*%beta[-length(beta)], sd=sqrt(exp(beta[length(beta)])))) } diet1mean <- apply(simydiet1,2,mean) diet2mean <- apply(simydiet2,2,mean) plot(time, diet1mean, type="l", col="red", ylab="Average Weight", xlab="Time") lines(time, diet2mean) text(10,90, "Diet 1", col="red") text(16, 110, "Diet 2", col="black")