# Goal: To do OLS by MLE. # OLS likelihood function # Note: I am going to write the LF using sigma2=sigma^2 and not sigma. ols.lf1 <- function(theta, y, X) { beta <- theta[-1] sigma2 <- theta[1] if (sigma2 <= 0) return(NA) n <- nrow(X) e <- y - X%*%beta # t() = matrix transpose logl <- ((-n/2)*log(2*pi)) - ((n/2)*log(sigma2)) - ((t(e)%*%e)/(2*sigma2)) return(-logl) # since optim() does minimisation by default. } # Analytical derivatives ols.gradient <- function(theta, y, X) { beta <- theta[-1] sigma2 <- theta[1] e <- y - X%*%beta n <- nrow(X) g <- numeric(length(theta)) g[1] <- (-n/(2*sigma2)) + (t(e)%*%e)/(2*sigma2*sigma2) # d logl / d sigma g[-1] <- (t(X) %*% e)/sigma2 # d logl / d beta return(-g) } X <- cbind(1, runif(1000)) theta.true <- c(2,4,6) # error variance = 2, intercept = 4, slope = 6. y <- X %*% theta.true[-1] + sqrt(theta.true[1]) * rnorm(1000) # Estimation by OLS -- d <- summary(lm(y ~ X[,2])) theta.ols <- c(sigma2 = d$sigma^2, d$coefficients[,1]) cat("OLS theta = ", theta.ols, "\n\n") cat("\nGradient-free (constrained optimisation) --\n") optim(c(1,1,1), method="L-BFGS-B", fn=ols.lf1, lower=c(1e-6,-Inf,-Inf), upper=rep(Inf,3), y=y, X=X) cat("\nUsing the gradient (constrained optimisation) --\n") optim(c(1,1,1), method="L-BFGS-B", fn=ols.lf1, gr=ols.gradient, lower=c(1e-6,-Inf,-Inf), upper=rep(Inf,3), y=y, X=X) cat("\n\nYou say you want a covariance matrix?\n") p <- optim(c(1,1,1), method="L-BFGS-B", fn=ols.lf1, gr=ols.gradient, lower=c(1e-6,-Inf,-Inf), upper=rep(Inf,3), hessian=TRUE, y=y, X=X) inverted <- solve(p$hessian) results <- cbind(p$par, sqrt(diag(inverted)), p$par/sqrt(diag(inverted))) colnames(results) <- c("Coefficient", "Std. Err.", "t") rownames(results) <- c("Sigma", "Intercept", "X") cat("MLE results --\n") print(results) cat("Compare with the OLS results --\n") d$coefficients # Picture of how the loglikelihood changes if you perturb the sigma theta <- theta.ols delta.values <- seq(-1.5, 1.5, .01) logl.values <- as.numeric(lapply(delta.values, function(x) {-ols.lf1(theta+c(x,0,0),y,X)})) plot(sqrt(theta[1]+delta.values), logl.values, type="l", lwd=3, col="blue", xlab="Sigma", ylab="Log likelihood") grid()