############# Goodness of Fit Measures AND Maximum Likelihood Estimation & Data Problems (Code #5c) #############

#### Regression & R^2

### 3-factor FF Regression for IBM: Compute R^2 & Adj-R^2
SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv", head=TRUE, sep=",")
x_ibm <- SFX_da$IBM          # extract IBM price data
x_Mkt_RF <- SFX_da$Mkt_RF    # extract Market excess returns (in %)
x_RF <- SFX_da$RF            # extract Risk-free rate (in %)
x_SMB <- SFX_da$SMB          # extract SMB factor (in %)
x_HML <- SFX_da$HML          # extract HML factor (in %)

# Define log returns & adjust size of variables accordingly
T <- length(x_ibm)                   # sample size
lr_ibm <- log(x_ibm[-1]/x_ibm[-T])   # create IBM log returns (in decimal returns)
Mkt_RF <- x_Mkt_RF[-1]/100           # adjust sample size to (T-1) by removing 1st obs & use decimal returns
RF <- x_RF[-1]/100
SMB <- x_SMB[-1]/100
HML <- x_HML[-1]/100
y <- lr_ibm - RF                     # IBM excess returns
T <- length(y)
k <- 4                               # number of regressors (to be used in AIC)

fit_ibm_ff3 <- lm(y ~ Mkt_RF + SMB + HML)    # 3-factor FF regression
summary(fit_ibm_ff3)
e_ibm_ff3 <- fit_ibm_ff3$residuals           # extract OLS residuals from lm object
RSS_ibm_ff3 <- sum(e_ibm_ff3^2)              # RSS
R2_ibm_ff3 <- 1 - RSS_ibm_ff3/as.numeric(t(y)%*%y)   # (uncentered) R^2
AIC <- log(RSS_ibm_ff3/T) + 2*k/T            # Akaike's IC
summary(fit_ibm_ff3)$r.squared               # R^2 reported by lm
summary(fit_ibm_ff3)$adj.r.squared           # Adjusted R^2 reported by lm

### R^2 for 3-factor FF Regression for IBM + Noise factor
Noise_factor <- rnorm(T)                                     # create irrelevant variable (pure noise)
fit_ibm_ff3_N <- lm(y ~ Mkt_RF + SMB + HML + Noise_factor)   # 3-factor FF regression + Noise factor
summary(fit_ibm_ff3_N)
summary(fit_ibm_ff3_N)$r.squared       # check that R^2 increased relative to R2_ibm_ff3
summary(fit_ibm_ff3_N)$adj.r.squared   # Adjusted R^2 penalizes the irrelevant regressor

#### Maximum Likelihood Example: Toss a coin 100 times. We get 60 heads
p <- .50   # initial value for p (=.50 if I believe the coin is fair)
h <- 60    # 60 heads
n <- 100   # 100 tosses
factorial(n)/(factorial(n-h)*factorial(h))   # number of ways to get 60 heads out of 100
choose(n,h)                                  # R function for the same combination
prob_h <- choose(n,h)*p^h*(1-p)^(n-h)        # binomial probability of observing h heads
prob_h
dbinom(h, 100, p)                            # R way of calculating binomial prob of observing h heads

# Calculate the p that maximizes the probability of observing 60 heads
# Step 1 - Create Likelihood function
likelihood_b <- function(p){            # likelihood as a function of p
  dbinom(h, 100, p)
}
likelihood_b(p)                         # print likelihood at initial value
negative_likelihood_b <- function(p){   # nlm minimizes, so change the sign of the function
  dbinom(h, 100, p) * (-1)
}
negative_likelihood_b(p)
# Step 2 - Maximize (or minimize the negative Likelihood function)
nlm(negative_likelihood_b, p, stepmax=0.5)   # nlm minimizes the function
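# (Added check, not in the original script) For the binomial likelihood the MLE
# has a closed form, p_hat = h/n; a quick sanity check on the nlm output above:
h/n                 # = 0.6; should match the nlm estimate
likelihood_b(h/n)   # likelihood at the MLE (the maximized probability)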
#### Normal sample: 5, 6, 7, 8, 9, 10 with unknown mean (& known SD=1)
mu <- 0                   # initial value for mu (=mean)
x_6 <- seq(5, 10, by=1)   # observed data x_6 = (5, 6, 7, 8, 9, 10)
x_6
dnorm(5, mu, sd=1)        # density of x=5 when data come from N(0,1)
dnorm(x_6)                # densities of the whole vector x_6 under N(0,1)
prod(dnorm(x_6))          # likelihood function evaluated at observed data
l_f <- prod(dnorm(x_6))   # assign l_f to the likelihood evaluated at observed data
l_f
log(l_f)                  # log likelihood function evaluated at observed data
sum(log(dnorm(x_6)))      # compact notation for the same log likelihood

# Calculate the mu that maximizes the probability of observing {5, 6, 7, 8, 9, 10}
# Step 1 - Create Likelihood function
likelihood_n <- function(mu){            # log likelihood as a function of mu
  sum(log(dnorm(x_6, mu, sd=1)))
}
likelihood_n(mu)                         # print log likelihood at initial value
negative_likelihood_n <- function(mu){   # nlm minimizes, so change the sign of the function
  sum(log(dnorm(x_6, mu, sd=1))) * (-1)
}
negative_likelihood_n(mu)
# Step 2 - Maximize (or minimize the negative Likelihood function)
results_n <- nlm(negative_likelihood_n, mu, stepmax=2)   # nlm minimizes the function
results_n                      # show nlm results
mu_max <- results_n$estimate   # extract estimate
mu_max                         # should equal the sample mean (= 7.5)
likelihood_n(mu_max)           # check max value at mu_max

# Standard Error of mu
results_n <- nlm(negative_likelihood_n, mu, stepmax=2, hessian=TRUE)
mu_hess <- results_n$hessian   # extract estimated Hessian (matrix of 2nd derivatives)
mu_hess                        # show Hessian
cov <- solve(mu_hess)          # invert Hessian to get Var(mu_MLE)
cov                            # show variance for mu_MLE
stderr <- sqrt(diag(cov))      # compute S.E. for mu_MLE
stderr

#### Normal sample: 5, 6, 7, 8, 9, 10 with unknown mean & SD
mu <- 0                   # initial value for mu (=mean)
sig <- 1                  # initial value for sig (=SD)
x_6 <- seq(5, 10, by=1)
x_6
dnorm(x_6)
sum(log(dnorm(x_6)))      # log likelihood evaluated at observed data
x <- c(0, 1)              # vector of initial values (mu, sig)
# Step 1 - Create Likelihood function
likelihood_lf <- function(x){            # log likelihood with mu & sig as arguments
  mu <- x[1]
  sig <- x[2]
  sum(log(dnorm(x_6, mu, sd=sig)))
}
likelihood_lf(x)                         # print log likelihood at initial values
negative_likelihood_lf <- function(x){   # nlm minimizes, so change the sign of the function
  mu <- x[1]
  sig <- x[2]
  sum(log(dnorm(x_6, mu, sd=sig))) * (-1)
}
negative_likelihood_lf(x)
# Step 2 - Maximize (or minimize the negative Likelihood function)
results_lf <- nlm(negative_likelihood_lf, x, stepmax=4, hessian=TRUE)   # nlm minimizes the function
results_lf                       # show nlm results
par_max <- results_lf$estimate   # extract estimates
par_max                          # first element should equal the sample mean
likelihood_lf(par_max)           # check max value of the log likelihood

# If sigma^2 is also unknown
par_max[2]^2                           # second element of par_max is sigma => square it for sigma^2
sig_2 <- sum((x_6 - par_max[1])^2)/6   # MLE of sigma^2: dividing by T=6
var(x_6)                               # s^2 => dividing by (T-1)=5
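# (Added check, not in the original script) Both normal MLEs have closed forms,
# so we can verify the nlm output directly:
mu_mle <- mean(x_6)                                  # closed-form MLE of mu
sig_mle <- sqrt(sum((x_6 - mu_mle)^2)/length(x_6))   # closed-form MLE of sigma (divides by T, not T-1)
c(mu_mle, sig_mle)                                   # compare with par_max from nlm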
# Compute S.E. by inverting the Hessian
coeff_hess <- results_lf$hessian   # extract Hessian
coeff_hess                         # show Hessian
cov_lf <- solve(coeff_hess)        # invert Hessian to get Cov(MLE estimates)
cov_lf                             # show covariance matrix
se_lf <- sqrt(diag(cov_lf))        # compute standard errors of MLE estimates
se_lf
par_max[1]/se_lf[1]                # t-ratio for mu
par_max[2]/se_lf[2]                # t-ratio for sigma

#### A more general construction of a Likelihood function under Normality
loglike_norm <- function(x, dat){  # always use x to hold parameter values
  mu <- x[1]
  sig <- x[2]
  T <- length(dat)
  loglike <- -T*log(sig*sqrt(2*pi)) - sum((1/(2*sig^2)) * (dat - mu)^2)
  return(loglike * (-1))           # return negative log likelihood (nlm minimizes)
}
x0 <- c(0, 1)                      # initial values (mu, sig)
lf <- loglike_norm(x0, x_6)
lf
results_lf <- nlm(loglike_norm, x0, dat=x_6, stepmax=4, hessian=TRUE)   # nlm minimizes the function
results_lf                         # show nlm results
coeff_max <- results_lf$estimate   # extract estimates
coeff_hess <- results_lf$hessian   # extract Hessian
coeff_hess                         # show Hessian
cov_lf <- solve(coeff_hess)        # invert Hessian to get Cov(MLE estimates)
cov_lf                             # show covariance matrix
se_lf <- sqrt(diag(cov_lf))        # compute standard errors of MLE estimates
se_lf

#### Linear Model - Fama-French 3-factor Model
SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv", head=TRUE, sep=",")
names(SFX_da)
x_sp <- SFX_da$SP500
x_ibm <- SFX_da$IBM
x_Mkt_RF <- SFX_da$Mkt_RF
x_SMB <- SFX_da$SMB
x_HML <- SFX_da$HML
x_RF <- SFX_da$RF
T <- length(x_ibm)
lr_ibm <- log(x_ibm[-1]/x_ibm[-T])
lr_sp <- log(x_sp[-1]/x_sp[-T])
x0 <- matrix(1, T-1, 1)            # column of ones (constant)
Mkt_RF <- x_Mkt_RF[-1]/100
SMB <- x_SMB[-1]/100
HML <- x_HML[-1]/100
RF <- x_RF[-1]/100
ibm_x <- lr_ibm - RF               # IBM excess returns
X <- cbind(x0, Mkt_RF, SMB, HML)   # regressor matrix

# OLS results
fit_ibm_ff3 <- lm(ibm_x ~ Mkt_RF + SMB + HML)
summary(fit_ibm_ff3)

# Step 1 - Negative Likelihood function
likelihood_ff3 <- function(theta, y, X){
  N <- nrow(X)
  k <- ncol(X)
  beta <- theta[1:k]        # first k elements: regression coefficients
  sigma2 <- theta[k+1]^2    # last element: sigma (squared here, so sigma2 > 0)
  e <- y - X%*%beta         # residuals
  logl <- -.5*N*log(2*pi) - .5*N*log(sigma2) - ((t(e)%*%e)/(2*sigma2))
  return(-logl)             # negative log likelihood (nlm minimizes)
}
theta <- c(0, 1, 1, 1, .1)                # initial values (4 betas & sigma)
likelihood_ff3(theta, ibm_x, X)
# Step 2 - Maximize (or minimize the negative Likelihood function)
results_ff3 <- nlm(likelihood_ff3, theta, hessian=TRUE, y=ibm_x, X=X)   # nlm minimizes the function
results_ff3                               # show nlm results
par_max_ff3 <- results_ff3$estimate       # extract estimates
par_max_ff3                               # should match the OLS coefficients
likelihood_ff3(par_max_ff3, ibm_x, X)     # check max value of the log likelihood
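# (Added check, not in the original script) Under normality, the MLE betas
# coincide with OLS; only the variance differs (MLE divides by N, OLS by N-k):
cbind(OLS = coef(fit_ibm_ff3), MLE = par_max_ff3[1:4])   # betas side by side
sum(residuals(fit_ibm_ff3)^2)/length(ibm_x)              # 1/N variance => compare with par_max_ff3[5]^2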
# Step 3 - Compute S.E. by inverting the Hessian
coeff_hess_ff3 <- results_ff3$hessian   # extract Hessian
coeff_hess_ff3                          # show Hessian matrix
cov_ff3 <- solve(coeff_hess_ff3)        # invert Hessian to get Cov(MLE estimates)
cov_ff3                                 # show covariance matrix
se_ff3 <- sqrt(diag(cov_ff3))           # compute standard errors of MLE estimates
se_ff3
(par_max_ff3[2] - 1)/se_ff3[2]          # t-test for H0: beta (Market) = 1
par_max_ff3[5]/se_ff3[5]                # t-ratio for sigma

#### Inflate data (work with 1% instead of .01) to avoid strange values for the (positive) negative log L
ibm_x100 <- (lr_ibm - RF)*100
X100 <- cbind(x0, Mkt_RF, SMB, HML)*100
fit_ff3100 <- lm(ibm_x100 ~ X100 - 1)    # "-1": X100 already includes a constant
summary(fit_ff3100)
theta <- c(0, 1, 1, 1, .1)               # initial values
likelihood_ff3(theta, ibm_x100, X100)
results_ff3 <- nlm(likelihood_ff3, theta, hessian=TRUE, y=ibm_x100, X=X100)   # nlm minimizes the function
results_ff3                              # show nlm results
par_max_ff3 <- results_ff3$estimate      # extract estimates
par_max_ff3                              # should match the OLS coefficients above
likelihood_ff3(par_max_ff3, ibm_x100, X100)   # check max value of the log likelihood

##### Data Problems: Outliers & Multicollinearity
y <- ibm_x
x <- cbind(x0, Mkt_RF, SMB, HML)
k <- ncol(x)               # number of regressors (including the constant)
T <- length(y)             # sample size used in the rules of thumb below
dat_xy <- data.frame(y, x)
summary(fit_ibm_ff3)       # baseline regression, for comparison
y[10] <- -0.85             # introduce an artificial outlier
y[90] <- 0.95              # introduce a second artificial outlier
fit_ibm_ff3_out <- lm(y ~ Mkt_RF + SMB + HML)
summary(fit_ibm_ff3_out)   # note how the outliers change the estimates

# Plot residuals
x_resid <- residuals(fit_ibm_ff3)
plot(x_resid, type="l", col="blue", main="IBM Residuals from 3 FF Factor Model",
     xlab="Date", ylab="IBM residuals")

# Plot standardized residuals
x_stand_resid <- x_resid/sd(x_resid)   # standardized residuals
plot(x_stand_resid, type="l", col="blue", main="IBM Standardized Residuals from 3 FF Factor Model",
     xlab="Date", ylab="IBM standardized residuals")

# Plot Cook's distance
cooksd <- cooks.distance(fit_ibm_ff3)
plot(cooksd, pch="*", cex=2, main="Influential Obs by Cook's distance")
abline(h = 4*mean(cooksd, na.rm=TRUE), col="red")   # add cutoff line (4 x mean rule of thumb)
text(x=1:length(cooksd)+1, y=cooksd,
     labels=ifelse(cooksd > 4*mean(cooksd, na.rm=TRUE), names(cooksd), ""),
     col="red")                                     # label influential observations

# Influential row numbers
influential <- as.numeric(names(cooksd)[(cooksd > 4*mean(cooksd, na.rm=TRUE))])
head(dat_xy[influential, ], n=10L)     # print first 10 influential observations

install.packages("olsrr")              # need to install package olsrr (once)
library(olsrr)
sum(abs(x_stand_resid) > 2)            # rule-of-thumb count (a count near 5% of T is OK)
x_lev <- ols_leverage(fit_ibm_ff3)     # leverage values
sum(x_lev > (2*k+2)/T)                 # rule-of-thumb count (a count near 5% of T is OK)
sum(cooksd > 4/T)                      # rule-of-thumb count (a count near 5% of T is OK)
sum(abs(x_stand_resid) > 2)/T          # proportion of "outliers"
sum(x_lev > (2*k+2)/T)/T               # proportion of "outliers"
sum(cooksd > 4/T)/T                    # proportion of "outliers"
ols_plot_resid_stand(fit_ibm_ff3)      # plot standardized residuals
ols_plot_cooksd_bar(fit_ibm_ff3)       # plot Cook's D measure
ols_plot_dfbetas(fit_ibm_ff3)          # plot differences in betas
ols_vif_tol(fit_ibm_ff3)               # VIF & tolerance (multicollinearity diagnostics)
ols_eigen_cindex(fit_ibm_ff3)          # eigenvalues & condition indices
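# (Added sketch, not in the original script) VIF can also be computed by hand:
# regress each factor on the remaining ones and use VIF_j = 1/(1 - R^2_j).
# Illustrated for Mkt_RF, using the factor series defined above:
r2_mkt <- summary(lm(Mkt_RF ~ SMB + HML))$r.squared
1/(1 - r2_mkt)   # should match the Mkt_RF VIF from ols_vif_tol()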