####### Code for HW 2 - Version: 2025 #### Question 1 #### SH_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Shiller_data.csv", head=TRUE, sep=",") x_date <- SH_da$Date x_P <- SH_da$P x_D <- SH_da$D x_E <- SH_da$E T <- length(x_S) cpi_us <- SH_da$CPI i_us <- SH_da$Long_i T <- length(x_P) lr_p <- log(x_P[-1]/x_P[-T]) lr_d <- log(x_D[-1]/x_D[-T]) lr_e <- log(x_E[-1]/x_E[-T]) inf <- log(cpi_us[-1]/cpi_us[-T]) int <- (i_us[-1])/100 T <- length(lr_p) ## a. Report Regression fit_mod <- lm (lr_p ~ lr_e + inf + int) summary(fit_mod) ## b. Interpret R^2 (the model explains % of the variability of stock returns. Or more precisely, % of the variability of stock returns that is explained by the variability of earnings, inflation & interest rates) ## c. Interpret coefficient beta_1 (If earnings increase by 1%, stock returns increase by beta_1% ## d. Report F-stat in fit_mod output. Based on p-value reject reject H0. ## e. F test of Restrictions (beta_1 = beta_3 = 0) e_u <- fit_mod$residuals RSS_u <- sum(e_u^2) # Unrestricted RSS fit_r <- lm (lr_p ~ inf ) # Restricted model e_r <- fit_r$residuals RSS_r <- sum(e_r^2) # Restricted RSS F_test_1 <- ((RSS_r - RSS_u)/2)/(RSS_u/(T-4)) # F-test F_test_1 qf(.95, df1 = 2, df2= T-4) # F-value from table (if larger than F_test_1 cannot reject H0) p_val <- 1 - pf(F_test_1, df1 = 2, df2= T-4) # p-value of F-test p_val ## You could have also used package car. library(car) linearHypothesis(fit_mod, c("lr_e = 0","int = 0"), test="F") # F: exact test ## f. Testing J=2 Hypothesis with R package car library(car) linearHypothesis(fit_mod, c("inf = 0.5","int = -0.1"), test="F") # F€: exact test #### Question 2 #### fit_mod <- lm (lr_p ~ lr_e + inf + int) sh_ols <- summary(fit_mod)$coef[,1] #extracting OLS estimated parameters from lm (1st row in table) dat_Sh <- data.frame(lr_p, lr_e, inf, int) # create R dataframe to use in lmboot sim_size <- 1000 # R will mention that it's recommended to habe B=sim_size > T. You can set B = 2000 library(lmboot) sh_b <- paired.boot(lr_p ~ lr_e + inf + int, data=dat_Sh, B=sim_size) #paired bootstrap in lm ## a. Mean values for b (bootstrap estimates of b) mean(sh_b$bootEstParam[,1]) # print mean of bootstrap samples for constant mean(sh_b$bootEstParam[,2]) # print mean of bootstrap samples for lr_e mean(sh_b$bootEstParam[,3]) # print mean of bootstrap samples for inf mean(sh_b$bootEstParam[,4]) # print mean of bootstrap samples for int # Note: You can get from lmboot the OLS estimated parameters sh_b$origEstParam # OLS ("Original") estimated parameters ## a. bootstrap bias (OLS estimated parameter - Bootstrap estimated parameter) sh_b$origEstParam[1] - mean(sh_b$bootEstParam[,1]) sh_b$origEstParam[2] - mean(sh_b$bootEstParam[,2]) sh_b$origEstParam[3] - mean(sh_b$bootEstParam[,3]) sh_b$origEstParam[4] - mean(sh_b$bootEstParam[,4]) ## b. Use quantile function to get 95% CI for beta_2 (inf) parameter (percentile method) quantile(sh_b$bootEstParam[,3], probs=c(.025, .975)) #### Question 3 #### ## 3.a - Plot residuals x_resid <- residuals(fit_mod) x_stand_resid <- x_resid/sd(x_resid) # standardized residuals plot(x_resid, typ ="l", col="blue", main ="Shiller Residuals from Model", xlab="Date", ylab="Mod residuals") ## 3.a - Cook's D cooksd <- cooks.distance(fit_mod) # Plot Cook's distance plot(cooksd, pch="*", cex=2, main="Influential Obs by Cooks distance") # add cutoff line abline(h = 4*mean(cooksd, na.rm=T), col="red") # add cutoff line # add labels text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>4*mean(cooksd, na.rm=T), names(cooksd),""), col="red") # add labels ## 3.b - Proportion of Violations of Rules of Thumb T <- length(x_resid) k <- length(fit_mod$coefficients) library(olsrr) # need to install package olsrr sum(x_stand_resid > 2) # Rule of thumb count (5% count is OK) x_lev <- ols_leverage(fit_mod) # leverage residuals sum(x_lev > (2*k+2)/T) # Rule of thumb count (5% count is OK) sum(cooksd > 4/T) # Rule of thumb count (5% count is OK) sum(x_stand_resid > 2)/T # proportion of "outliers" - stand resid sum(x_lev > (2*k+2)/T)/T # proportion of "outliers" - leverage sum(cooksd > 4/T)/T # proportion of "outliers" - Cook's D ## Interpretation: Overall, there's no strong evidence of outliers (only leverage has a proportion slightly bigger than 5%), and there's no evidence of Multicollinearity (VIF and Condition index are all lower than the Rules of Thumb) #### Question 4 #### Check Notes