############# PREDICTABILITY (Code #Pred) ############# Pred_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/GW_pred.csv", head=TRUE, sep=",") names(Pred_da) summary(Pred_da) Date <- Pred_da$yyyymm # extract Dates SP <- Pred_da$Index # extract Shiller's Price Index (S&P) D <- Pred_da$Div_12 # extract S&P's Dividend 12-month sums E <- Pred_da$Earn_12 # extract S&P's Earning 12-month sums BM <- Pred_da$BM # extract Book-to-market Rf <- Pred_da$Rfree # extract Risk-free rate T_bill <- Pred_da$T_bill # extract T_bill yields aaa <- Pred_da$AAA # extract AAA's corporate yields baa <- Pred_da$BAA # extract BAA's corporate yields Long_corp <- Pred_da$Long_yield # extract Long-term corporate yields Long_ret <- Pred_da$Long_return # extract Long-term government yields I <- Pred_da$Inflation # extract Inflation rew <- Pred_da$CRSP_SPevw # extract CRSP equally weighted returns (1926:1 on) rvw <- Pred_da$CRSP_SPvwx # extract CRSP value weighted returns (1926:1 on) T <- length(SP) lr <- log((SP[-1]+D[-1]/12)/SP[-T]) # Define Shiller's log returns dy <- log((D[-1]/12)/SP[-T]) # Dividend Yield dp <- log((D[-1]/12)/SP[-1]) # Dividend Price Ratio ep <- log((E[-1]/12)/SP[-1]) # Earnings Price Ratio de <- log((D[-1]/12)/E[-1]/12) # Dividend Payout Ratio bm <- BM[-1] # Book-to-Market rf <- Rf[-1] # Risk-free tbill <- T_bill[-1] # Dividend Yield Infl <- I[-1] # Inflation dfy <- baa[-1] - aaa[-1] # Default Yield Spread dfr <- Long_corp[-1] - Long_ret[-1] # Default Return Spread lr_x <- lr - rf rew_x <- rew[1] - rf rvw_x <- rvw[-1] - rf ### Plot D/P Ratio dp_n <- (D[-1]/12)/SP[-1] # (Nominal) Dividend Price Ratio dp.ts = ts(dp_n, frequency = 12, start=c(1871, 2)) plot.ts(dp.ts, col="blue",ylab ="D/P", main="( Dividend-Price Ratio (1871:Feb-2023:Mar)") abline(h = mean(dp_n), col = "red") ### Check persistence of D/P Ratio dp_acf <- acf(dp_n, main= "ACF for Dividend-Price Ratio (1871-2023)") ### Predictive (In-Sample) Regressions (Whole Sample) T1 <- length(lr_x) T0 <- 1 y <- lr_x[(T0+1):T1] ## DY x1 <- dy[T0:(T1-1)] fit_dy <- lm(y ~ x1) summary(fit_dy) ## DP x2 <- dp[T0:(T1-1)] fit_dp <- lm(y ~ x2) summary(fit_dp) ## EP x3 <- ep[T0:(T1-1)] fit_ep <- lm(y ~ x3) summary(fit_ep) ## DE x4 <- de[T0:(T1-1)] fit_de <- lm(y ~ x4) summary(fit_de) ## DFY x5 <- dfr[T0:(T1-1)] fit_dfr <- lm(y ~ x5) summary(fit_dfr) ## DFR x6 <- dfy[T0:(T1-1)] fit_dfy <- lm(y ~ x6) summary(fit_dfy) ## Lagged Return lag_y <- lr_x[T0:(T1-1)] fit_lag_y <- lm(y ~ lag_y) summary(fit_lag_y) ## All Together ("The kitchen sink") - Ooooops! fit_all <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6 + lag_y) summary(fit_all) ## All Together ("The kitchen sink") fit_all <- lm(y ~ x1 + x2 + x3 + x5 + x6 + lag_y) summary(fit_all) ### Predictive (In-Sample) Regressions (1926-end Sample) T0 <- 660 # Decemer 1925 Date[T0] y <- lr[(T0+1):T1] y <- rvw[(T0+1):T1] ## DY x1_25 <- dy[T0:(T1-1)] fit_dy1_25 <- lm(y ~ x1_25) summary(fit_dy1_25) ## DP x2_25 <- dp[T0:(T1-1)] fit_dp1_25 <- lm(y ~ x2_25) summary(fit_dp1_25) ## EP x3_25 <- ep[T0:(T1-1)] fit_ep1_25 <- lm(y ~ x3_25) summary(fit_ep1_25) ## DE x4_25 <- de[T0:(T1-1)] fit_de1_25 <- lm(y ~ x4_25) summary(fit_de1_25) ## DFY x5_25 <- dfy[T0:(T1-1)] fit_dfy1_25 <- lm(y ~ x5_25) summary(fit_dfy1_25) ## DFR x6_25 <- dfr[T0:(T1-1)] fit_dfr1_25 <- lm(y ~ x6_25) summary(fit_dfr1_25) ## Lagged Return lag_y_25 <- lr_x[T0:(T1-1)] fit_lag_y1_25 <- lm(y ~ lag_y_25) summary(fit_lag_y1_25) ## BM x7_25 <- bm[T0:(T1-1)] fit_bm1 <- lm(y ~ x7_25) summary(fit_bm1) ## T_bill x8_25 <- tbill[T0:(T1-1)] fit_tb1 <- lm(y ~ x8_25) summary(fit_tb1) ## All Together ("The kitchen sink") fit_all_25 <- lm(y ~ x1_25 + x2_25 + x3_25 + x5_25 + x6_25 + lag_y_25 + x7_25 + x8_25) summary(fit_all_25) ### Predictive (In-Sample) Regressions k-compounded regressions T0 <- 660 # December 1925 Date[T0] y <- lr_x[(T0+1):T1] y <- rvw[(T0+1):T1] x <- dp[T0:(T1-1)] fit_x <- lm(y ~ x) summary(fit_x) T2 <- length(y) k <- 2 T_k <- as.integer((T2-k)/k)+1 y_k <- matrix(0,T_k,1) # vector to accumulate compounded returns x_k <- matrix(0,T_k,1) # vector to accumulate compounded returns t <- 1 a <- 1 while (t <= T2-k) { y_k[a] <- sum(y[t:(k+t-1)]) x_k[a] <- sum(x[t:(k+t-1)]) a <- a + 1 t <- t + k } fit_per_k <- lm(y_k ~ x_k) summary(fit_per_k) ###### Reading Annual Data and Predictive Annual Reg (Data update up to 2020) Pred_da_a <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/goyal-welch-a_27.csv",head=TRUE,sep=",") names(Pred_da_a) lr_sp <- Pred_da_a$sp_ret # Value weighted S&P returns (including distributions) SP_a <- Pred_da_a$sp500index # Value weighted S&P returns (including distributions) D_a <- Pred_da_a$sp500d12 # extract S&P's Dividend 12-month sums E_a <- Pred_da_a$sp500e12 # extract S&P's Earning 12-month sums aaa_a <- Pred_da_a$aaa # extract AAA's corporate yields baa_a <- Pred_da_a$baa # extract BAA's corporate yields Long_corp_a <- Pred_da_a$ltrate # extract Long-term corporate yields Long_ret_a <- Pred_da_a$ltyld10 # extract Long-term government yields infl_a <- Pred_da_a$infl # extract Inflation bm_a <- Pred_da_a$bkmk # extract Book-to-market SVar_a <- Pred_da_a$svar # extract Variance Rf_a <- Pred_da_a$tbill # extract Risk-free rate eqis <- Pred_da_a$eqis # Equity Share in New Issues ntis <- Pred_da_a$ntis # Net Equity Expansion cay <- Pred_da_a$cay # Lettau and Ludvigson (2001) Cay (consumption, wealth, income) ik <- Pred_da_a$ik # Investment-to-capital csp <- Pred_da_a$csp # Cross-sectional premium TA <- length(lr_sp) dp_a <- log((D_a)/SP_a) # Dividend Price Ratio ep_a <- log((E_a)/SP_a) # Earnings Price Ratio de_a <- log((D_a)/E_a) # Dividend Payout Ratio dfy_a <- baa_a - aaa_a # Default Yield Spread dfr_a <- Long_corp_a - Long_ret_a # Default Return Spread term_a <- Long_ret_a - Rf_a # Long gov bond yield Spread over tbills T0 <- 1 # 1927 y <- lr_sp[(T0+1):TA] - Rf_a[(T0+1):TA]/100 ## DP x1_a <- dp_a[T0:(TA-1)] fit_dpa <- lm(y ~ x1_a) summary(fit_dpa) ## EP x2_a <- ep_a[T0:(TA-1)] fit_epa <- lm(y ~ x2_a) summary(fit_epa) ## DE x3_a <- de_a[T0:(TA-1)] fit_dea <- lm(y ~ x3_a) summary(fit_dea) ## DFY x4_a <- dfy_a[T0:(TA-1)] fit_dfy_a <- lm(y ~ x4_a) summary(fit_dfy_a) ## DFR x5_a <- dfr_a[T0:(TA-1)] fit_dfr_a <- lm(y ~ x5_a) summary(fit_dfr_a) ## BM x6_a <- bm_a[T0:(TA-1)] fit_bma <- lm(y ~ x6_a) summary(fit_bma) ## Price Variance x7_a <- SVar_a[T0:(TA-1)] fit_sv <- lm(y ~ x7_a) summary(fit_sv) ## New Shares in Equity x8_a <- eqis[T0:(TA-1)] fit_eq <- lm(y ~ x8_a) summary(fit_eq) ## Net Equity Expansion x9_a <- ntis[T0:(TA-1)] fit_ntis <- lm(y ~ x9_a) summary(fit_ntis) ## Term x10_a <- term_a[T0:(TA-1)] fit_ntis <- lm(y ~ x10_a) summary(fit_ntis) ## Lagged Return lag_y_a <- lr_sp[T0:(TA-1)] fit_lag_y <- lm(y ~ lag_y_a) summary(fit_lag_y) ## Kitchen Sink x7_a <- ntis[T0:(TA-1)] fit_kit_sink_a <- lm(y ~ x1_a + x2_a + x4_a + x5_a + x7_a + x8_a + x9_a + x10_a + lag_y_a) summary(fit_kit_sink_a) ## csp TCS <- 11 y_a_csp <- lr_sp[(TCS+1):TA] - Rf_a[(TCS+1):TA]/100 csp_a <- csp[TCS:(TA-1)] fit_lag_y_csp <- lm(y_a_csp ~ csp_a) summary(fit_lag_y_csp) ## cay TC <- 19 y_a_c <- lr_sp[(TC+1):TA] - Rf_a[(TC+1):TA]/100 cay_a <- cay[TC:(TA-1)] fit_lag_y_c <- lm(y_a_c ~ cay_a) summary(fit_lag_y_c) ## ik TI <- 21 y_a_ik <- lr_sp[(TI+1):TA] - Rf_a[(TI+1):TA]/100 ik_a <- ik[TI:(TA-1)] fit_lag_y_ik <- lm(y_a_ik ~ ik_a) summary(fit_lag_y_ik) ###### Out-of-sample one-step-ahead Forecasting with Rolling Regression (example: ik as predictor) yy <- y_a_ik # Dependent variable (excess returns) at time t+1 (y_t+1) xx <- ik_a # Independent variable (excess returns) at time t (x_t) Alles = NULL # Initialize empty (a space to put forecast errors) k_for <- 40 # Start of Rolling Sample i <- k_for TF <- length(yy) while (i <= TF-1) { y_tp1 <- yy[1:i] x_t <- xx[1:i] pred_reg <- lm (y_tp1 ~ x_t) # OLS predictive regression b_hat <- pred_reg$coefficients # Extract coefficient y_hat <- b_hat[1]+b_hat[2]*xx[i+1] # t+1 forecast f_e_a <- y_hat - yy[i+1] # t+1 forecast error for model f_e_n <- mean(y_tp1) - yy[i+1] # t+1 forecast error for mean f_2e <- c(f_e_a, f_e_n) # Combine both forecast errors in a vector Alles = rbind(Alles,f_2e) # accumulate forecast errors in rows (two columns) i <- i+1 } # Checking accuracy of forecasts with OOS R^2 mse <- colSums(Alles^2)/(TF-k_for) # MSEs (MSE_A & MSE_N) r2_oos <- as.numeric(1 - mse[1]/mse[2]) # OOS R^2 r2_oos # Testing accuracy of forecasts with Diebold-Mariano dm.test(Alles[,1], Alles[,2], power=2) dm.test(Alles[,1], Alles[,2], power=1) ###### In-of-sample one-step-ahead Forecasting with Rolling Regression yy <- y_a_ik xx <- ik_a Alles = NULL # Initialize empty (a space to put forecast errors) k_for <- 40 # Start of Rolling Sample i <- k_for TF <- length(yy) while (i <= TF) { y_tp1 <- yy[1:i] x_t <- xx[1:i] pred_reg <- lm (y_tp1 ~ x_t) # OLS predictive regression y_hat <- pred_reg$fitted f_e_a <- y_hat[i] - y_tp1[i] f_e_n <- mean(y_tp1) - y_tp1[i] f_2e <- c(f_e_a, f_e_n) Alles = rbind(Alles,f_2e) # accumulate forecast errors in rows (two columns) i <- i+1 } # Checking accuracy of forecasts with OOS R^2 mse <- colSums(Alles^2)/(TF-k_for) r2_is <- as.numeric(1 - mse[1]/mse[2]) # An adapted in-sample R^2 from OOS R^2 -i.e., not a true in-sample R^2 r2_is # Testing accuracy of forecasts with Diebold-Mariano dm.test(Alles[,1], Alles[,2], power=2) dm.test(Alles[,1], Alles[,2], power=1) #### Replicationg Cooper & Priestly (2008) Sp <- dataGoyalWelchAnnual$Index D12 <- dataGoyalWelchAnnual$D12 E12 <- dataGoyalWelchAnnual$E12 rf <- dataGoyalWelchAnnual$Rfree Rf <- 1+rf Rm <- log(Sp+D12)-lag(log(Sp),-1) eRm <- na.omit(Rm-log(Rf)) dp <- na.omit(log(D12)-log(Sp)) dy <- na.omit(log(D12)-lag(log(Sp),-1)) ep <- na.omit(log(E12)-log(Sp)) library(dyn) in sample diff calc annual <− function(predictor){ # for annual data start when having 21 yearsfirst obs <− 21 data <− merge(eRm,predictor,all=FALSE) names(data) <− c("eRm","predictor") demeaned2 <− zoo((as.numeric(data$eRm − mean(data$eRm))^2), order.by=index(data$eRm)) 10 regr <− dyn$lm(data$eRm ˜lag(data$predictor,−1)) res2 <− as.numeric(regr$residuals^2) res2 <− zoo(res2,order.by=index(data$eRm)[−1]) # there is one less residual than mean differetmp <− merge(demeaned2,res2,all=FALSE) diff <− cumsum(tmp$demeaned2)−cumsum(tmp$res2) diff <− diff − as.numeric(diff[first obs]) # to align at zero on the first oos observationreturn (diff) } lnIP <- log(mIndProd) lnIP <- window(lnIP,start=c(1947,11)) t <- 1:length(lnIP) t2 <- t^2 regr <- lm(lnIP ~ t + t2) Gap1 <- regr$residuals data <- merge(eRm,lag(Gap1,-2),all=FALSE) EqtyPrem <- data$eRm gap1 <- data[,2] regr1 <- lm(EqtyPrem ~ gap1)