############# LM Tests & Model Specification, Dummy Variables and Structural Change (Code #9) ############# SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv",head=TRUE,sep=",") x_ibm <- SFX_da$IBM x_xom <- SFX_da$XOM x_ge <- SFX_da$GE x_Mkt_RF<- SFX_da$Mkt_RF x_SMB <- SFX_da$SMB x_HML <- SFX_da$HML x_RF <- SFX_da$RF T <- length(x_ibm) lr_ibm <- log(x_ibm[-1]/x_ibm[-T]) lr_xom <- log(x_ibm[-1]/x_ibm[-T]) x0 <- matrix(1,T-1,1) Mkt_RF <- x_Mkt_RF[-1]/100 SMB <- x_SMB[-1]/100 HML <- x_HML[-1]/100 RF <- x_RF[-1]/100 ibm_x <- lr_ibm - RF # IBM excess returns ### Specification Test: LM Tests to test for Omitted Variables (SMB + HML in CAPM) T <- length(ibm_x) fit_ibm_capm <- lm (ibm_x ~ Mkt_RF) resid_r <- fit_ibm_capm$residuals # extract residuals from R model fit_lm <- lm (resid_r ~ Mkt_RF + SMB + HML) # auxiliary regression summary(fit_lm) R2_r <- summary(fit_lm)$r.squared # extract R2 from fit_lm R2_r LM_test <- R2_r * T LM_test qchisq(.95, df = 2) # chi-squared (df=2) value at 5% level p_val <- 1 - pchisq(LM_test, df = 2) # p-value of LM_test p_val ### Specification Test: LM Tests to test for Non linearities (Omitted Variable are Mkt_RF^2, SMB^2 + HML^2) fit_ibm_ff3 <- lm (ibm_x ~ Mkt_RF + SMB + HML) Mkt_RF2 <- Mkt_RF^2 SMB2 <- SMB^2 HML2 <- HML^2 resid_r <- fit_ibm_ff3$residuals fit_lm <- lm (resid_r ~ Mkt_RF + SMB + HML + Mkt_RF2 + SMB2 + HML2) R2_r <- summary(fit_lm)$r.squared LM_test <- R2_r * T LM_test qchisq(.95, df = 3) # 95% quantile for Chi-square distribution p_val <- 1 - pchisq(LM_test, df = 3) # p-value of LM_test p_val ### Specification Test: RESET Test y_hat <- fitted(fit_ibm_ff3) y_hat2 <- y_hat^2 fit_ramsey <- lm(ibm_x ~ Mkt_RF + SMB + HML + y_hat2) summary(fit_ramsey) y_hat4 <- y_hat^4 fit_ramsey2 <- lm(ibm_x ~ Mkt_RF + SMB + HML + y_hat4) summary(fit_ramsey2) fit_ramsey2 <- lm(ibm_x ~ Mkt_RF + SMB + HML + y_hat2 + y_hat4) summary(fit_ramsey2) ## Using R package lmtest library(lmtest) resettest(fit_ibm_ff3, power=2, type="fitted") resettest(fit_ibm_ff3, power=4, type="fitted") #### Dummy Variables ## Creating a Dummy Variable # By Characteristc (Male, Female) df <- data.frame(income=c(45000, 48000, 49000, 51000, 54000, 57000, 65000, 69000, 77000, 78000, 83000, 98000, 104000, 107000, 120000), age=c(23, 25, 24, 29, 29, 30, 38, 36, 40, 59, 52, 64, 53, 65, 57), status=c('Female', 'Female', 'Male', 'Male', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Female', 'Female', 'Female', 'Male', 'Male')) # view data frame df #create dummy variables male <- ifelse(df$status == 'Male', 1, 0) female <- ifelse(df$status == 'Female', 1, 0) male lm_gen <- lm(df$income ~ df$age + male) summary(lm_gen) # By Greater/Lower Than a threshold over30 <- ifelse(df$age >= 30, 1, 0) over30 lm_age <- lm(df$income ~ over30 + male) summary(lm_age) # By Time of the Year (Seasonal. Example: Quarterly) q1 <- rep(c(1,0,0,0), 5) # First Quarter Dummy q1 q4 <- rep(c(0,0,0,1), 5) # Fourth Quarter Dummy q4 # By Time of the Year (Regimes. Example: Structural Break (SB) at t_sb = 10) T <- 22 # Length of Data t_sb <- 10 # Timing of SB T1 <- T-t_sb # Number of Observations after SB D_c0 <- rep(0,10) # Dummy_t = 0 if t <= t_sb D_c1 <- rep(1,T1) # Dummy_t = 1 of t > t_sb D_c <- c(D_c0,D_c1) # SB Dummy variable ### Introducing Seasonal Factors in XOM x_xom <- SFX_da$XOM # Extract XOM prices T <- length(x_xom) lr_xom <- log(x_xom[-1]/x_xom[-T]) xom_x <- lr_xom - RF fit_xom_ff3 <- lm(xom_x ~ Mkt_RF + SMB + HML) # FF regression for XOM returns summary(fit_xom_ff3) T <- length(xom_x) Summ <- rep(c(0,0,0,0,0,0,1,1,1,0,0,0), round(T/12)+1) # Create Summer dummy Fall <- rep(c(0,0,0,0,0,0,0,0,0,1,1,1), round(T/12)+1) # Create Fall dummy Wint <- rep(c(1,1,1,0,0,0,0,0,0,0,0,0), round(T/12)+1) # Create Winter dummy T1 <- T+1 Fall_1 <- Fall[2:T1] # Adjusting sample (starts in Feb) Wint_1 <- Wint[2:T1] Summ_1 <- Summ[2:T1] # Model Seasonal Factors only in constant fit_xom_sea <- lm(xom_x ~ Mkt_RF + SMB + HML + Fall_1 + Wint_1 + Summ_1) summary(fit_xom_sea) ## F-test for seasonal factors (constant only) library(lmtest) waldtest(fit_xom_sea, fit_xom_ff3) # Model with Seasonal Dummies is the U Model ## Model Seasonal factors in Constant and Slopes (only interacting Winter Dummy) Mkt_W <- Mkt_RF*Wint_1 SMB_W <- SMB*Wint_1 HML_W <- HML*Wint_1 fit_xom_s2 <- lm(xom_x ~ Mkt_RF + SMB + HML + Fall_1 + Wint_1 + Summ_1 + Mkt_W + SMB_W + HML_W) summary(fit_xom_s2) ## F-test for seasonal factors (constant and slopes) waldtest(fit_xom_s2, fit_xom_ff3) ### January Effect ## XOM T <- length(ibm_x) Jan <- rep(c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (round(T)/12+1)) # Create January dummy T2 <- T+1 Jan_1 <- Jan[2:T2] # Adjust sample fit_xom_ff3_Jan <- lm (xom_x ~ Mkt_RF + SMB + HML + Jan_1) # Auxiliary Regression summary(fit_xom_ff3_Jan) ## IBM fit_ibm_ff3_Jan <- lm (ibm_x ~ Mkt_RF + SMB + HML + Jan_1) summary(fit_ibm_ff3_Jan) res_ibm_ff3_Jan <- fit_ibm_ff3_Jan$residuals plot(res_ibm_ff3_Jan, type="l", main = "IBM residuals") ### Removing one Observation dec_1992 <- rep(0,T) # Define Dec 1992 dummy dec_1992[239] <- 1 # Define Dec 1992 dummy (=1 if Dec 1992) fit_d92 <- lm (ibm_x ~ Mkt_RF + SMB + HML + Jan_1 + dec_1992) summary(fit_d92) res_d92 <- fit_d92$residuals plot(res_d92, type="l", col="blue", main = "IBM residuals withoud Dec 1992 observation") #### Structural Test - Chow Test for 3-factor FF Model for IBM computing RSS_U & RSS_R y <- ibm_x x0 <- matrix(1,T,1) x <- cbind(Mkt_RF,SMB,HML) # Regressors (constant included) T <- length(y) t_s <- 342 # Structural break date (End of 1st-regime) ## Restricted Model (Pooling all data) fit_R <- lm (y ~ x) # Restricted Regression e_R <- fit_R$residuals # regression residuals, e k <- length(fit_R$coefficients) # Number of parameters RSS_R <- sum(e_R^2) # Restricted RSS ## Unrestricted Model (Computing RSS1 & RSS2) # RSS1 y_1 <- y[1:t_s] x_u1 <- x[1:t_s,] fit_ibm_U1 <- lm (y_1 ~ x_u1) # Unrestricted Regression 1 summary(fit_ibm_U1) e_U1 <- fit_ibm_U1$residuals # regression residuals, e RSS1 <- sum(e_U1^2) # Unrestricted RSS 1 # RSS2 kk = t_s+1 # Starting observation for 2nd-regime y_2 <- y[kk:T] x_u2 <- x[kk:T,] fit_ibm_U2 <- lm (y_2 ~ x_u2) # Unrestricted Regression 2 summary(fit_ibm_U2) e_U2 <- fit_ibm_U2$residuals # regression residuals, e RSS2 <- sum(e_U2^2) # Unrestricted RSS 2 # Chow-test (F-test) F <- ((RSS_R - (RSS1+RSS2))/k)/((RSS1+RSS2)/(T - 2*k)) # F-test F df2 <- T -2 *k qf(.95, df1=k, df2) # 95% quantile value of F-test p_val <- 1 - pf(F,df1=k, df2) # p-value of F-test p_val #### Structural Test - Chow Test for 3-factor FF model for IBM using library strucchange library(strucchange) sctest(y ~ x, type = "Chow", point = t_s) #### Structural Test - Effect on Coefficients (GDP growth AR(1) model with SB at October 1973 GDP_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/GDP_q.csv", head=TRUE, sep=",") x_date <- GDP_da$DATE x_gdp <- GDP_da$GDP x_dummy <- GDP_da$D73 T <- length(x_gdp) t_s <- 108 # TSB = Oct 1973 x_date[108] ## Plot GDP Level x.ts = ts(x_gdp, frequency = 4, start=c(1947, 2)) plot.ts(x.ts, col="blue",ylab ="GDP level", main="U.S. GDP: 1947-2023") ## Changes and Plot of changes lr_gdp <- log(x_gdp[-1]/x_gdp[-T]) e.ts = ts(lr_gdp, frequency = 4, start=c(1947, 2)) plot.ts(e.ts, col="green",ylab ="GDP growth rate ", main="GDP quarterly growth rate: 1947-2023") mean(lr_gdp[1:t_s])*4 mean(lr_gdp[t_s:T])*4 sd(lr_gdp0[1:t_s])*sqrt(4) sd(lr_gdp0[t_s:T])*sqrt(4) ## Create gdp_g(t) & gdp_g(t-1) T <- length(lr_gdp) lr_gdp0 <- lr_gdp[-1] # GDP growth rate: We remove first observation lr_gdp1 <- lr_gdp[-T] # Lagged GDP growth rate: We remove last observation T <- length(lr_gdp0) t_s <- t_s -1 # Adjust t_s (we lost the first observation) ## Restricted Model (No structural change) fit_ar1 <- lm(lr_gdp0 ~ lr_gdp1) summary(fit_ar1) #### Structural Test - Chow Test for GDP growth with strucchange library library(strucchange) sctest(lr_gdp0 ~ lr_gdp1, type = "Chow", point = t_s) #### Structural Test - Chow Test for GDP growth computing RSS_U & RSS_R y <- lr_gdp0 x1 <- lr_gdp1 T <- length(y) x0 <- matrix(1,T,1) x <- cbind(x0,x1) k <- ncol(x) ## Restricted Model (Pooling all data) fit_ar1 <- lm(lr_gdp0 ~ lr_gdp1) # Fitting AR(1) (Restricted) Model summary(fit_ar1) e_ar1_R <- fit_ar1$residuals # regression residuals, e RSS_R <- sum(e_ar1_R^2) # Restricted RSS ## Unrestricted Model (Computing RSS1 & RSS2) # RSS1 y_1 <- y[1:t_s] x_u1 <- x1[1:t_s] fit_ar1_U1 <- lm (y_1 ~ x_u1) # Unrestricted Regression 1 e_ar1_U1 <- fit_ar1_U1$residuals # regression residuals, e RSS1 <- sum(e_ar1_U1^2) # Unrestricted RSS 1 # RSS2 kk <- t_s + 1 # Starting observation for 2nd-regime y_2 <- y[kk:T] x_u2 <- x1[kk:T] fit_ar1_U2 <- lm (y_2 ~ x_u2) # Unrestricted Regression 2 e_ar1_U2 <- fit_ar1_U2$residuals # regression residuals, e RSS2 <- sum(e_ar1_U2^2) # Unrestricted RSS 2 # Chow-test (F-test) b_ar1 <- fit_ar1_U2$coefficients k <- length(b_ar1) F <- ((RSS_R - (RSS1+RSS2))/k)/((RSS1+RSS2)/(T - 2*k)) # F-test F # Unrestricted Model with a Before & After Dummy (D73) (Introducing different coefficients for both Regimes) T1 <- T - t_s # Number of Observations after SB D73_0 <- rep(0,t_s) # Dummy_t = 0 if t <= t_s D73_1 <- rep(1,T1) # Dummy_t = 1 of t > t_s D73 <- c(D73_0,D73_1) # SB Dummy variable t_s <- 108 lr_gdp1_D73 <- lr_gdp1 * D73 # interactive dummy (effect on slope) fit_ar1_d_2 <- lm(lr_gdp0 ~ lr_gdp1 + D73 + lr_gdp1_D73) summary(fit_ar1_d_2) # F-test for structural break (D73 no impact on constant and slope) library(car) linearHypothesis(fit_ar1_d_2, c("D73 = 0","lr_gdp1_D73 = 0"), test="F") # Forecating with AR(1) Model with and without Structural break gdp_2024_1 <- lr_gdp0[T] # GDP 2024:I # Forecast 2024:II with structural break (0.008953 + 0.003779) + (0.467457 + (-0.326809)) * gdp_2024_1 # Forecast 2024:II without structural break 0.011411 + 0.263353 * gdp_2024_1