######################## CODE FOR CLASS PROJECT: PPP (As run in class for SEK/USD) ##################### ## You need to adapt it to your currency and your data (ppp_m.csv) #### FMX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/ppp_proj_m.csv", head=TRUE, sep=",") names(FMX_da) summary(FMX_da) x_date <- FMX_da$Date us_CPI <- FMX_da$US_CPI swed_CPI <- FMX_da$SWED_CPI mal_CPI <- FMX_da$MAL_CPI S_sek <- FMX_da$SEK_USD S_myr <- FMX_da$MYR_USD S_usd <- FMX_da$USD_BROAD x_Mkt <- FMX_da$Mkt_RF x_SMB <- FMX_da$SMB x_HML <- FMX_da$HML x_RF <- FMX_da$RF x_oil <- FMX_da$Crude_WTI_Oil x_gold <- FMX_da$Gold T <- length(us_CPI) us_I <- log(us_CPI[-1]/us_CPI[-T]) swed_I <- log(swed_CPI[-1]/swed_CPI[-T]) mal_I <- log(mal_CPI[-1]/mal_CPI[-T]) e_sek <- log(S_sek[-1]/S_sek[-T]) e_myr <- log(S_myr[-1]/S_myr[-T]) e_usd <- log(S_usd[-1]/S_usd[-T]) oil <- log(x_oil[-1]/x_oil[-T]) gold <- log(x_gold[-1]/x_gold[-T]) inf_d <- swed_I - us_I inf_dm <- mal_I - us_I Mkt_RF <- x_Mkt[-1] SMB <- x_SMB[-1] HML <- x_HML[-1] RF <- x_RF[-1] S <- S_sek[-1] #### 3.1-A Visual Plots plot(e_sek, inf_d, col="blue", ylab ="(I_d - I_f)", xlab ="e_f") title("SEK/USD: Inflation rate differential vs Changes in S_t") plot(e_myr, inf_dm, col="blue", ylab ="(I_d - I_f)", xlab ="e_f") title("MYR/USD: Inflation rate differential vs Changes in S_t") #### 3.1-B PPP Regression & Tests ### B.1 Regression fit_ppp <- lm(e_sek ~ inf_d) summary(fit_ppp) fit_ppp_m <- lm(e_myr ~ inf_dm) summary(fit_ppp_m) #### B.2 Outliers library(olsrr) # need to install package olsrr dat_xy <- data.frame(e_sek, inf_d) cooksd <- cooks.distance(fit_ppp) # plot cook's distance plot(cooksd, pch="*", cex=2, main="Influential Obs by Cooks distance") # add cutoff line abline(h = 4*mean(cooksd, na.rm=T), col="red") # add cutoff line # add labels text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>4*mean(cooksd, na.rm=T), names(cooksd),""), col="red") # add labels # influential row numbers influential <- as.numeric(names(cooksd)[(cooksd > 4*mean(cooksd, na.rm=T))]) # print first 10 influential observations. head(dat_xy[influential, ], n=10) b_s <- fit_ppp$coefficients k <- length(b_s) # k: number of parameters x_resid <- residuals(fit_ppp ) x_stand_resid <- x_resid/sd(x_resid) # standardized residuals sum(x_stand_resid > 2) # Rule of thumb count (5% count is OK) sum(x_stand_resid > 2)/T # Proportion of stand residuals > 5% x_lev <- ols_leverage(fit_ppp) # leverage residuals sum(x_lev > (2*k+2)/T) # Rule of thumb count (5% count is OK) sum(x_lev > (2*k+2)/T)/T # Proportion of lev obs > 5% sum(cooksd > 4/T) # Rule of thumb count (5% count is OK) sum(cooksd > 4/T)/T # Proportion of cook's D > 5% ### (Extra) Plots of outlier detection stats ols_plot_resid_stand(fit_ppp) # Plot standardized residuals ols_plot_cooksd_bar(fit_ppp) # Plot Cook’s D measure ols_plot_dffits(fit_ppp) # Plot Difference in fits ols_plot_dfbetas(fit_ppp) # Plot Difference in betas #### B.3 QLRt: Structural Break - Estimating breaking point date (with library desk) library(desk) T<- length(e_sek) pie <- .15 T0 <- round(T * pie) T1 <- round(T *(1-pie)) my.qlr <- qlr.test(fit_ppp, from = T0, to = T1, sig.level = 0.05, details = TRUE) plot(my.qlr, col = "red", main = "QLR Test: Relative PPP Model - 1973-2024") # Plot test results max(my.qlr$f.stats) my.qlr$breakpoint # Extract breakpoint observation SFX_da$Date[my.qlr$breakpoint] # Print date qlr.cv(T, L = 2, sig.level = 0.05) #### B.4 - Jarque-Bera Normality test using library tseries library(tseries) e_s <- fit_ppp$residuals jarque.bera.test(e_s) #### B.5 - Heteroscedasticity: GQ test using library lmtest library(lmtest) gqtest(fit_ppp, fraction = .20) #### Heteroscedasticity: White test e_s <- fit_ppp$residuals e_s2 <- e_s^2 # Step 2 – squared residuals inf_d2 <- inf_d^2 fit_W <- lm (e_s2 ~ inf_d + inf_d2) # Step 2 – Auxiliary regression b_W <- fit_W$coefficients m_df <- length(b_W) - 1 # degrees of freedom Re_2 <- summary(fit_W)$r.squared # Step 2 – Keep R^2 from Auxiliary reg LM_W_test <- Re_2 * T # Step 3 – Compute LM Test: R^2 * T LM_W_test p_val <- 1 - pchisq(LM_W_test, df = m_df) # p-value of LM_test p_val #### (Extra) Heteroscedasticity: BP test bptest(fit_ppp) #### B.6 - Heteroscedasticity: LB for squared residuals (time-varying volatility) Box.test(e_s2, lag=4, type="Ljung-Box") Box.test(e_s2, lag=12, type="Ljung-Box") #### B.7 - Autocorelation: BG test bgtest(fit_ppp, order=4) bgtest(fit_ppp, order=12) #### B.8 - Autocorrelation: DW test dwtest(fit_ppp) #### (Extra) Autocorrelation: Q & LB tests (on residuals) Box.test(e_s, lag=4, type="Ljung-Box") Box.test(e_s, lag=12, type="Ljung-Box") #### B.9 - Test PPP (alpha=0 & Beta=1) library(car) linearHypothesis(fit_ppp,c("(Intercept) = 0","inf_d = 1"),test="Chisq") # Asymptotic Chi-squar test #### B.10 - Robust SE: OLS, & NW fit_ppp <- lm(e_sek ~ inf_d) # OLS Regression with lm b_sek <-fit_ppp$coefficients # Extract OLS coefficients from fit_ppp SE_OLS <- sqrt(diag(vcov(fit_ppp))) # Extract OLS SE from fit_ppp t_OLS <- b_sek/SE_OLS # Calculate OLS t-values b_sek SE_OLS t_OLS library(sandwich) NW <- NeweyWest(fit_ppp, lag = 4, prewhite = FALSE) SE_NW <- diag(sqrt(abs(NW))) t_NW <- b_sek/SE_NW SE_NW t_NW NW <- NeweyWest(fit_ppp, lag = 12, prewhite = FALSE) SE_NW <- diag(sqrt(abs(NW))) t_NW <- b_sek/SE_NW SE_NW t_NW #### B.11 - MSE: Model and RW T <- length(e_s2) MSE_ppp <- sum(e_s2[2:T])/(T-1) MSE_ppp e_RW <- e_sek[2:T] MSE_RW <- sum(e_RW^2)/(T-1) MSE_RW #### B.12 - Augmented (General) Model fit_ppp_aug <- lm(e_sek ~ inf_d + e_usd + Mkt_RF + SMB + HML + RF + oil + gold) summary(fit_ppp_aug) b_sek_aug <-fit_ppp_aug$coefficients #### B.13 - NW_SE in Augmented (General) Model NW <- NeweyWest(fit_ppp_aug, lag = 12, prewhite = FALSE) SE_NW <- diag(sqrt(abs(NW))) t_NW <- b_sek_aug/SE_NW SE_NW t_NW #### B.14 - Reduced (Specific) Model fit_ppp_mod <- lm(e_sek ~ inf_d + Mkt_RF + RF + oil) summary(fit_ppp_mod) b_sek_mod <-fit_ppp_mod$coefficients e_mod <- fit_ppp_mod$residuals MSE_mod <- sum(e_mod[2:T]^2)/(T-1) MSE_mod MSE_RW #### (Extra) Check for Multicollinearity ols_vif_tol(fit_ppp_mod) ols_eigen_cindex(fit_ppp_mod) T <- length(e_sek) ######## 3.2 - Forecasting FX Rates: Forecasts and Accuracy (MSE) #### 3.2-1 - Report Regression of Reduced Model y <- e_sek xx <- cbind(inf_d, Mkt_RF, RF, oil) T0 <- 1 T1 <- 563 # End of Estimation Period (Dec 2019) T2 <- T1+1 # Start of Validation Period (Jan 2020) y1 <- y[T0:T1] x1 <- xx[T0:T1,] fit_ppp_est <- lm(y1 ~ x1) # Estimation Period Regression (1973 - 2019) b_est <- fit_ppp_est$coefficients # Extract OLS coefficients from regression summary(fit_ppp_est) #### 3.2-2 - Estimate AR(1) For Independent Variables x1_1 <- xx[1:(T1-1),1] # Estimation period data x1_0 <- xx[2:T1,1] # Estimation period data fit_1 <- lm(x1_0 ~ x1_1) b_1 <- fit_1$coefficients summary(fit_1) x2_1 <- xx[1:(T1-1),2] # Estimation period data x2_0 <- xx[2:T1,2] # Estimation period data fit_2 <- lm(x2_0 ~ x2_1) b_2 <- fit_2$coefficients summary(fit_2) x3_1 <- xx[1:(T1-1),3] # Estimation period data x3_0 <- xx[2:T1,3] # Estimation period data fit_3 <- lm(x3_0 ~ x3_1) b_3 <- fit_3$coefficients summary(fit_3) x4_1 <- xx[1:(T1-1),4] # Estimation period data x4_0 <- xx[2:T1,4] # Estimation period data fit_4 <- lm(x4_0 ~ x4_1) b_4 <- fit_4$coefficients summary(fit_4) #### 3.2-2 - AR(1) Forecast for Independent Variables T_val <- T1 + 1 # start of Validation period (Jan 2020) T_1 <- T -1 xx_cons <- rep(1,T-T_val+1) xx1_0 <- cbind(xx_cons,xx[T1:T_1,1]) %*% b_1 # Forecast x1 with Validation period data xx2_0 <- cbind(xx_cons,xx[T1:T_1,2]) %*% b_2 # Forecast x2 with Validation period data xx3_0 <- cbind(xx_cons,xx[T1:T_1,3]) %*% b_3 # Forecast x3 with Validation period data xx4_0 <- cbind(xx_cons,xx[T1:T_1,4]) %*% b_4 # Forecast x4 with Validation period data #### 3.2.3 - Reduced Model Forecast S_t+1 k_for <- T - T_val+1 e_sek_mod_0 <- cbind(xx_cons,xx1_0,xx2_0,xx3_0,xx4_0)%*%b_est # Forecast for e_f,t S_mod_f0 <- S[T1:(T-1)]*(1+e_sek_mod_0) # Forecast for S_t e_mod_f0 <- S[T_val:T] - S_mod_f0 # Forecasat error mse_e_f0 <- sum(e_mod_f0^2)/k_for # MSE mse_e_f0 #### 3.2-4 - AR(1) For Dependent Variable (y = e_sek) y_1 <- y[1:(T1-1)] # Estimation period data y_0 <- y[2:T1] # Estimation period data fit_y <- lm(y_0 ~ y_1) # Fit AR(1) model for e_f,t b_y <- fit_y$coefficients summary(fit_y) #### AR(1) Forecast S_t+1 y_f0 <- cbind(xx_cons,y[T_val:T])%*% b_y # b_est coefficients from estimation period regresssion S_ar1_f0 <- S[T1:(T-1)]*(1+y_f0) # Forecast for S_t, using validation data e_ar1_f0 <- S[T_val:T] - S_ar1_f0 # Forecasat error mse_e_ar1_f0 <- sum(e_ar1_f0^2)/k_for # MSE mse_e_ar1_f0 # MSE(2) #### 3.2-5 - RW Forecast S_t+1 e_rw_f0 <- S[T_val:T] - S[T1:(T-1)] # Error for RW model => et (1) mse_e_rw_f0 <- sum(e_rw_f0^2)/k_for mse_e_rw_f0 # MSE(1) <= (1) is the higher MSE. #### 3.2-5 - Testing Equality of MSE: Mod vs RW ### MSE's: Mod, AR(1), & RW mse_e_f0 mse_e_ar1_f0 mse_e_rw_f0 ### Testing Equality of MSE: Mod vs RW z_mgn <- e_rw_f0 + e_mod_f0 x_mgn <- e_rw_f0 - e_mod_f0 fit_mgn <- lm(z_mgn ~ x_mgn) summary(fit_mgn) # Check t-stat ### Testing Equality of MSE: AR(1) vs RW z_mgn <- e_rw_f0 + e_ar1_f0 x_mgn <- e_rw_f0 - e_ar1_f0 fit_mgn <- lm(z_mgn ~ x_mgn) summary(fit_mgn) # Check t-stat ### Testing Equality of MSE: AR(1) vs Mod z_mgn <- e_mod_f0 + e_ar1_f0 x_mgn <- e_mod_f0 - e_ar1_f0 fit_mgn <- lm(z_mgn ~ x_mgn) summary(fit_mgn) # Check t-stat