######################## CODE FOR CLASS PROJECT: PPP (As run in class for SEK/USD) #####################
## You need to adapt it to your currency and your data (ppp_m.csv) ####

FMX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/ppp_proj_m.csv", head=TRUE, sep=",")
names(FMX_da)
summary(FMX_da)

x_date <- FMX_da$Date
us_CPI <- FMX_da$US_CPI
swed_CPI <- FMX_da$SWED_CPI
mal_CPI <- FMX_da$MAL_CPI
S_sek <- FMX_da$SEK_USD
S_myr <- FMX_da$MYR_USD
S_usd <- FMX_da$USD_BROAD
x_Mkt <- FMX_da$Mkt_RF
x_SMB <- FMX_da$SMB
x_HML <- FMX_da$HML
x_RF <- FMX_da$RF
x_oil <- FMX_da$Crude_WTI_Oil
x_gold <- FMX_da$Gold

T <- length(us_CPI)
us_I <- log(us_CPI[-1]/us_CPI[-T])
swed_I <- log(swed_CPI[-1]/swed_CPI[-T])
mal_I <- log(mal_CPI[-1]/mal_CPI[-T])
e_sek <- log(S_sek[-1]/S_sek[-T])
e_myr <- log(S_myr[-1]/S_myr[-T])
e_usd <- log(S_usd[-1]/S_usd[-T])
oil <- log(x_oil[-1]/x_oil[-T])
gold <- log(x_gold[-1]/x_gold[-T])
inf_d <- swed_I - us_I
inf_dm <- mal_I - us_I
Mkt_RF <- x_Mkt[-1]
SMB <- x_SMB[-1]
HML <- x_HML[-1]
RF <- x_RF[-1]
S <- S_sek[-1]


#### 3.1-A Visual Plots
plot(e_sek, inf_d, col="blue", ylab ="(I_d - I_f)", xlab ="e_f")
title("SEK/USD: Inflation rate differential vs Changes in S_t")

plot(e_myr, inf_dm, col="blue", ylab ="(I_d - I_f)", xlab ="e_f")
title("MYR/USD: Inflation rate differential vs Changes in S_t")


#### 3.1-B PPP Regression & Tests

### B.1 Regression
fit_ppp <- lm(e_sek ~ inf_d) 
summary(fit_ppp)

fit_ppp_m <- lm(e_myr ~ inf_dm) 
summary(fit_ppp_m)


#### B.2  Outliers
library(olsrr)						# need to install package olsrr

dat_xy <- data.frame(e_sek, inf_d)
cooksd <- cooks.distance(fit_ppp)
# plot cook's distance
plot(cooksd, pch="*", cex=2, main="Influential Obs by Cooks distance") 
# add cutoff line
abline(h = 4*mean(cooksd, na.rm=T), col="red")  # add cutoff line
# add labels
text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>4*mean(cooksd, na.rm=T), names(cooksd),""), col="red")  # add labels

# influential row numbers
influential <- as.numeric(names(cooksd)[(cooksd > 4*mean(cooksd, na.rm=T))])  
# print first 10 influential observations.
head(dat_xy[influential, ], n=10)

b_s <- fit_ppp$coefficients
k <- length(b_s)					# k: number of parameters

x_resid <- residuals(fit_ppp )
x_stand_resid <- x_resid/sd(x_resid)		 	# standardized residuals
sum(x_stand_resid > 2)					# Rule of thumb count (5% count is OK)
sum(x_stand_resid > 2)/T				# Proportion of stand residuals > 5%
x_lev <- ols_leverage(fit_ppp)				# leverage residuals
sum(x_lev > (2*k+2)/T) 					# Rule of thumb count (5% count is OK)
sum(x_lev > (2*k+2)/T)/T				# Proportion of lev obs > 5%
sum(cooksd > 4/T)					# Rule of thumb count (5% count is OK)
sum(cooksd > 4/T)/T					# Proportion of cook's D > 5%

### (Extra) Plots of outlier detection stats
ols_plot_resid_stand(fit_ppp)				# Plot standardized residuals 
ols_plot_cooksd_bar(fit_ppp) 				# Plot Cook’s D measure
ols_plot_dffits(fit_ppp) 				# Plot Difference in fits
ols_plot_dfbetas(fit_ppp) 				# Plot Difference in betas


#### B.3  QLRt: Structural Break - Estimating breaking point date (with library desk)
library(desk)
T<- length(e_sek)
pie <- .15
T0 <- round(T * pie)
T1 <- round(T *(1-pie))
my.qlr <- qlr.test(fit_ppp, from = T0, to = T1, sig.level = 0.05, details = TRUE)

plot(my.qlr, col = "red", main = "QLR Test: Relative PPP Model - 1973-2024") # Plot test results
max(my.qlr$f.stats)

my.qlr$breakpoint		# Extract breakpoint observation
SFX_da$Date[my.qlr$breakpoint]	# Print date

qlr.cv(T, L = 2, sig.level = 0.05)


#### B.4 - Jarque-Bera Normality test using library tseries
library(tseries)
e_s <- fit_ppp$residuals
jarque.bera.test(e_s)


#### B.5 - Heteroscedasticity: GQ test using library lmtest
library(lmtest)
gqtest(fit_ppp, fraction = .20)

#### Heteroscedasticity: White test 
e_s <- fit_ppp$residuals
e_s2 <- e_s^2						# Step 2 – squared residuals
inf_d2 <- inf_d^2
fit_W <- lm (e_s2 ~ inf_d + inf_d2) 			# Step 2 – Auxiliary regression 
b_W <- fit_W$coefficients
m_df <- length(b_W) - 1					# degrees of freedom
Re_2 <- summary(fit_W)$r.squared			# Step 2 – Keep R^2 from Auxiliary reg
LM_W_test <- Re_2 * T					# Step 3 – Compute LM Test: R^2 * T
LM_W_test	
p_val <- 1 - pchisq(LM_W_test, df = m_df)  		# p-value of LM_test 
p_val

#### (Extra) Heteroscedasticity: BP test
bptest(fit_ppp)


#### B.6 - Heteroscedasticity: LB for squared residuals (time-varying volatility)
Box.test(e_s2, lag=4, type="Ljung-Box")
Box.test(e_s2, lag=12, type="Ljung-Box")


#### B.7 - Autocorelation: BG test
bgtest(fit_ppp, order=4)
bgtest(fit_ppp, order=12)


#### B.8 - Autocorrelation: DW test
dwtest(fit_ppp)

#### (Extra) Autocorrelation: Q & LB tests (on residuals)
Box.test(e_s, lag=4, type="Ljung-Box")
Box.test(e_s, lag=12, type="Ljung-Box")


#### B.9 - Test PPP (alpha=0 & Beta=1)
library(car)
linearHypothesis(fit_ppp,c("(Intercept) = 0","inf_d = 1"),test="Chisq")	# Asymptotic Chi-squar  test


#### B.10 - Robust SE: OLS, & NW 
fit_ppp <- lm(e_sek ~ inf_d) 					# OLS Regression with lm
b_sek <-fit_ppp$coefficients					# Extract OLS coefficients from fit_ppp
SE_OLS <- sqrt(diag(vcov(fit_ppp)))				# Extract OLS SE from fit_ppp
t_OLS <- b_sek/SE_OLS						# Calculate  OLS t-values
b_sek
SE_OLS
t_OLS

library(sandwich)
NW <- NeweyWest(fit_ppp, lag = 4, prewhite = FALSE)
SE_NW <- diag(sqrt(abs(NW)))
t_NW <- b_sek/SE_NW
SE_NW
t_NW 

NW <- NeweyWest(fit_ppp, lag = 12, prewhite = FALSE)
SE_NW <- diag(sqrt(abs(NW)))
t_NW <- b_sek/SE_NW
SE_NW
t_NW 


#### B.11 - MSE: Model and RW
T <- length(e_s2)
MSE_ppp <- sum(e_s2[2:T])/(T-1)
MSE_ppp
e_RW <- e_sek[2:T] 
MSE_RW <- sum(e_RW^2)/(T-1)
MSE_RW


#### B.12 - Augmented (General) Model
fit_ppp_aug <- lm(e_sek ~ inf_d + e_usd + Mkt_RF + SMB + HML + RF + oil + gold) 
summary(fit_ppp_aug)
b_sek_aug <-fit_ppp_aug$coefficients	


#### B.13 - NW_SE in Augmented (General) Model
NW <- NeweyWest(fit_ppp_aug, lag = 12, prewhite = FALSE)
SE_NW <- diag(sqrt(abs(NW)))
t_NW <- b_sek_aug/SE_NW
SE_NW
t_NW 


#### B.14 - Reduced (Specific) Model
fit_ppp_mod <- lm(e_sek ~ inf_d + Mkt_RF + RF + oil) 
summary(fit_ppp_mod)
b_sek_mod <-fit_ppp_mod$coefficients
e_mod <- fit_ppp_mod$residuals
MSE_mod <- sum(e_mod[2:T]^2)/(T-1)
MSE_mod
MSE_RW


#### (Extra) Check for Multicollinearity
ols_vif_tol(fit_ppp_mod)
ols_eigen_cindex(fit_ppp_mod)

T <- length(e_sek)


######## 3.2 - Forecasting FX Rates: Forecasts and Accuracy (MSE)

#### 3.2-1 - Report Regression of Reduced Model
y <- e_sek
xx <- cbind(inf_d, Mkt_RF, RF, oil)
T0 <- 1
T1 <- 563							# End of Estimation Period (Dec 2019)
T2 <- T1+1							# Start of Validation Period (Jan 2020)
y1 <- y[T0:T1]
x1 <- xx[T0:T1,]
fit_ppp_est <- lm(y1 ~  x1)					# Estimation Period Regression (1973 - 2019)
b_est <- fit_ppp_est$coefficients				# Extract OLS coefficients from regression
summary(fit_ppp_est)


#### 3.2-2 - Estimate AR(1) For Independent Variables	
x1_1 <- xx[1:(T1-1),1]						# Estimation period data
x1_0 <- xx[2:T1,1]						# Estimation period data
fit_1 <- lm(x1_0 ~ x1_1)
b_1 <- fit_1$coefficients
summary(fit_1)

x2_1 <- xx[1:(T1-1),2]						# Estimation period data
x2_0 <- xx[2:T1,2]						# Estimation period data
fit_2 <- lm(x2_0 ~ x2_1)
b_2 <- fit_2$coefficients
summary(fit_2)

x3_1 <- xx[1:(T1-1),3]						# Estimation period data
x3_0 <- xx[2:T1,3]						# Estimation period data
fit_3 <- lm(x3_0 ~ x3_1)
b_3 <- fit_3$coefficients
summary(fit_3)

x4_1 <- xx[1:(T1-1),4]						# Estimation period data
x4_0 <- xx[2:T1,4]						# Estimation period data
fit_4 <- lm(x4_0 ~ x4_1)
b_4 <- fit_4$coefficients
summary(fit_4)

	
#### 3.2-2 - AR(1) Forecast for Independent Variables	
T_val <- T1 + 1							# start of Validation period (Jan 2020)
T_1 <- T -1
xx_cons <- rep(1,T-T_val+1)
xx1_0 <- cbind(xx_cons,xx[T1:T_1,1]) %*% b_1			# Forecast x1 with Validation period data
xx2_0 <- cbind(xx_cons,xx[T1:T_1,2]) %*% b_2			# Forecast x2 with Validation period data
xx3_0 <- cbind(xx_cons,xx[T1:T_1,3]) %*% b_3			# Forecast x3 with Validation period data
xx4_0 <- cbind(xx_cons,xx[T1:T_1,4]) %*% b_4			# Forecast x4 with Validation period data


#### 3.2.3 - Reduced Model Forecast S_t+1
k_for <- T - T_val+1
e_sek_mod_0 <- cbind(xx_cons,xx1_0,xx2_0,xx3_0,xx4_0)%*%b_est	# Forecast for e_f,t
S_mod_f0 <- S[T1:(T-1)]*(1+e_sek_mod_0)				# Forecast for S_t
e_mod_f0 <- S[T_val:T] - S_mod_f0				# Forecasat error
mse_e_f0 <- sum(e_mod_f0^2)/k_for				# MSE
mse_e_f0


#### 3.2-4 - AR(1) For Dependent Variable	(y = e_sek)	
y_1 <- y[1:(T1-1)]						# Estimation period data
y_0 <- y[2:T1]							# Estimation period data
fit_y <- lm(y_0 ~ y_1)						# Fit AR(1) model for e_f,t
b_y <- fit_y$coefficients
summary(fit_y)

#### AR(1) Forecast S_t+1
y_f0 <-  cbind(xx_cons,y[T_val:T])%*% b_y			# b_est coefficients from estimation period regresssion
S_ar1_f0 <- S[T1:(T-1)]*(1+y_f0)				# Forecast for S_t, using validation data
e_ar1_f0 <- S[T_val:T] - S_ar1_f0				# Forecasat error
mse_e_ar1_f0 <- sum(e_ar1_f0^2)/k_for				# MSE
mse_e_ar1_f0							# MSE(2)


#### 3.2-5 - RW Forecast S_t+1
e_rw_f0 <- S[T_val:T] - S[T1:(T-1)]				# Error for RW model =>  et (1) 	
mse_e_rw_f0 <- sum(e_rw_f0^2)/k_for	
mse_e_rw_f0							# MSE(1)		<= (1) is the higher MSE.


#### 3.2-5 - Testing Equality of MSE: Mod vs RW
### MSE's: Mod, AR(1), & RW 
mse_e_f0
mse_e_ar1_f0
mse_e_rw_f0

### Testing Equality of MSE: Mod vs RW
z_mgn <- e_rw_f0 + e_mod_f0 
x_mgn <- e_rw_f0 - e_mod_f0
fit_mgn <- lm(z_mgn ~ x_mgn)	
summary(fit_mgn)						# Check t-stat

### Testing Equality of MSE: AR(1) vs RW
z_mgn <- e_rw_f0 + e_ar1_f0
x_mgn <- e_rw_f0 - e_ar1_f0
fit_mgn <- lm(z_mgn ~ x_mgn)
summary(fit_mgn)						# Check t-stat

### Testing Equality of MSE: AR(1) vs Mod
z_mgn <- e_mod_f0 + e_ar1_f0 
x_mgn <- e_mod_f0 - e_ar1_f0 
fit_mgn <- lm(z_mgn ~ x_mgn)
summary(fit_mgn)						# Check t-stat