############### Practice Exam # 2 - Code (Fall 2025) ###############

FMX_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/FX_USA_MX.csv", head=TRUE, sep=",")
summary(FMX_da)

x_years <- FMX_da$Years
us_CPI <- FMX_da$US_CPI
us_M1 <- FMX_da$US_M1
us_i <- FMX_da$US_int
us_GDP <- FMX_da$US_GDP
us_CA <- FMX_da$US_CA
mx_CPI <- FMX_da$MX_CPI
mx_M1 <- FMX_da$MX_M1
mx_i <- FMX_da$MX_int
mx_GDP <- FMX_da$MX_GDP
mx_CA <- FMX_da$MX_CA
S_mx <- FMX_da$MXN_USD

T <- length(us_CPI)
mx_I <- log(mx_CPI[-1]/mx_CPI[-T])   # Mexican inflation rate
mx_y <- log(mx_GDP[-1]/mx_GDP[-T])   # Mexican income growth rate
mx_m <- log(mx_M1[-1]/mx_M1[-T])     # Mexican M1 growth rate
e_mx <- log(S_mx[-1]/S_mx[-T])       # Change in FX rate (MXN/USD)
us_i_1 <- us_i[-1]/100               # Convert to decimals & adjust sample size
mx_i_1 <- mx_i[-1]/100               # Convert to decimals & adjust sample size
mx_i_0 <- mx_i[-T]/100               # Convert to decimals & adjust sample size
T_new <- length(mx_i_1)

###### Q2.1 ######

## 2.1.a - Report Regression
fit_i <- lm(mx_i_1 ~ us_i_1 + e_mx + mx_I + mx_m + mx_y)
summary(fit_i)

## 2.1.b - Report & interpret R^2 & Beta_1
# R^2 = 0.8327: 83% of the variability of Mexican interest rates is explained by the variables in the model.
# Beta_1 = 0.67142: a one-percentage-point increase in the US interest rate increases the Mexican
# interest rate by 0.67 percentage points.

## 2.1.c - Drivers of regression
# US interest rate & Mexican inflation rate.

## 2.1.d - Heteroscedasticity Tests: GQ
library(lmtest)                      # gqtest, bptest, coeftest, dwtest, bgtest
gqtest(fit_i)

## 2.1.e - Heteroscedasticity Tests: LM-BP
e_fit <- fit_i$residuals
e_fit2 <- e_fit^2                    # Squared residuals
mx_I2 <- mx_I^2                      # Potential driver of variance
e_mx2 <- e_mx^2                      # Potential driver of variance
bptest(formula = fit_i, varformula = ~ us_i_1 + mx_I2 + e_mx2)

## 2.1.f - White SE
library(sandwich)                    # White & NW covariance estimators
White <- vcovHC(fit_i, type = "HC0") # White SE
coeftest(fit_i, vcov = White)

## 2.1.g - DW Test for AR(1)
dwtest(fit_i)

## 2.1.h - LM Test for AR(p)
bgtest(fit_i, order = 4)

## 2.1.i - NW SE
Var_NW <- NeweyWest(fit_i, lag = 12)
coeftest(fit_i, vcov = Var_NW)

###### Q2.2 ######

## 2.2.a - Estimation Period
y <- mx_i_1
xx_i <- cbind(us_i_1, e_mx, mx_I, mx_m, mx_y)   # X matrix
T0 <- 1
T1 <- 175                            # End of Estimation Period (2021.4)
T2 <- T1 + 1                         # Start of Validation Period (2022.1)
y1 <- y[T0:T1]
x1 <- xx_i[T0:T1,]
fit_i_est <- lm(y1 ~ x1)             # Estimation Period Regression
b_est <- fit_i_est$coefficients      # Extract OLS coefficients from regression
b_est                                # OLS coefficients
summary(fit_i_est)

# RW assumption for the independent variables: X at t forecasts X at t+1
xx_cons <- rep(1, T_new - T2 + 1)    # Constant for the validation forecasts
k_for <- length(xx_cons)             # Number of validation forecasts
y_mod_f0 <- cbind(xx_cons, xx_i[T1:(T_new-1),]) %*% b_est   # Validation period forecasts
e_mod_f0 <- y[T2:T_new] - y_mod_f0   # Forecast error
mse_e_f0 <- sum(e_mod_f0^2)/k_for    # MSE
mse_e_f0

## 2.2.b - Out-of-sample forecast for the Mexican interest rate in 2025.3
y_mod_f_1 <- c(xx_cons[1], xx_i[T_new,]) %*% b_est
y_mod_f_1
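
## Optional check on 2.2.a: a minimal sketch comparing the model's validation MSE against a
## no-change (random walk) benchmark for mx_i_1, and reporting MAE alongside MSE. The object
## names below (e_rw_f0, mse_rw_f0, mae_e_f0) are illustrative and not part of the exam answer.
e_rw_f0 <- y[T2:T_new] - y[T1:(T_new-1)]   # RW forecast error: last quarter's rate as the forecast
mse_rw_f0 <- sum(e_rw_f0^2)/k_for          # Benchmark (RW) MSE
mae_e_f0 <- sum(abs(e_mod_f0))/k_for       # Model MAE
c(model_MSE = mse_e_f0, RW_MSE = mse_rw_f0, model_MAE = mae_e_f0)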

###### Q2.3 ######

RE_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/Real_Estate_2025.csv", head=TRUE, sep=",")
summary(RE_da)

x_date <- RE_da$DATE
x_sd <- RE_da$SD_c
u_sd <- RE_da$SD_u
x_tech <- RE_da$Tech_c
Cind <- RE_da$Cind_c
ec_sd <- RE_da$LAS_EC
x_Mkt <- RE_da$Mkt_RF
x_SMB <- RE_da$SMB
x_HML <- RE_da$HML
x_RMW <- RE_da$RMW
x_CMA <- RE_da$CMA
x_RF <- RE_da$RF

Mkt_RF <- x_Mkt/100
SMB <- x_SMB/100
HML <- x_HML/100
RMW <- x_RMW/100
CMA <- x_CMA/100
RF <- x_RF/100

zz <- x_sd
T <- length(x_sd)
T_sb <- 224                          # Number of observations before the crisis regime
T_f <- T - T_sb
Fin_c0 <- rep(0, T_sb)
Fin_c1 <- rep(1, T_f)
Fin_c <- c(Fin_c0, Fin_c1)           # Create 2008 Financial crisis dummy

# Monthly dummies
Feb1 <- rep(c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))   # February dummy
Mar1 <- rep(c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))   # March dummy
Apr1 <- rep(c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))   # April dummy
May1 <- rep(c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))   # May dummy
Jun1 <- rep(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))   # June dummy
Jul1 <- rep(c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))   # July dummy
Aug1 <- rep(c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), (length(zz)/12+1))   # August dummy
Sep1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0), (length(zz)/12+1))   # September dummy
Oct1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0), (length(zz)/12+1))   # October dummy
Nov1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), (length(zz)/12+1))   # November dummy
Dec1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (length(zz)/12+1))   # December dummy
Feb <- Feb1[1:T]
Mar <- Mar1[1:T]
Apr <- Apr1[1:T]
May <- May1[1:T]
Jun <- Jun1[1:T]
Jul <- Jul1[1:T]
Aug <- Aug1[1:T]
Sep <- Sep1[1:T]
Oct <- Oct1[1:T]
Nov <- Nov1[1:T]
Dec <- Dec1[1:T]

# Seasonal dummies
Spring <- Mar + Apr + May
Summ <- Jun + Jul + Aug
Fall <- Sep + Oct + Nov

# Squared terms and interaction terms
u_sd2 <- u_sd^2
x_tech2 <- x_tech^2
Cind2 <- Cind^2
u_sd_Cind <- u_sd*Cind
u_sd_tech <- u_sd*x_tech
u_sd_Spring <- u_sd*Spring
u_sd_Summ <- u_sd*Summ
u_sd_Fall <- u_sd*Fall
Cind_Spring <- Cind*Spring
Cind_Summ <- Cind*Summ
Cind_Fall <- Cind*Fall
tech_Spring <- x_tech*Spring
tech_Summ <- x_tech*Summ
tech_Fall <- x_tech*Fall
u_sd_Finc <- u_sd*Fin_c
Cind_Finc <- Cind*Fin_c
tech_Finc <- x_tech*Fin_c
Finc_Spring <- Fin_c*Spring
Finc_Summ <- Fin_c*Summ
Finc_Fall <- Fin_c*Fall

## 2.3.a - GUM and Reduced (Specific) Models
fit_sd_gum <- lm(formula = x_sd ~ u_sd + x_tech + Cind + Mkt_RF + SMB + HML + RMW + CMA +
                   u_sd2 + Cind2 + x_tech2 + Spring + Summ + Fall + Fin_c +
                   u_sd_Cind + u_sd_tech + u_sd_Spring + u_sd_Summ + u_sd_Fall +
                   Cind_Spring + Cind_Summ + Cind_Fall + tech_Spring + tech_Summ + tech_Fall +
                   u_sd_Finc + Cind_Finc + tech_Finc + Finc_Spring + Finc_Summ + Finc_Fall)
summary(fit_sd_gum)

## Keeping all variables with a p-value of 10% or less.
fit_sd_red <- lm(x_sd ~ Cind + CMA + u_sd2 + x_tech2 + Fin_c + u_sd_Finc + Cind_Finc)
summary(fit_sd_red)

## 2.3.b - Drivers (variables with significant t-stats at 5% level)
# Drivers: Cind, CMA, u_sd2, x_tech2, Fin_c, plus two interaction terms with the Financial crisis
# dummy: u_sd_Finc (unemployment) & Cind_Finc.

## 2.3.c - Financial Crisis?
# Yes. Fin_c is significant.

## 2.3.d - JB Test, using package tseries (function jarque.bera.test)
e_sd <- fit_sd_gum$residuals
library(tseries)
jarque.bera.test(e_sd)

## 2.3.e - White and NW SE
# At the 5% level, we have evidence of both heteroscedasticity & autocorrelation. Then, use NW SE.
Var_NW <- NeweyWest(fit_sd_red, lag = 12)
SE_NW <- sqrt(diag(Var_NW))
b_sd_red <- fit_sd_red$coefficients
t_b_NW <- b_sd_red/SE_NW
t_b_NW
# Once we take into account autocorrelation and heteroscedasticity, only Cind, u_sd2, u_sd_Finc &
# Cind_Finc are significant.
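
## Optional check on 2.3.e: a minimal sketch of the tests behind the claim of heteroscedasticity
## and autocorrelation in the reduced model, reusing bptest() and bgtest() from lmtest as in Q2.1,
## with White SE shown next to the NW SE above for comparison.
bptest(fit_sd_red)                                          # LM-BP test for heteroscedasticity
bgtest(fit_sd_red, order = 4)                               # LM test for AR(4) autocorrelation
SE_White <- sqrt(diag(vcovHC(fit_sd_red, type = "HC0")))    # White SE
t_b_White <- fit_sd_red$coefficients/SE_White               # t-stats using White SE only
t_b_White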

###### Q2.4 ######

SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv", head=TRUE, sep=",")

x_dat <- SFX_da$Date
x_cat <- SFX_da$CAT
x_Mkt_RF <- SFX_da$Mkt_RF
x_SMB <- SFX_da$SMB
x_HML <- SFX_da$HML
x_CMA <- SFX_da$CMA
x_RMW <- SFX_da$RMW
x_RF <- SFX_da$RF

T <- length(x_cat)
lr_cat <- log(x_cat[-1]/x_cat[-T])   # CAT log returns
Mkt_RF <- x_Mkt_RF[-1]/100
SMB <- x_SMB[-1]/100
HML <- x_HML[-1]/100
CMA <- x_CMA[-1]/100
RMW <- x_RMW[-1]/100
RF <- x_RF[-1]/100
cat_x <- lr_cat - RF                 # CAT excess returns

## 2.4.a
fit1 <- lm(cat_x ~ Mkt_RF + SMB + HML)   # Model 1
summary(fit1)
fit2 <- lm(cat_x ~ Mkt_RF + CMA + RMW)   # Model 2
summary(fit2)

###### Q2.5 ###### (NOT COVERED)

## 2.5.a
Phi_1 <- 0.73     # AR(1) coefficient: less than 1 in absolute value, stationary
Theta_1 <- 0.30   # MA(1) coefficient: less than 1 in absolute value, invertible

## 2.5.b
# Decaying ACF
# Decaying PACF

## 2.5.c
# For an ARMA(1,1), rho_1 = (1 + Phi*Theta)*(Phi + Theta)/(1 + Theta^2 + 2*Phi*Theta),
# and rho_k = Phi^(k-1) * rho_1 for k >= 1.
rho_1 <- (1 + Phi_1 * Theta_1) * (Phi_1 + Theta_1)/(1 + Theta_1^2 + 2 * Phi_1 * Theta_1)   # = 0.8217
rho_2 <- Phi_1 * rho_1                                                                     # = 0.5998
rho_3 <- Phi_1^2 * rho_1                                                                   # = 0.4379

## 2.5.d
Y_0 <- 2
e_0 <- 0.6
Y_T_p1 <- 0.1 + Phi_1 * Y_0 + Theta_1 * e_0   # 1.74
Y_T_p2 <- 0.1 + Phi_1 * Y_T_p1                # 1.3702
Y_T_p3 <- 0.1 + Phi_1 * Y_T_p2                # 1.100246

###### Q2.6 ######

y <- x_sd

## 2.6.a
acf_m <- acf(y)
pacf(y)
# => ARMA(0,0)

## 2.6.b
T_y <- length(y)
Feb <- rep(c(1,0,0,0,0,0,0,0,0,0,0,0), T_y/12+1)   # February dummy
Mar <- rep(c(0,1,0,0,0,0,0,0,0,0,0,0), T_y/12+1)   # March dummy
Apr <- rep(c(0,0,1,0,0,0,0,0,0,0,0,0), T_y/12+1)
May <- rep(c(0,0,0,1,0,0,0,0,0,0,0,0), T_y/12+1)
Jun <- rep(c(0,0,0,0,1,0,0,0,0,0,0,0), T_y/12+1)
Jul <- rep(c(0,0,0,0,0,1,0,0,0,0,0,0), T_y/12+1)
Aug <- rep(c(0,0,0,0,0,0,1,0,0,0,0,0), T_y/12+1)
Sep <- rep(c(0,0,0,0,0,0,0,1,0,0,0,0), T_y/12+1)
Oct <- rep(c(0,0,0,0,0,0,0,0,1,0,0,0), T_y/12+1)
Nov <- rep(c(0,0,0,0,0,0,0,0,0,1,0,0), T_y/12+1)
Dec <- rep(c(0,0,0,0,0,0,0,0,0,0,1,0), T_y/12+1)
seas1 <- cbind(Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec)
seas <- seas1[1:T_y,]
nrow(seas)
length(y)
length(Feb)
fit_y_seas <- lm(y ~ seas)
summary(fit_y_seas)

## 2.6.c
library(forecast)
auto.arima(y)
library(weakARMA)
ar_y <- ARMA.selec(y, P=3, Q=3)
library(timsac)                      # uses only AIC
autoarmafit(y, max.order = NULL)
fit_13 <- arima(y, order=c(1,0,3))
fit_13

## 2.6.d - Stationarity?
plot(fit_13)

## 2.6.e
checkresiduals(fit_13)

## 2.6.f (NOT COVERED)
fcat_13 <- forecast(fit_13, h=3, level=.95)
fcat_13
plot(fcat_13)

###### Q2.7 - THEORY REVIEW ######

# 2.7.a. False. Violations of Assumption (A3) make us use OLS with appropriate standard errors (White or NW).
# 2.7.b. False. White SE are used when only heteroscedasticity is present. For autocorrelated errors, we use NW SE.
# 2.7.c. True. Violations of (A3) do not affect unbiasedness (or consistency).
# 2.7.d. False. The J-test can reject both models.
# 2.7.e. False. The Chow test for structural change is conditional on a given break date. If the date is
#        wrong, the Chow test is looking at the wrong time for structural change.
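
## Optional illustration for 2.7.e: a minimal sketch of a Chow test conditional on a given break
## date, using the Q2.3 series x_sd, the Q2.3 regressors (u_sd, x_tech, Cind) and the crisis break
## at observation T_sb. The regressor choice and break point are assumptions for the example; the
## F-statistic below is the standard restricted vs. unrestricted SSR comparison.
T_re <- length(x_sd)                                            # Recompute sample size (T was redefined in Q2.4)
fit_pool <- lm(x_sd ~ u_sd + x_tech + Cind)                     # Restricted: one regime for the whole sample
fit_pre  <- lm(x_sd[1:T_sb] ~ u_sd[1:T_sb] + x_tech[1:T_sb] + Cind[1:T_sb])                               # Pre-break regime
fit_post <- lm(x_sd[(T_sb+1):T_re] ~ u_sd[(T_sb+1):T_re] + x_tech[(T_sb+1):T_re] + Cind[(T_sb+1):T_re])   # Post-break regime
RSS_r <- sum(fit_pool$residuals^2)                              # Restricted SSR
RSS_u <- sum(fit_pre$residuals^2) + sum(fit_post$residuals^2)   # Unrestricted SSR
k <- length(fit_pool$coefficients)                              # Parameters per regime (incl. intercept)
F_chow <- ((RSS_r - RSS_u)/k) / (RSS_u/(T_re - 2*k))            # Chow F-statistic
F_chow
1 - pf(F_chow, k, T_re - 2*k)                                   # p-value; rejection => break at the chosen date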