############# LM Tests & Model Specification, Dummy Variables and Structural Change (Code #9) #############

SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv",head=TRUE,sep=",")
x_ibm <- SFX_da$IBM
x_xom <- SFX_da$XOM
x_ge <- SFX_da$GE
x_Mkt_RF<- SFX_da$Mkt_RF
x_SMB <- SFX_da$SMB
x_HML <- SFX_da$HML
x_RF <- SFX_da$RF

T <- length(x_ibm)
lr_ibm <- log(x_ibm[-1]/x_ibm[-T])
lr_xom <- log(x_ibm[-1]/x_ibm[-T])
x0 <- matrix(1,T-1,1)
 Mkt_RF <- x_Mkt_RF[-1]/100
 SMB <- x_SMB[-1]/100
 HML <- x_HML[-1]/100
 RF <- x_RF[-1]/100
 ibm_x <- lr_ibm - RF				# IBM excess returns 


### Specification Test: LM Tests to test for Omitted Variables (SMB + HML in CAPM)

T <- length(ibm_x)
fit_ibm_capm <- lm (ibm_x ~ Mkt_RF)
resid_r <- fit_ibm_capm$residuals		# extract residuals from R model
fit_lm <- lm (resid_r ~ Mkt_RF + SMB + HML)	# auxiliary regression
summary(fit_lm)
R2_r <- summary(fit_lm)$r.squared		# extract  R2 from fit_lm 
R2_r

LM_test <- R2_r * T
LM_test

qchisq(.95, df = 2) 				# chi-squared (df=2) value at 5% level

p_val <- 1 - pchisq(LM_test, df = 2)  		# p-value of LM_test 
p_val


### Specification Test: LM Tests to test for Non linearities (Omitted Variable are Mkt_RF^2, SMB^2 + HML^2)

fit_ibm_ff3 <- lm (ibm_x ~ Mkt_RF + SMB + HML)
Mkt_RF2 <- Mkt_RF^2
SMB2 <- SMB^2
HML2 <- HML^2

resid_r <- fit_ibm_ff3$residuals
fit_lm <- lm (resid_r ~ Mkt_RF + SMB + HML + Mkt_RF2 + SMB2 + HML2)

R2_r <- summary(fit_lm)$r.squared
LM_test <- R2_r * T
LM_test

qchisq(.95, df = 3) 					# 95% quantile for Chi-square distribution
p_val <- 1 - pchisq(LM_test, df = 3)  			# p-value of LM_test 
p_val


### Specification Test: RESET Test
y_hat <- fitted(fit_ibm_ff3)
y_hat2 <- y_hat^2
fit_ramsey <- lm(ibm_x ~ Mkt_RF + SMB + HML + y_hat2)
summary(fit_ramsey)

y_hat4 <- y_hat^4
fit_ramsey2 <- lm(ibm_x ~ Mkt_RF + SMB + HML + y_hat4)
summary(fit_ramsey2)

fit_ramsey2 <- lm(ibm_x ~ Mkt_RF + SMB + HML + y_hat2 + y_hat4)
summary(fit_ramsey2)


## Using R package lmtest
library(lmtest)
resettest(fit_ibm_ff3, power=2, type="fitted")

resettest(fit_ibm_ff3, power=4, type="fitted")


#### Dummy Variables

## Creating a Dummy Variable 

# By Characteristc (Male, Female)
df <- data.frame(income=c(45000, 48000, 49000, 51000, 54000, 57000, 65000, 69000,
                          77000, 78000, 83000, 98000, 104000, 107000, 120000),
                 age=c(23, 25, 24, 29, 29, 30, 38, 36, 40, 59, 52, 64, 53, 65, 57),
                 status=c('Female', 'Female', 'Male', 'Male', 'Male', 'Female',
                          'Male', 'Female', 'Male', 'Female', 'Female',
                          'Female', 'Female', 'Male', 'Male'))

# view data frame
df

#create dummy variables
male <- ifelse(df$status == 'Male', 1, 0)
female <- ifelse(df$status == 'Female', 1, 0)

male

lm_gen <- lm(df$income ~ df$age + male)
summary(lm_gen)


# By Greater/Lower Than a threshold
over30 <- ifelse(df$age >= 30, 1, 0)
over30 

lm_age <- lm(df$income ~ over30 + male)
summary(lm_age)


# By Time of the Year (Seasonal. Example: Quarterly)
q1 <- rep(c(1,0,0,0), 5)		# First Quarter Dummy
q1

q4 <- rep(c(0,0,0,1), 5)		# Fourth Quarter Dummy
q4


# By Time of the Year (Regimes. Example: Structural Break (SB) at t_sb = 10)
T <- 22					# Length of Data
t_sb <- 10				# Timing of SB
T1 <- T-t_sb				# Number of Observations after SB
D_c0 <- rep(0,10)			# Dummy_t = 0 if t <= t_sb
D_c1 <- rep(1,T1)			# Dummy_t = 1 of t > t_sb
D_c <- c(D_c0,D_c1)			# SB Dummy variable


### Introducing Seasonal Factors in XOM
x_xom <- SFX_da$XOM					# Extract XOM prices
T <- length(x_xom)
lr_xom <- log(x_xom[-1]/x_xom[-T])
xom_x <- lr_xom - RF

fit_xom_ff3 <- lm(xom_x ~ Mkt_RF + SMB + HML)		# FF regression for XOM returns
summary(fit_xom_ff3)

T <- length(xom_x) 
Summ <- rep(c(0,0,0,0,0,0,1,1,1,0,0,0), round(T/12)+1) 		# Create Summer dummy
Fall <- rep(c(0,0,0,0,0,0,0,0,0,1,1,1), round(T/12)+1) 		# Create Fall dummy
Wint <- rep(c(1,1,1,0,0,0,0,0,0,0,0,0), round(T/12)+1) 		# Create Winter dummy
T1 <- T+1
Fall_1 <- Fall[2:T1]						# Adjusting sample (starts in Feb)
Wint_1 <- Wint[2:T1]
Summ_1 <- Summ[2:T1]

# Model Seasonal Factors only in constant
fit_xom_sea <- lm(xom_x ~ Mkt_RF + SMB + HML + Fall_1 + Wint_1 + Summ_1) 
summary(fit_xom_sea)


## F-test for seasonal factors (constant only)
library(lmtest)
waldtest(fit_xom_sea, fit_xom_ff3)			# Model with Seasonal Dummies is the U Model


## Model Seasonal factors in Constant and Slopes (only interacting Winter Dummy)
Mkt_W <- Mkt_RF*Wint_1
SMB_W <- SMB*Wint_1
HML_W <- HML*Wint_1 
fit_xom_s2 <- lm(xom_x ~ Mkt_RF + SMB + HML + Fall_1 + Wint_1 + Summ_1 + Mkt_W + SMB_W + HML_W)
summary(fit_xom_s2)


## F-test for seasonal factors (constant and slopes)
waldtest(fit_xom_s2, fit_xom_ff3)


### January Effect

## XOM
T <- length(ibm_x)
Jan <- rep(c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (round(T)/12+1))	# Create January dummy
T2 <- T+1
Jan_1 <- Jan[2:T2]						# Adjust sample
fit_xom_ff3_Jan <- lm (xom_x ~ Mkt_RF + SMB + HML + Jan_1)	# Auxiliary Regression
summary(fit_xom_ff3_Jan)

## IBM
fit_ibm_ff3_Jan <- lm (ibm_x ~ Mkt_RF + SMB + HML + Jan_1)
summary(fit_ibm_ff3_Jan)


res_ibm_ff3_Jan <- fit_ibm_ff3_Jan$residuals
plot(res_ibm_ff3_Jan, type="l", main = "IBM residuals")

### Removing one Observation
dec_1992 <- rep(0,T)						# Define Dec 1992 dummy
dec_1992[239] <- 1						# Define Dec 1992 dummy (=1 if Dec 1992)
fit_d92 <- lm (ibm_x ~ Mkt_RF + SMB + HML + Jan_1 + dec_1992)
summary(fit_d92)

res_d92 <- fit_d92$residuals
plot(res_d92, type="l", col="blue", main = "IBM residuals withoud Dec 1992 observation")


#### Structural Test - Chow Test for 3-factor FF Model for IBM computing RSS_U & RSS_R
y <- ibm_x 
x0 <- matrix(1,T,1)
x <- cbind(Mkt_RF,SMB,HML)			# Regressors (constant included)
T <- length(y)
t_s <- 342					# Structural break date (End of 1st-regime)

## Restricted Model (Pooling all data)
fit_R <- lm (y ~ x)				# Restricted Regression
e_R <- fit_R$residuals				# regression residuals, e
k <- length(fit_R$coefficients)			# Number of parameters
RSS_R <- sum(e_R^2)				# Restricted RSS

## Unrestricted Model (Computing RSS1 & RSS2)
# RSS1
y_1 <- y[1:t_s]
x_u1 <- x[1:t_s,]

fit_ibm_U1 <- lm (y_1 ~ x_u1)			# Unrestricted Regression 1
summary(fit_ibm_U1)
e_U1 <- fit_ibm_U1$residuals			# regression residuals, e
RSS1 <- sum(e_U1^2)				# Unrestricted RSS 1

# RSS2
kk = t_s+1					# Starting observation for 2nd-regime
y_2 <- y[kk:T]
x_u2 <- x[kk:T,]

fit_ibm_U2 <- lm (y_2 ~ x_u2)			# Unrestricted Regression 2
summary(fit_ibm_U2)
e_U2 <- fit_ibm_U2$residuals			# regression residuals, e
RSS2 <- sum(e_U2^2)				# Unrestricted RSS 2

# Chow-test (F-test)
F <- ((RSS_R - (RSS1+RSS2))/k)/((RSS1+RSS2)/(T - 2*k))	# F-test
F

df2 <- T -2 *k

qf(.95, df1=k, df2)				# 95% quantile value of F-test
p_val <- 1 - pf(F,df1=k, df2)			# p-value of F-test
p_val


#### Structural Test - Chow Test for 3-factor FF model for IBM using library strucchange
library(strucchange)
sctest(y ~ x, type = "Chow", point = t_s)


#### Structural Test - Effect on Coefficients (GDP growth AR(1) model with SB at October 1973
GDP_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/GDP_q.csv", head=TRUE, sep=",")
x_date <- GDP_da$DATE
x_gdp <- GDP_da$GDP
x_dummy <- GDP_da$D73
T <- length(x_gdp)
t_s <- 108					# TSB = Oct 1973	
x_date[108]

## Plot GDP Level
x.ts = ts(x_gdp, frequency = 4, start=c(1947, 2))
plot.ts(x.ts, col="blue",ylab ="GDP level", main="U.S. GDP: 1947-2023")

## Changes and Plot of changes
lr_gdp <- log(x_gdp[-1]/x_gdp[-T])

e.ts = ts(lr_gdp, frequency = 4, start=c(1947, 2))
plot.ts(e.ts, col="green",ylab ="GDP growth rate ", main="GDP quarterly growth rate: 1947-2023")

mean(lr_gdp[1:t_s])*4
mean(lr_gdp[t_s:T])*4

sd(lr_gdp0[1:t_s])*sqrt(4)
sd(lr_gdp0[t_s:T])*sqrt(4)

## Create gdp_g(t) & gdp_g(t-1) 
T <- length(lr_gdp)
lr_gdp0 <- lr_gdp[-1]				# GDP growth rate: We remove first observation
lr_gdp1 <- lr_gdp[-T]				# Lagged GDP growth rate: We remove last observation
T <- length(lr_gdp0)
t_s <- t_s -1					# Adjust t_s (we lost the first observation)

## Restricted Model (No structural change)
fit_ar1 <- lm(lr_gdp0 ~ lr_gdp1)
summary(fit_ar1)

#### Structural Test - Chow Test for GDP growth with strucchange library
library(strucchange)
sctest(lr_gdp0 ~ lr_gdp1, type = "Chow", point = t_s)


#### Structural Test - Chow Test for GDP growth computing RSS_U & RSS_R
y <- lr_gdp0 
x1 <-  lr_gdp1
T <- length(y)
x0 <- matrix(1,T,1)
x <- cbind(x0,x1)
k <- ncol(x)

## Restricted Model (Pooling all data)
fit_ar1 <- lm(lr_gdp0 ~ lr_gdp1)		# Fitting AR(1) (Restricted) Model
summary(fit_ar1)
e_ar1_R <- fit_ar1$residuals			# regression residuals, e
RSS_R <- sum(e_ar1_R^2)				# Restricted RSS

## Unrestricted Model (Computing RSS1 & RSS2)
# RSS1
y_1 <- y[1:t_s]
x_u1 <- x1[1:t_s]
fit_ar1_U1 <- lm (y_1 ~ x_u1)			# Unrestricted Regression 1
e_ar1_U1 <- fit_ar1_U1$residuals		# regression residuals, e
RSS1 <- sum(e_ar1_U1^2)				# Unrestricted RSS 1

# RSS2
kk <- t_s + 1					# Starting observation for 2nd-regime
y_2 <- y[kk:T]
x_u2 <- x1[kk:T]
fit_ar1_U2 <- lm (y_2 ~ x_u2)			# Unrestricted Regression 2
e_ar1_U2 <- fit_ar1_U2$residuals		# regression residuals, e
RSS2 <- sum(e_ar1_U2^2)				# Unrestricted RSS 2

# Chow-test (F-test)
b_ar1 <- fit_ar1_U2$coefficients
k <- length(b_ar1)
F <- ((RSS_R - (RSS1+RSS2))/k)/((RSS1+RSS2)/(T - 2*k))	# F-test
F


# Unrestricted Model with a Before & After Dummy (D73) (Introducing different coefficients for both Regimes)	
T1 <- T - t_s					# Number of Observations after SB
D73_0 <- rep(0,t_s)				# Dummy_t = 0 if t <= t_s
D73_1 <- rep(1,T1)				# Dummy_t = 1 of t > t_s
D73 <- c(D73_0,D73_1)				# SB Dummy variable t_s <- 108
lr_gdp1_D73 <- lr_gdp1 * D73			# interactive dummy (effect on slope)
fit_ar1_d_2 <- lm(lr_gdp0 ~ lr_gdp1 + D73 + lr_gdp1_D73)
summary(fit_ar1_d_2)

# F-test for structural break (D73 no impact on constant and slope)
library(car)
linearHypothesis(fit_ar1_d_2, c("D73 = 0","lr_gdp1_D73 = 0"), test="F") 

# Forecating with AR(1) Model with and without Structural break
gdp_2024_1 <- lr_gdp0[T]			# GDP 2024:I

# Forecast 2024:II with structural break
(0.008953 + 0.003779) +  (0.467457 + (-0.326809)) * gdp_2024_1

# Forecast 2024:II without structural break
0.011411 + 0.263353 * gdp_2024_1