############# Regression with lm function and Linear Algebra (Code #3B - Used in Lecture 6) ############# #### Regression using R with lm fuction SFX_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv", head=TRUE, sep=",") names(SFX_da) summary(SFX_da) # Extract variables from imported data x_ibm <- SFX_da$IBM # extract IBM price data x_Mkt_RF <- SFX_da$Mkt_RF # extract Market excess returns (in %) x_RF <- SFX_da$RF # extract Risk-free rate (in %) # Define log returns & adjust size of variables accordingly T <- length(x_ibm) # sample size lr_ibm <- log(x_ibm[-1]/x_ibm[-T]) # create IBM log returns (in decimal returns) Mkt_RF <- x_Mkt_RF[-1]/100 # Adjust sample size to ( T-1) by removing 1st obs RF <- x_RF[-1]/100 # Adjust sample size and use decimal returns. # Scatter plot of the two varialbes plot(lr_ibm, Mkt_RF, main="Scatterplot: IBM & Market", xlab="IBM returns ", ylab="Market returns ", pch=19) # Regression using lm ibm_x <- lr_ibm - RF # IBM excess returns T <- length(ibm_x) fit_ibm_capm <- lm(ibm_x ~ Mkt_RF) # lm (=linear model) package in R summary(fit_ibm_capm) # print lm results # Scatter plot of the two varialbes with regression line plot(lr_ibm, Mkt_RF, main="Scatterplot: IBM & Market", xlab="IBM returns ", ylab="Market returns ", pch=19) abline(fit_ibm_capm, col="blue") # Expected (CAPM) vs Realized IBM return exp_ret <- fit_ibm_capm$coefficients[2] * mean(Mkt_RF) # Monthly expected return according to CAPM exp_ret mean(ibm_x) # Realized return mean(ibm_x) - exp_ret # Over-r-performance if positive / Under-performance if negative # Regression using lm with R data frame, with different sample sizes data_full <- data.frame(ibm_x, Mkt_RF) fit_ibm_capm_full <- lm(ibm_x ~ Mkt_RF, data = data_full) # lm reg with full data summary(fit_ibm_capm_full) mean(Mkt_RF) * 12 # Annualized Market excess return T0 <- 1 T1 <- 200 # August 1989 data_short <- data_full[T0:T1, ] fit_ibm_capm_short <- lm(ibm_x ~ Mkt_RF, data = data_short) # lm reg with shorter data summary(fit_ibm_capm_short) mean(Mkt_RF[T0:T1]) * 12 # Annualized Market excess return T0 <- 201 T1 <- 400 # May 2006 data_short <- data_full[T0:T1, ] fit_ibm_capm_short <- lm(ibm_x ~ Mkt_RF, data = data_short) # lm reg with shorter data summary(fit_ibm_capm_short) mean(Mkt_RF[T0:T1]) * 12 # Annualized Market excess return T0 <- 401 T1 <- T # End of Sample (Dec 2024) data_short <- data_full[T0:T1, ] fit_ibm_capm_short <- lm(ibm_x ~ Mkt_RF, data = data_short) # lm reg with shorter data summary(fit_ibm_capm_short) mean(Mkt_RF[T0:T1]) * 12 # Annualized Market excess return ## Note: Beta coeff are "time-varying" - The beta for the whole period is an average of betas over different periods (0.779049 * 223 + 1.076962 * 200 + 0.790583 * 200)/T fit_ibm_capm_full$coefficients[2] #### Matrices and Regression with Linear Algebra v1 <- c(1, 3, 8) # a (3x1) vector (vectors are usually treated as a column list) v1 A <- matrix(c(3, 2, 1, 9, 8, 7), ncol = 3) # a (2x3) matrix A B <- matrix(c(1, 2, 0, 1, 1, 1), nrow = 3) B v1 <- c(1, 3, 8) # a (3x1) vector v2 <- c(2, 7, 9) # Use rbind A <- rbind(v1, v2) A # a (2x3) matrix # Use cbind v3 <- c(1, 2, 0) v4 <- c(1, 1, 1) B <- cbind(v3,v4) B # a (3x2) matrix # Matrix multiplication: %*% C <- A%*%B #A is 2x3; B is 3x2 => C is 2x2 C # Scalar multiplication: * 2 * C # elementwise multiplication of C by scalar 2 # Dot product of 2 vectors: v1 "." v2 - Sum of elementwise multiplied elements of the two vectos t(v1) %*% v2 # v1 <- c(1, 3, 8) & v2 <- c(2, 7, 9) => sum = 1*2 + 3*7 + 8*9 # Dot product with a vector itself: v1 "." - v1 produces a sum of the square elements of vector t(v1) %*% v1 # sum = 1^2 + 3^2 + 8^2 # Dot product with ί (a vector of ones): sum of elements of vector i <- c(1,1,1); t(i) %*% v1 # v1 <- c(1, 3, 8) => sum = 1 + 3 + 8 # Product of 2 vectors: v1 & t(v2): A (3x3) matrix. v1%*%t(v2) # v1 <- c(1, 3, 8) --a (3x1) vector x (1x3) vector: v2 <- c(2, 7, 9) # Transpose B t(B) #B is 3x2 => t(B) is 2x3 # X'X (a symmetric matrix) t(B)%*%B # Determinant: det(D) (D matrix needs to be square) det(t(B)%*%B) # Inverse of (X'X): solve solve(t(B)%*%B) # Diagonal elements of a matrix A: diag() diag(solve(t(B)%*%B)) # Square root of (positive) elements of a matrix A: sqrt() sqrt(diag(solve(t(B)%*%B))) # Regression with Linear Algebra y <- lr_ibm - RF T <- length(ibm_x) x0 <- matrix(1,T,1) x <- cbind(x0, Mkt_RF) b <- solve(t(x)%*% x)%*% t(x)%*%y # b = inv(X′X) X′y (OLS regression) b y_fit <- x%*%b # fitted values e <- y - x%*%b # OLS "errors" = residuals e2 <- e^2 # squared residual vector sum_e2 <- sum(e2) # sum of squared residuals sum_e2_1 <- t(e)%*%e # sum of squared residuals using dot product plot(y_fit, type="l", main="CAPM fitted values for IBM") lines(y, col="red") plot(e, type="l", main="CAPM residuals for IBM") ### Multiple Regression with Linear Algebra (with the 3 Fama-French factors) x_SMB <- SFX_da$SMB # extract Size factor (SMB returns, in %) x_HML <- SFX_da$HML # extract Value factor (HML returns, in %) SMB <- x_SMB[-1]/100 # Adjust size (remove 1st observation) and convert to decimal returns. HML <- x_HML[-1]/100 # Adjust size (remove 1st observation) and convert to decimal returns. y <- ibm_x T <- length(y) x0 <- matrix(1,T,1) x <- cbind(x0, Mkt_RF, SMB, HML) # Matrix X (with 4 columns = constant + 3 factors) k <- ncol(x) # number of regressors (including constant) k b <- solve(t(x)%*% x)%*% t(x)%*%y # b = (X'X)^(-1) X' y (OLS regression) b # We get the same results using lm() fit_capm_ff3 <- lm (y ~ Mkt_RF + SMB + HML) summary(fit_capm_ff3)