############# Regression with lm function and Linear Algebra (Code #3) #############


#### VaR for FX Transaction

# Download the CHF/USD exchange-rate series (CSV file)
chfusd <- read.csv("http://www.bauer.uh.edu/rsusmel/4386/chfusd.csv", sep = ",")
S <- chfusd$CHF_USD                       # CHF_USD column of the data
# NOTE: `T` shadows R's shorthand for TRUE; the name is kept because the
# statements below index with it.
T <- length(S)                            # full sample size (1971:1 to 2017:1 per the original notes)
Tstart <- 229                             # first observation kept (1990:1 per the original notes)
SP <- S[seq(Tstart, T)]                   # FX rate over the relevant period (Tstart onwards)
T <- length(SP)                           # number of observations in the relevant period
Val <- 1000000                            # value of the transaction in foreign currency (FC)
S_0 <- SP[T]                              # last available FX rate (today's S_t)
e_f <- log(tail(SP, -1) / head(SP, -1))   # log changes in S_t: log(S_t / S_{t-1})
T_s <- length(e_f)                        # length of e_f (one observation lost to differencing)


## VaR under a Normal approximation for the log changes e_f
TE <- Val * S_0                       # transaction exposure valued at today's FX rate

# Sample mean and standard deviation of e_f; the outer parentheses make R
# display each value when the statement is evaluated at top level.
(mu_ef <- mean(e_f))
(s_ef <- sd(e_f))

z_95 <- 1.96                          # two-sided 95% Normal critical value (2.5% in each tail)
(CI_lb <- mu_ef - z_95 * s_ef)        # lower bound for e_f
(CI_ub <- mu_ef + z_95 * s_ef)        # upper bound for e_f

# Exposure if the FX change equals the lower bound
(TE_lb <- TE * (1 + CI_lb))


## VaR from the empirical distribution (reference value for the bootstrap)
TE_o <- Val * S_0 * (1 + e_f)         # exposure implied by each observed FX change
STE_o <- sort(TE_o)                   # exposures in ascending order

alpha <- 0.05                         # alpha level for the VaR
T_s_low <- round(T_s * alpha / 2)     # rank of the observation at alpha/2 of T_s
VaR_o <- STE_o[T_s_low]               # VaR in the original sample

# Bootstrap statistic: VaR of the transaction exposure for one resample.
#   data: vector of FX changes
#   i:    indices selecting the resample from data (supplied by boot())
# Reads Val, S_0 and T_s_low from the enclosing (global) environment.
# Returns the T_s_low-th smallest exposure in the resample.
varisk <- function(data, i) {
  resampled <- data[i]
  exposures <- sort(Val * S_0 * (1 + resampled))
  exposures[T_s_low]
}

library(boot)
# Number of bootstrap replications
sim_size <- 1000
# Resample e_f sim_size times and evaluate varisk() on every resample.
# NOTE(review): no set.seed() call is visible in this section, so the
# results change from run to run unless a seed is set elsewhere.
boot.samps <- boot(data=e_f, statistic=varisk, R=sim_size)

# "basic" bootstrap confidence interval for the VaR
boot.ci(boot.samps, type = "basic")

# Mean of the values stored in the t component of the boot object
# (the bootstrap replicates of the statistic, per the boot documentation)
mean(boot.samps$t)

# Standard deviation of the same values
sd(boot.samps$t)



#### Regression using R with the lm function

# Download the stocks / FX data set (CSV file with a header row)
SFX_da <- read.csv(
  "https://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv",
  header = TRUE, sep = ","
)
names(SFX_da)                         # column names
summary(SFX_da)                       # summary statistics for every column

# Pull the series used below out of the imported data
x_ibm <- SFX_da$IBM                   # IBM price data
x_Mkt_RF <- SFX_da$Mkt_RF             # market excess returns (in %)
x_RF <- SFX_da$RF                     # risk-free rate (in %)

# Log returns lose the first observation, so the other series drop theirs
# as well and are converted from % to decimal returns.
T <- length(x_ibm)                                   # sample size
lr_ibm <- log(tail(x_ibm, -1) / head(x_ibm, -1))     # IBM log returns (decimal)
Mkt_RF <- tail(x_Mkt_RF, -1) / 100                   # market excess returns, T-1 obs
RF <- tail(x_RF, -1) / 100                           # risk-free rate, T-1 obs

# IBM excess returns: log return minus the risk-free rate
ibm_x <- lr_ibm - RF

# Scatter plot of the two variables (IBM log returns on x, market excess
# returns on y)
plot(
  lr_ibm, Mkt_RF,
  main = "Scatterplot: IBM & Market",
  xlab = "IBM returns ", ylab = "Market returns ", pch = 19
)

# CAPM regression of IBM excess returns on market excess returns,
# estimated with lm() (the linear-model function)
fit_ibm_capm <- lm(ibm_x ~ Mkt_RF)
summary(fit_ibm_capm)                 # print the regression results

# Scatter plot of the two variables with the fitted regression line.
# abline() draws intercept + slope * x using the coefficients of
# fit_ibm_capm, which regresses ibm_x (response) on Mkt_RF (regressor).
# The regressor therefore goes on the x-axis and the response on the y-axis;
# otherwise the line does not correspond to the plotted points.
plot(Mkt_RF, ibm_x, main = "Scatterplot: IBM & Market",
     xlab = "Market excess returns ", ylab = "IBM excess returns ", pch = 19)
abline(fit_ibm_capm, col = "blue")


# Same CAPM regression run from a data frame, so that the estimation sample
# can be changed by selecting rows.
data_full <- data.frame(ibm_x, Mkt_RF)

# Full sample
fit_ibm_capm_full <- lm(ibm_x ~ Mkt_RF, data = data_full)
summary(fit_ibm_capm_full)

# Subsample 1: rows T0 to T1 (1 to 200)
T0 <- 1
T1 <- 200
data_short <- data_full[seq(T0, T1), ]

fit_ibm_capm_short <- lm(ibm_x ~ Mkt_RF, data = data_short)
summary(fit_ibm_capm_short)

# Subsample 2: rows 200 to 400. Row 200 is also part of subsample 1, and
# data_short / fit_ibm_capm_short are overwritten here.
T0 <- 200
T1 <- 400
data_short <- data_full[seq(T0, T1), ]

fit_ibm_capm_short <- lm(ibm_x ~ Mkt_RF, data = data_short)
summary(fit_ibm_capm_short)



#### Matrices and Regression with Linear Algebra

# A numeric vector of length 3 (used as a column vector in matrix products)
v1 <- c(4, 5, 6)
v1

# matrix() fills column by column: six values in two rows give a (2x3) matrix
A <- matrix(c(3, 2, 1, 9, 8, 7), nrow = 2)
A

# Six values in two columns give a (3x2) matrix
B <- matrix(c(1, 2, 0, 1, 1, 1), ncol = 2)
B

# Two vectors of length 3
v1 <- c(1, 3, 8)
v2 <- c(2, 7, 9)

# rbind() stacks the vectors as rows: a (2x3) matrix
A <- rbind(v1, v2)
A

# cbind() places the vectors side by side as columns: a (3x2) matrix
v3 <- c(1, 2, 0)
v4 <- c(1, 1, 1)
B <- cbind(v3, v4)
B

# Matrix multiplication with %*%: A is 2x3 and B is 3x2, so C is 2x2
C <- A %*% B
C

# Multiplication by a scalar with *: every element of C is doubled
2 * C

# Dot product of two vectors: the sum of the elementwise products
# (v1 is c(1, 3, 8) and v2 is c(2, 7, 9))
t(v1) %*% v2

# Dot product of a vector with itself: the sum of its squared elements
t(v1) %*% v1

# Dot product with i (a vector of ones): the sum of the elements of v1
i <- rep(1, 3)
t(i) %*% v1

# Outer product: a (3x1) vector times a (1x3) vector gives a (3x3) matrix
v1 %*% t(v2)

# Transpose: B is 3x2, so t(B) is 2x3
B
t(B)

# X'X (a symmetric matrix)
t(B) %*% B

# crossprod(B) computes the same matrix as t(B) %*% B and is used below.

# Determinant (the matrix must be square)
det(crossprod(B))

# Inverse of X'X with solve()
solve(crossprod(B))

# Diagonal elements of a matrix with diag()
diag(solve(crossprod(B)))

# Elementwise square root of those (positive) diagonal elements
sqrt(diag(solve(crossprod(B))))


# Regression with Linear Algebra: OLS for the CAPM, b = (X'X)^(-1) X'y
y <- lr_ibm - RF                          # dependent variable: IBM excess returns
T <- length(ibm_x)                        # number of observations
x0 <- matrix(1, T, 1)                     # column of ones (intercept)
x <- cbind(x0, Mkt_RF)                    # regressors: constant and market excess returns
b <- solve(t(x) %*% x) %*% t(x) %*% y     # OLS coefficients
b

y_fit <- x %*% b                          # fitted values
e <- y - y_fit                            # OLS residuals
e2 <- e^2                                 # squared residuals
sum_e2 <- sum(e2)                         # sum of squared residuals
sum_e2_1 <- t(e) %*% e                    # same sum via the dot product e'e

# Fitted values (black) with the actual excess returns (red) on top.
# ylim spans both series: without it the axis range comes from y_fit alone
# and any actual returns outside that range are clipped from the plot.
plot(y_fit, type = "l", main = "CAPM fitted values for IBM",
     ylim = range(y, y_fit))
lines(y, col = "red")

plot(e, type = "l", main = "CAPM residuals for IBM")