############# Brief R Intro  (Code #1) #############

## Use R as a calculator: an expression typed at top level is evaluated

(77 + 93) / 7

4 * log(sqrt(5)) / sin(2.5 * pi) - exp(-3^2)


## Assign values to variables with <-, then combine them and show the results

x <- 10
y <- -3

# Show the sum of x and y, followed by their average
x + y
(x + y) / 2

# Store the average in a new variable and show it
z <- (x + y) / 2
z

## Use R to create data
# c() ("combine") collects the individual values into a single numeric vector.
dat <- c(
  6, 35, 13, 0.5, -8.4, 13, 31, 21, 15, -3,
  5, 27, 14.5, 8, 3, 18, 4, 16, 6, -0.5,
  7, 5, 12, 7, -3.5, 9, 17, -2, 16, 25
)


## Use R to manipulate data: arithmetic is applied to every observation of dat
dat2 <- 2 * dat          # multiply each observation by 2
dat_2 <- dat / 2         # divide each observation by 2
dat_p1 <- 1 + dat        # add 1 to each observation
dat_m5 <- dat - 5        # subtract 5 from each observation
dat2_m5 <- 2 * dat - 5   # multiply by 2, then subtract 5


## Use R to summarize and transform data: de-mean dat
sum(dat)                 # total of all observations
length(dat)              # number of observations
mean(dat)                # mean of the data
sd(dat)                  # standard deviation of the data
dat - mean(dat)          # de-meaned data: each observation minus the mean


## Use R to define & manipulate data: compute the percentage change for dat
dat[-1]                  # a negative index drops that position: all but the first element
dat[-length(dat)]        # all but the last element
# Arithmetic change: (next value - current value) / current value.
# diff(dat) is the vector of differences between consecutive elements.
r_dat <- diff(dat) / dat[-length(dat)]
r_dat
length(r_dat)            # check: one element is lost when taking changes
mean(r_dat)              # summary functions work on the transformed data too
	

## Define new variables by manipulating the data
n_dat <- length(dat)                    # number of observations
m_dat <- mean(dat)                      # mean
sd_dat <- sd(dat)                       # standard deviation
dat_m_2 <- (dat - m_dat)^2              # squared deviations from the mean
z_dat <- (dat - m_dat) / sd_dat         # standardized data
var_dat <- sum(dat_m_2) / (n_dat - 1)   # sum of squared deviations over n - 1


## Print to screen mean and SD
m_dat
sd_dat

# Fancier print to screen using cat, which joins its arguments with spaces
cat("Mean is:", m_dat, "\n")
cat("SD is:", sd_dat, "\n")

## Print to screen the new variables
z_dat
mean(z_dat)
sd(z_dat)


## Write functions in R. 
# Mean_f: arithmetic mean of a vector (sum of the elements over their count).
# dat_f is a placeholder (the function's argument), not an object created in
# the workspace; it stands for whatever vector is supplied in the call.
Mean_f <- function(dat_f) {
  total <- sum(dat_f)
  count <- length(dat_f)
  total / count
}

# Apply the user-defined mean function to the data
Mean_f(dat_f = dat)


## Quick summary of the data (distribution)
summary(dat)


## Sort data in increasing order (the default)
sort(dat, decreasing = FALSE)


## Use loops to sum and accumulate values. In this case, a "for loop"
# dat_2 is allocated at its final length up front, so the loop fills it in
# place instead of growing the vector by one element per iteration.
dat_2 <- numeric(n_dat)
sum_2 <- 0
# seq_len(n_dat) is empty when n_dat is 0, whereas 1:n_dat would yield 1, 0.
for (i in seq_len(n_dat)) {
  # i-th element of `dat` squared into `i`-th position of `dat_2`
  dat_2[i] <- dat[i]^2
  # running total of the squared values
  sum_2 <- sum_2 + dat_2[i]
}
sum_2
dat_2
print(i)   # the loop index keeps its last value after the loop ends


## In general, we will read the data from a data file. But, we can create/input data in different ways:
dat_1 <- rep(5, times = 6)                 # The value 5 repeated 6 times
dat_2 <- seq(1, 10, by = 3)                # Sequence from 1 up to 10 in steps of 3
dat_3 <- runif(n = 10)                     # 10 random values from a Uniform(0,1)
dat_4 <- rnorm(n = 10)                     # 10 random values from a Normal(0,1)
dat_5 <- rnorm(n = 10, mean = 2, sd = 2)   # 10 random values from a Normal with mean 2 and SD 2 (variance 4)
dat_6 <- c(dat_1, dat_2, dat_3, dat_4)     # Concatenate dat_1 through dat_4 into one vector (dat_5 is not included)


## We create a matrix, using rbind() ("row bind") or cbind() ("column bind"):
A <- rbind(dat_3, dat_4, dat_5)   # each vector becomes a row: 3 rows, 10 columns
B <- cbind(dat_3, dat_4, dat_5)   # each vector becomes a column: 10 rows, 3 columns
A
B


## Data frame. The function data.frame converts a matrix or collection of vectors into a data frame:
dat_df <- data.frame(B)
names(dat_df)                     # Column names of the data frame

dat_df$dat_3                      # Extract a column from dat_df with $

mean(dat_df$dat_3)                # Mean of dat_3, selected by name
mean(dat_df[[1]])                 # Mean of dat_3, selected as the first column of the data frame
colMeans(dat_df)                  # Mean of every column of the data frame


## Compute mean with missing values
data_NA <- dat_df                     # Copy of the data frame, used to build an example with NA (Not Available)
data_NA[c(1, 3, 6), "dat_3"] <- NA    # Set observations 1, 3 & 6 of column dat_3 to NA
data_NA

mean(data_NA$dat_3, na.rm = TRUE)     # na.rm = TRUE drops the NA values before averaging


## Graphs & Plots
# All the data consecutively as a line; only interesting if the data is a time series
plot(dat, type = "l", main = "Plot of Data")

dat_prop <- dat / n_dat   # Each observation divided by the number of observations
# freq = FALSE draws the histogram on the density scale (bar areas sum to 1),
# which is the scale of the kernel density curve added below. On the default
# count scale the curve would be drawn against bar heights measured in counts.
hist_d <- hist(dat_prop, breaks = 6, freq = FALSE, main = "Histogram with breaks", xlab = "Generated Data")
lines(density(dat_prop), col = "red")   # lines() adds a curve to the current plot; default bandwidth


## Application with Real Data
# First, import the data with a read function that matches the file type (read.csv for a CSV file).
# The `header` argument is written out in full rather than relying on partial matching of `head`.
PPP_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/ppp_2021_m.csv", header = TRUE, sep = ",")
names(PPP_da)                           # Check names of imported variables (columns)
x_chf <- PPP_da$CHF_USD                 # Extract CHF/USD exchange rate
# NOTE: `T` shadows R's shorthand for TRUE; the name is kept because it is
# part of this script's workspace.
T <- length(x_chf)                      # Size of data (T or N)
e_chf <- log(x_chf[-1] / x_chf[-T])     # log returns - could also use: e_chf <- diff(log(x_chf))


# Plotting Data
# Exchange rate series, drawn as a line
plot(
  x_chf,
  type = "l",
  main = "CHF/USD Exchange Rate",
  xlab = "time",
  ylab = "CHF/USD"
)

# Log changes of the exchange rate, drawn as a line
plot(
  e_chf,
  type = "l",
  main = "CHF/USD Exchange Rate: Monthly Changes",
  xlab = "time",
  ylab = "CHF/USD (%)"
)


# Nicer Plot with Dates, using ggplot2 package
# A package only has to be installed once, so install it only when it is
# missing; this avoids re-downloading it every time the script runs.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Second, we need to attach it, using the library function.
library(ggplot2)
# Now, we are ready to use the ggplot2 package any time we want.

Date1 <- PPP_da$Date                     # Extract the Date column
x_Date_1 <- as.Date(Date1, "%m/%d/%Y")   # Read the dates, telling R how the dates are written.
ppp <- data.frame(x_Date_1, x_chf)       # Create a data frame to use in ggplot

# One line
# The line colour is set directly in geom_line(); labs() only supplies text labels.
ggplot(data = ppp, aes(x = x_Date_1, y = x_chf)) +
  geom_line(color = "blue") +
  labs(
    x = "Date", y = "CHF/USD", title = "CHF/USD Exchange Rate",
    subtitle = "Period: December 1989: July 2021"
  )

# Histogram
# freq = FALSE puts the bars on the density scale (areas sum to 1) so that the
# kernel density curve added by lines() is drawn on the same vertical scale.
hist_d <- hist(e_chf, breaks = 10, freq = FALSE, main = "Histogram for CHF/USD Changes", xlab = "CHF/USD (%)")
lines(density(e_chf), col = "red")   # lines() adds a curve to the current plot; default bandwidth


## Moments
x <- e_chf                          # Series to be analyzed
n <- length(x)                      # Number of observations
m1 <- sum(x) / n                    # Mean

# Spread: variance with the n - 1 denominator, and its square root
s2 <- sum((x - m1)^2) / (n - 1)     # Sample Variance
sd_s <- sqrt(s2)                    # Sample SD

# Central moments with the n denominator
m2 <- sum((x - m1)^2) / n           # Second moment: denominator of both S and K
m3 <- sum((x - m1)^3) / n           # Third moment: numerator of S
m4 <- sum((x - m1)^4) / n           # Fourth moment: numerator of K

# Shape: ratios of the central moments
b1 <- m3 / m2^(3 / 2)               # Sample Skewness
b2 <- m4 / m2^2                     # Sample Kurtosis

# Show the results
m1                                  # Mean
s2                                  # Sample Variance
sd_s                                # Sample SD
b1                                  # Sample Skewness
b2                                  # Sample Kurtosis


## Arithmetic Returns using diff
# The last observation is dropped using the length of x_chf itself instead of
# the global `T`, so this line does not depend on what `T` currently holds.
e_chf_ar <- diff(x_chf) / x_chf[-length(x_chf)]   # arithmetic returns

## Functions. We can write a function to compute all the moments
# Mom_f: mean, SD, skewness and kurtosis of a vector.
#   dat_f  placeholder for the data vector supplied in the call.
# Returns an unnamed vector c(mean, sample SD, sample skewness, sample kurtosis).
Mom_f <- function(dat_f) {
  n_obs <- length(dat_f)            # Number of observations
  avg <- sum(dat_f) / n_obs         # Mean
  dev <- dat_f - avg                # Deviations from the mean, computed once

  # Central moments with the n denominator
  cm2 <- sum(dev^2) / n_obs         # Used in the denominator of both ratios
  cm3 <- sum(dev^3) / n_obs         # Numerator of skewness
  cm4 <- sum(dev^4) / n_obs         # Numerator of kurtosis

  skew <- cm3 / cm2^(3 / 2)         # Sample Skewness
  kurt <- cm4 / cm2^2               # Sample Kurtosis

  # Sample SD: square root of the variance with the n - 1 denominator
  std <- sqrt(sum(dev^2) / (n_obs - 1))

  # To return more than one value from a function, concatenate the values
  # with c() (or use list()).
  c(avg, std, skew, kurt)
}

# Moments of the log returns and of the arithmetic returns
Mom_f(e_chf)
Mom_f(e_chf_ar)


# Use Mom_f function with a different dataset.

# The `header` argument is written out in full rather than relying on partial matching of `head`.
Sh_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/Shiller_data.csv", header = TRUE, sep = ",")
SP <- Sh_da$P                     # Extract P = S&P500 series
T <- length(SP)                   # Number of observations in SP
lr <- log(SP[-1] / SP[-T])        # Define log returns

Mom_f(lr)