############# Brief R Intro (Code #1) #############

## Use R as a Calculator
(77 + 93) / 7
4 * log(sqrt(5)) / sin(2.5 * pi) - exp(-3^2)

## Assign values to variables with <-, combine them and print them to screen
x <- 10
y <- -3

# Print to screen x+y, (x+y)/2
x + y
(x + y) / 2

# Define a new variable and print to screen
z <- (x + y) / 2
z

## Use R to create data
# We create this vector of data using the combine command c().
dat <- c(
  6, 35, 13, 0.5, -8.4, 13, 31, 21, 15, -3,
  5, 27, 14.5, 8, 3, 18, 4, 16, 6, -0.5,
  7, 5, 12, 7, -3.5, 9, 17, -2, 16, 25
)

## Use R to manipulate data: multiply/divide & add/subtract each observation
## of dat
dat2 <- dat * 2
dat_2 <- dat / 2
dat_p1 <- dat + 1
dat_m5 <- dat - 5
dat2_m5 <- dat * 2 - 5

## Use R to define new variables by manipulating data: De-mean dat
sum(dat) # sum of data
length(dat) # length of data
mean(dat) # mean of data
sd(dat) # SD of data
dat - mean(dat) # Transform data: De-mean data

## Use R to define & manipulate data: Compute percentage change for dat
dat[-1] # remove first element from dat vector
dat[-length(dat)] # remove last element from dat vector
# Arithmetic changes for dat: (current - previous) / previous
r_dat <- (dat[-1] - dat[-length(dat)]) / dat[-length(dat)]
r_dat
length(r_dat) # check: one element is lost
mean(r_dat) # operate over transformed data

## Define new variables, by manipulating the data
m_dat <- mean(dat)
sd_dat <- sd(dat)
n_dat <- length(dat)
z_dat <- (dat - m_dat) / sd_dat # We standardized dat
dat_m_2 <- (dat - mean(dat))^2 # We square the deviations from mean
var_dat <- sum(dat_m_2) / (n_dat - 1)

## Print to screen mean and SD
m_dat
sd_dat

# Fancier print to screen using cat
cat("Mean is:", m_dat, "\n")
cat("SD is:", sd_dat, "\n")

## Print to screen the new variables
z_dat
mean(z_dat)
sd(z_dat)

## Write functions in R.
# Mean_f: arithmetic mean of a numeric vector.
#   dat_f: placeholder for the data passed in by the caller; it is not an
#          object that we created.
#   Returns sum(dat_f) / length(dat_f).
Mean_f <- function(dat_f) {
  sum(dat_f) / length(dat_f)
}
Mean_f(dat)

## Quick Summary of Data (Distribution)
summary(dat)

## Sort data
sort(dat)

## Use loops to sum and accumulate values. In this case, a "for loop"
# Preallocate the result vector instead of growing it inside the loop, and
# iterate with seq_len() so that an empty input would run zero iterations.
dat_2 <- numeric(n_dat)
sum_2 <- 0
for (i in seq_len(n_dat)) {
  # i-th element of `dat` squared into i-th position of `dat_2`
  dat_2[i] <- dat[i]^2
  sum_2 <- sum_2 + dat_2[i]
}
sum_2
dat_2
print(i) # the loop index keeps its last value after the loop ends

## In general, we will read the data from a data file. But, we can
## create/input data in different ways:
dat_1 <- rep(5, 6) # Repetition of 5, 6 times
dat_2 <- seq(from = 1, to = 10, by = 3) # Sequence from 1 to 10, step 3
dat_3 <- runif(10) # Generate 10 random values from a Uniform(0,1)
dat_4 <- rnorm(10) # Generate 10 random values from a Normal(0,1)
# Generate 10 random values from a Normal with mean 2 and SD 2 (variance 4)
dat_5 <- rnorm(10, mean = 2, sd = 2)
# Combine dat_1 through dat_4 into one vector (dat_5 is not included)
dat_6 <- c(dat_1, dat_2, dat_3, dat_4)

## We create a matrix, using rbind() ("row bind") or cbind() ("column bind"):
A <- rbind(dat_3, dat_4, dat_5) # 3 rows, 10 columns
B <- cbind(dat_3, dat_4, dat_5) # 10 rows, 3 columns
A
B

## Data frame.
# The function data.frame converts a matrix or collection of vectors into a
# data frame:
dat_df <- data.frame(B)
names(dat_df)
dat_df$dat_3 # Extract column from dat_df with $
mean(dat_df$dat_3) # Mean of dat_3
mean(dat_df[, 1]) # Mean of dat_3, read as the first column of data frame
colMeans(dat_df) # Mean of all columns of data frame

## Compute mean with Missing Values
data_NA <- dat_df # Create example data with NA (Not Available)
# Assign NA to observations 1, 3 & 6 of column dat_3
data_NA$dat_3[c(1, 3, 6)] <- NA
data_NA
mean(data_NA$dat_3, na.rm = TRUE) # Ignore NA values with na.rm = TRUE

## Graphs & Plots
# All the data consecutively, only interesting if a time series
plot(dat, type = "l", main = "Plot of Data")
dat_prop <- dat / n_dat # Data divided by the number of observations
# freq = FALSE draws the histogram on the density scale, so that the kernel
# density curve added by lines() is on the same scale as the bars.
hist_d <- hist(
  dat_prop,
  breaks = 6, freq = FALSE,
  main = "Histogram with breaks", xlab = "Generated Data"
)
lines(density(dat_prop), col = "red") # lines adds a curve, default bandwidth

## Application with Real Data
# First, import data with a read function, usually followed by the type of
# data we are importing. The argument name `header` is spelled out in full
# rather than relying on partial matching.
PPP_da <- read.csv(
  "https://www.bauer.uh.edu/rsusmel/4397/ppp_2021_m.csv",
  header = TRUE, sep = ","
)
names(PPP_da) # Check names of imported variables (columns)
x_chf <- PPP_da$CHF_USD # Extract CHF/USD exchange rate
# Size of data (T or N). NOTE: `T` is also R's built-in shorthand for TRUE;
# the name is kept here for compatibility with code that indexes with it.
T <- length(x_chf)
# Log returns - could also have used: e_chf <- diff(log(x_chf))
e_chf <- log(x_chf[-1] / x_chf[-T])

# Plotting Data
plot(
  x_chf,
  type = "l", main = "CHF/USD Exchange Rate",
  xlab = "time", ylab = "CHF/USD"
)
plot(
  e_chf,
  type = "l", main = "CHF/USD Exchange Rate: Monthly Changes",
  xlab = "time", ylab = "CHF/USD (%)"
)

# Nicer Plot with Dates, using ggplot2 package
# First, package needs to be installed. Install only when it is missing, so
# that re-running the script does not reinstall it every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Second, we need to call it, using the library function.
library(ggplot2)
# Now, we are ready to use the ggplot package any time we want.
Date1 <- PPP_da$Date # Extract the Date column
# Read the dates, telling R how the dates are written.
x_Date_1 <- as.Date(PPP_da$Date, format = "%m/%d/%Y")
ppp <- data.frame(x_Date_1, x_chf) # Create a data frame to use in ggplot

# Line plot. The line colour is a fixed setting of geom_line(), not a mapped
# aesthetic, so no colour label is passed to labs().
ggplot(data = ppp, aes(x = x_Date_1, y = x_chf)) +
  geom_line(color = "blue") +
  labs(
    x = "Date", y = "CHF/USD",
    title = "CHF/USD Exchange Rate",
    subtitle = "Period: December 1989: July 2021"
  )

# Histogram
# freq = FALSE draws the histogram on the density scale, so that the kernel
# density curve added by lines() is on the same scale as the bars.
hist_d <- hist(
  e_chf,
  breaks = 10, freq = FALSE,
  main = "Histogram for CHF/USD Changes", xlab = "CHF/USD (%)"
)
lines(density(e_chf), col = "red") # lines adds a curve, default bandwidth

## Moments
x <- e_chf # Series to be analyzed
n <- length(x) # Number of observations
m1 <- sum(x) / n # Mean
m2 <- sum((x - m1)^2) / n # Used in denominator of both
m3 <- sum((x - m1)^3) / n # For numerator of S
m4 <- sum((x - m1)^4) / n # For numerator of K
b1 <- m3 / m2^(3 / 2) # Sample Skewness
b2 <- (m4 / m2^2) # Sample Kurtosis
s2 <- sum((x - m1)^2) / (n - 1) # Sample Variance
sd_s <- sqrt(s2) # Sample SD

m1 # Mean
s2 # Sample Variance
sd_s # Sample SD
b1 # Sample Skewness
b2 # Sample Kurtosis

## Arithmetic Returns using diff
# Divide each change by the previous level. The last observation is dropped
# by its own length rather than through a separately maintained global.
e_chf_ar <- diff(x_chf) / x_chf[-length(x_chf)] # arithmetic returns

## Functions. We can write a function to compute all the moments
# Mom_f: sample moments of a numeric vector.
#   dat_f: placeholder for the data passed in by the caller.
#   Returns an unnamed numeric vector of length 4, in this order:
#     mean, sample SD (n - 1 denominator), sample skewness, sample kurtosis.
#   To return more than one value from a function, the values are
#   concatenated with c() (a list() would also work).
Mom_f <- function(dat_f) {
  x <- dat_f
  n <- length(x) # Number of observations
  m1 <- sum(x) / n # Mean
  dev <- x - m1 # Deviations from the mean, reused below
  m2 <- sum(dev^2) / n # Used in denominator of both
  m3 <- sum(dev^3) / n # For numerator of S
  m4 <- sum(dev^4) / n # For numerator of K
  b1 <- m3 / m2^(3 / 2) # Sample Skewness
  b2 <- m4 / m2^2 # Sample Kurtosis
  s2 <- sum(dev^2) / (n - 1) # Sample Variance
  sd_s <- sqrt(s2) # Sample SD
  c(m1, sd_s, b1, b2)
}
Mom_f(e_chf)
Mom_f(e_chf_ar)

# Use Mom_f function with a different dataset.
# Import the second data file. The argument name `header` is spelled out in
# full rather than relying on partial matching.
Sh_da <- read.csv(
  "https://www.bauer.uh.edu/rsusmel/4397/Shiller_data.csv",
  header = TRUE, sep = ","
)
SP <- Sh_da$P # Extract P = S&P500 series
# Number of observations. NOTE: `T` is also R's built-in shorthand for TRUE;
# the name is kept here for compatibility with code that may reference it.
T <- length(SP)
lr <- log(SP[-1] / SP[-T]) # Define log returns
Mom_f(lr)