Library

For those who have not installed tidyverse previously: please install the package and then load the library, i.e. run the two lines below:
install.packages("tidyverse")
library(tidyverse)
Note that installation may take a while (up to 25 minutes on the class computers)

For those who have installed tidyverse previously: please load the library, i.e. run the line below (you should do this each time you open a new R session):
library(tidyverse)

Data frame

Creating data frame

Create FAANG data frame (Facebook, Amazon, Apple, Netflix, Google) that contains:

  • Date column (taken from fb data frame)
  • Close column for each company (closing price)

Get the data files: copy and run the below code to download the separate data frames for each company:

# Base address of the course data directory (note the trailing "/").
url <- "http://coin.wne.uw.edu.pl/gzakrzewski/data_for_students/"

# Download one price table per company. read.csv() uses header = TRUE,
# sep = "," and dec = "." by default, so no extra arguments are needed for
# comma-separated files with a decimal point.
fb   <- read.csv(paste0(url, "FB.csv"))
amzn <- read.csv(paste0(url, "AMZN.csv"))
aapl <- read.csv(paste0(url, "AAPL.csv"))
nflx <- read.csv(paste0(url, "NFLX.csv"))
goog <- read.csv(paste0(url, "GOOG.csv"))

# FAANG data frame: dates taken from fb plus the closing price of every
# company, restricted to the first 10 rows.
faang <- data.frame(
  date = fb$Date[seq_len(10)],
  fb   = fb$Close[seq_len(10)],
  amzn = amzn$Close[seq_len(10)],
  aapl = aapl$Close[seq_len(10)],
  nflx = nflx$Close[seq_len(10)],
  goog = goog$Close[seq_len(10)]
)

Rate of return

Calculating rate of return

Arithmetic rate of return:
\(rr = \frac{price_t-price_{t-1}}{price_{t-1}}\)

Helpful functions:

  • lag(x) - returns the previous value in a vector
  • lead(x) - returns the next value in a vector

Example: create a vector of returns based on the Close vector in the goog data frame:

# Look at the first rows of the goog data frame:
head(goog)
# Arithmetic rate of return of the closing price: the change against the
# previous observation divided by the previous observation.
goog_rr <- with(goog, (Close - lag(Close)) / lag(Close))

Example
If you need returns in the data frame, create new vector in faang using goog vector existing in faang. First inspect faang dataframe using head().

# Print the first rows of faang:
head(faang)
##         date    fb   amzn     aapl     nflx     goog
## 1 2014-02-24 70.78 351.78 75.36429 63.85714 602.3374
## 2 2014-02-25 69.85 358.32 74.58000 64.71858 606.0582
## 3 2014-02-26 69.26 359.80 73.90714 64.11285 606.1426
## 4 2014-02-27 68.94 360.13 75.38143 64.60429 605.6657
## 5 2014-02-28 68.46 362.10 75.17714 63.66143 603.8972
## 6 2014-03-03 67.41 359.78 75.39429 63.65572 597.4591

Then create new vector called goog_rr containing rate of return:

# Append the arithmetic rate of return of the goog column as a new column.
faang[["goog_rr"]] <- with(faang, (goog - lag(goog)) / lag(goog))
head(faang)
##         date    fb   amzn     aapl     nflx     goog       goog_rr
## 1 2014-02-24 70.78 351.78 75.36429 63.85714 602.3374            NA
## 2 2014-02-25 69.85 358.32 74.58000 64.71858 606.0582  0.0061772106
## 3 2014-02-26 69.26 359.80 73.90714 64.11285 606.1426  0.0001393794
## 4 2014-02-27 68.94 360.13 75.38143 64.60429 605.6657 -0.0007868263
## 5 2014-02-28 68.46 362.10 75.17714 63.66143 603.8972 -0.0029199160
## 6 2014-03-03 67.41 359.78 75.39429 63.65572 597.4591 -0.0106609383

Exercise 1
Calculate the logarithmic rate of return for Apple: \(rr = \ln\left(\frac{price_t}{price_{t-1}}\right)\)

# Logarithmic rate of return of Apple: ln(price_t / price_{t-1}).
aapl_rr <- with(faang, log(aapl / lag(aapl)))

Exercise 2
Create data frame faang_rr containing:

  • Date as the first vector
  • rates of return (arithmetic) as separate vectors for Facebook, Amazon, Apple and Netflix
# Date column followed by one column of arithmetic rates of return per
# company. The same formula is applied to every closing-price vector; the
# names of the list become the column names.
faang_rr <- data.frame(
  date = fb$Date,
  lapply(
    list(fb = fb$Close, amzn = amzn$Close, aapl = aapl$Close, nflx = nflx$Close),
    function(price) (price - lag(price)) / lag(price)
  )
)

Exercise 2a
Add rates of return (arithmetic) for Google to the data frame faang_rr

# Add Google's arithmetic rate of return as one more column of faang_rr.
faang_rr[["goog"]] <- with(goog, (Close - lag(Close)) / lag(Close))

Exercise 3
Create data frame faang_log_rr containing:

  • Date as the first vector
  • rates of return (log) as separate vectors for each FAANG company
# Date column followed by one column of logarithmic rates of return,
# ln(price_t / price_{t-1}), per company. The names of the list become the
# column names.
faang_log_rr <- data.frame(
  date = fb$Date,
  lapply(
    list(fb = fb, amzn = amzn, aapl = aapl, nflx = nflx, goog = goog),
    function(prices) log(prices$Close / lag(prices$Close))
  )
)

Other helpful functions

Probabilities (optional)

  • Normal distribution:
  • density: dnorm(x, mean = 0, sd = 1, log = FALSE)
  • distribution: pnorm(q, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
  • quantile: qnorm(p, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
  • random deviates: rnorm(n, mean = 0, sd = 1)

The normal distribution density: \(f(x) = \frac{1}{\sqrt{2\pi\sigma^2}} \, e^{-\frac{(x - \mu)^2}{2\sigma^2}}\)

# Parameters of X ~ N(0.01, 0.05^2)
mu_x <- 0.01
sigma_x <- 0.05

# Pr(X < -0.10): distribution function evaluated at -0.10
pnorm(q = -0.10, mean = mu_x, sd = sigma_x)

# Pr(X > 0.10): complement of the distribution function at 0.10
1 - pnorm(q = 0.10, mean = mu_x, sd = sigma_x)

# Pr(-0.05 < X < 0.15): distribution function at the upper end minus its
# value at the lower end
diff(pnorm(q = c(-0.05, 0.15), mean = mu_x, sd = sigma_x))

# 1%, 5%, 10%, 90%, 95% and 99% quantiles
qnorm(p = c(0.01, 0.05, 0.1, 0.9, 0.95, 0.99), mean = mu_x, sd = sigma_x)
  • Quantile from empirical distribution: quantile(x, q)
# Draw 100 values from the normal distribution with mean = 0 and sd = 1;
# sorting only makes the sample easier to inspect.
sample <- sort(rnorm(n = 100))
# Empirical 10% quantile of the sample
quantile(sample, probs = 0.1)

Simulations (optional)

Drawing the distribution:

  • generate sequence of x, eg. from -5 till 5
  • use density function to generate the distribution
  • use plot() to draw the distribution
# Grid of x values on which the densities are evaluated.
x <- seq(-5, 5, by = 0.01)

# Standard normal density. The grid is passed as the first argument so that
# the horizontal axis shows x itself rather than the position in the vector.
x_norm <- dnorm(x, mean = 0, sd = 1)
plot(x, x_norm, type = "l")

# Same mean, larger standard deviation (sd = 2)
x_norm <- dnorm(x, mean = 0, sd = 2)
plot(x, x_norm, type = "l")

# mean = 0.05, sd = 0.25
x_norm <- dnorm(x, mean = 0.05, sd = 0.25)
plot(x, x_norm, type = "l")

# 1000 random normal variables (mean = 0, sd = 1), plotted against their
# position in the vector
y_norm <- rnorm(1000)
plot(y_norm)

# plot(rnorm(100))

Monte Carlo (optional)

Calculate \(\int_0^1 x^3 \, dx\)

Monte Carlo solution:

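  • Create a sequence of pairs of random variables (x, y) drawn from the uniform distribution: use the runif() function to generate x and y.
  • Compare the value of f(x) with the value of y: the share of pairs with y < f(x), multiplied by the area of the rectangle from which the pairs are drawn (here 1), estimates the integral.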
## [1] 0.235
# Number of random points
n <- 100
# Uniform points (x, y) in the unit square [0, 1] x [0, 1]
x <- runif(n)
y <- runif(n)

# (x^3 > y) is a logical vector, TRUE (i.e. 1) or FALSE (i.e. 0) for each
# pair of elements of x and y, so its sum counts the points lying below the
# curve y = x^3. The unit square has area 1, hence the share of such points
# is the estimate of the integral.
integral <- sum(x^3 > y) / n
integral

Exercise 4
Calculate \(\int_0^{10} \left(5 + 4 \cdot \frac{x^3}{e^x} \cdot \sin(x)\right) dx\)

# Number of random points
n <- 1000
# Points are drawn uniformly from the rectangle [0, 10] x [0, 10]. On
# [0, 10] the integrand stays roughly between 1 and 9, so the curve lies
# entirely inside this rectangle.
x <- runif(n, 0, 10)
y <- runif(n, 0, 10)
box_area <- 10 * 10

# The share of points below the curve estimates the fraction of the
# rectangle lying under it; multiplying by the area of the rectangle turns
# that fraction into the value of the integral.
integral <- sum(y < 5 + 4 * x^3 / exp(x) * sin(x)) / n * box_area
integral
## [1] 47.1

Own functions

Writing simple functions

Calculate PV of CF = 1000 paid at the end of 5th year. Discount rate is 8%.

\(PV = \frac{CF}{(1+i)^t}\)

# PV of CF = 1000 paid at the end of the 5th year, discounted at 8%
pv <- 1000 / (1 + 0.08)^5
pv
## [1] 680.5832

How to automate calculations in order not to duplicate work: write a function calculating PV

function_name <- function(par1, par2, ...) {body of the function}

Body part contains actual computations the function should perform.

# Present value of a single payment.
#   cf - cash flow
#   i  - discount rate (0.08 means 8%)
#   t  - time of the payment, defined as number of years
get_PV <- function(cf, i, t) {
  discount_factor <- (1 + i)^t
  cf / discount_factor
}
get_PV(cf = 1000, i = 0.08, t = 5)
## [1] 680.5832
get_PV(cf = 1000, i = 0.06, t = 5)
## [1] 747.2582

Exercise 5
Write a function calculating the present value of yearly payments. The function should take two arguments:

  • vector of cash flows cf, eg. cf <- c(12, 12, 12, 12, 12, 112)
  • discount rate i, eg. i = 0.06
  • payments are made at end of consecutive years.

Note: length(x) returns the length of vector x (number of elements)

# Cash flows of the sequence of payments:
cf <- c(12, 12, 12, 12, 12, 112)
# Year of each payment: seq_along(cf) gives 1, 2, ..., length(cf), i.e. the
# same values as seq(1:length(cf)).
t <- seq_along(cf)
t
## [1] 1 2 3 4 5 6

Instead of t we use directly seq(1:length(cf)) in the formula:

# Present value of a sequence of yearly payments.
#   cf - vector of cash flows; the k-th element is discounted over k years
#   i  - discount rate
get_PV_annual_CF <- function(cf, i) {
  # seq_along(cf) is 1, 2, ..., length(cf): the same years as
  # seq(1:length(cf))
  years <- seq_along(cf)
  sum(cf / (1 + i)^years)
}

# PV of the cash flows in cf at a 6% discount rate
get_PV_annual_CF(cf, 0.06)
## [1] 129.5039

Exercise 6
Write a function calculating the present value of payments with different frequency of capitalisation (m). The function should take the arguments:

  • vector of cash flows cf, eg. cf <- c(3, 3, 3, 3, 3, 3, 3, 3, 112)
  • discount rate i, eg. i = 0.06
  • frequency of capitalisation (m) within a year, eg. m = 4 for quarterly payments, m = 12 for monthly payments.
# Present value of a sequence of payments with capitalisation m times a year.
#   cf - vector of cash flows
#   i  - discount rate
#   m  - frequency of capitalisation within a year (default 1)
# The k-th cash flow is discounted over m * k periods at the rate i / m.
get_PV_CF <- function(cf, i, m = 1) {
  # seq_along(cf) is 1, 2, ..., length(cf), the same as seq(1:length(cf))
  periods <- m * seq_along(cf)
  rate_per_period <- i / m
  sum(cf / (1 + rate_per_period)^periods)
}
cf <- c(3, 3, 3, 3, 3, 3, 3, 3, 112)
get_PV_CF(cf, i = 0.015, m = 4)
## [1] 120.3304

Note that you may define a default value of an argument: in the get_PV_CF function m is set to 1 by default. It means that if you do not supply the m argument when executing the function, R will calculate for m = 1 (i.e. annual payments). Define the function get_PV_CF and try it for:

cf <- c(3, 3, 3, 3, 3, 3, 3, 3, 112)
# m is not supplied here, so get_PV_CF uses its default m = 1
get_PV_CF(cf, 0.015)
## [1] 120.4121

Plotting functions

Simple way with curve()

Simple way of plotting a function:

  • curve(f(x), from = , to = ) draws a curve corresponding to a function over the interval [from, to]

Exercise 7
Plot \(f(x) = 2x^2 - 3x + 4\) in the interval [-5, 10]

# Draw f(x) = 2x^2 - 3x + 4 over the interval [-5, 10]
curve(2 * x^2 - 3 * x + 4, from = -5, to = 10)

Exercise 8

Plot \(f(x) = \cos(x) + \tan(x)\) in the interval [-10, 3\(\pi\)]

# Draw f(x) = cos(x) + tan(x) from -10 to 3 * pi
curve(cos(x) + tan(x), from = -10, to = 3 * pi)

Example
To plot two functions on one chart use argument add = TRUE in the curve() function.

# First curve creates the chart: normal density with sd = 0.5
curve(dnorm(x, mean = 0, sd = 0.5), from = -5, to = 5)
# add = TRUE draws the following curves on the same chart
curve(dnorm(x, mean = 0, sd = 1), from = -5, to = 5, col = "red", add = TRUE)
curve(dnorm(x, mean = 0, sd = 2), from = -5, to = 5, col = "green", add = TRUE)

Exercise 9

Plot the PV of CF = 1000 paid at the end of 15th year for discount rates ranging from 0% up to 15%. Add to the plot:

  • blue line presenting PV of CF = 1000 paid at the end of 8th year for discount rates ranging from 0% up to 15%.
  • green line presenting PV of CF = 1000 paid at the end of 4th year for discount rates ranging from 0% up to 15%.
# PV of CF = 1000 paid at the end of the 15th year, as a function of the
# discount rate x between 0% and 15%
curve(get_PV(1000, x, 15), from = 0, to = 0.15)
# Same for payment at the end of the 8th year (blue) and the 4th year
# (green), added to the existing chart
curve(get_PV(1000, x, 8), from = 0, to = 0.15, col = "blue", add = TRUE)
curve(get_PV(1000, x, 4), from = 0, to = 0.15, col = "green", add = TRUE)

Using plot() function

plot() is a basic way to plot objects. To create user defined layout use arguments (for more details see ?plot):

  • type: visualisation type
  • col: colour of series
  • ylab: y label
  • ylim: y axis scale
# Print the first rows of faang:
head(faang)
##         date    fb   amzn     aapl     nflx     goog       goog_rr
## 1 2014-02-24 70.78 351.78 75.36429 63.85714 602.3374            NA
## 2 2014-02-25 69.85 358.32 74.58000 64.71858 606.0582  0.0061772106
## 3 2014-02-26 69.26 359.80 73.90714 64.11285 606.1426  0.0001393794
## 4 2014-02-27 68.94 360.13 75.38143 64.60429 605.6657 -0.0007868263
## 5 2014-02-28 68.46 362.10 75.17714 63.66143 603.8972 -0.0029199160
## 6 2014-03-03 67.41 359.78 75.39429 63.65572 597.4591 -0.0106609383
# Facebook closing prices as a red line; ylim sets the y axis scale to
# the range 50-250.
plot(fb$Close,
     type = "l",
     col = "red",
     ylab = "stock price",
     ylim = c(50, 250))
# Apple closing prices added to the same chart as a blue line of width 2.
lines(aapl$Close, col = "blue", lwd = 2)

Exercise 10
Plot rate of return of Apple.
Hint: use existing faang_rr data frame

# Rate of return of Apple (aapl column of faang_rr) as a red line.
# The series plotted is a rate of return, not a price, so the y axis is
# labelled accordingly.
plot(x = faang_rr$aapl,
     type = "l",
     ylab = "rate of return",
     col = "red",
     main = "Apple stock RR")

Solving equations

For solving simple equations we can use uniroot(f, lower = , upper = ) function. The function uniroot searches the interval from lower to upper for a root (i.e., zero) of the function f with respect to its first argument.

Examples:
Find solutions for:

  • \(2x - 4 = 0\)
  • \(3\sqrt{\log(x)} - 4 = 0\)
# Left-hand side of the equation 2x - 4 = 0
f <- function(x) {
  2 * x - 4
}
# Plot f on [0, 4] to see where it crosses zero
curve(f, from = 0, to = 4)

# Search the interval [-2, 4] for the root of f
uniroot(f, interval = c(-2, 4))$root
## [1] 2
# Left-hand side of the equation 3 * sqrt(log(x)) - 4 = 0
f <- function(x) {
  3 * sqrt(log(x)) - 4
}
# Plot f on [1, 10] to see where it crosses zero
curve(f, from = 1, to = 10)

# Search the interval [1, 10] for the root of f
uniroot(f, interval = c(1, 10))$root
## [1] 5.916694

Exercise 11
Find x that solves the equation:
\(-97+3*x^{-1}+3*x^{-2}+103*x^{-3}=0\)

# Left-hand side of the equation -97 + 3x^-1 + 3x^-2 + 103x^-3 = 0
f <- function(x) {
  -97 + 3 * x^-1 + 3 * x^-2 + 103 * x^-3
}
# Plot f on [0.8, 1.8] to see where it crosses zero
curve(f, from = 0.8, to = 1.8)

# Search the interval [1, 10] for the root of f
uniroot(f, interval = c(1, 10))$root

Matrix

Matrix: definition

Matrix as a special case of a two-dimensional array

A matrix is a special case of a two-dimensional array in which all the objects are of the same type (note that in a data frame the vectors may be of different data types: numeric, character, …)

How to create matrix:

  • most frequently used way in our examples: binding consecutive vectors into a matrix using the cbind function:
# Three vectors of equal length
vector_1 <- seq(from = 1, to = 5, by = 1)
vector_2 <- seq(from = 101, to = 105, by = 1)
vector_3 <- seq(from = -5, to = -1, by = 1)

# cbind() places each vector in its own column; the columns are named
# after the vectors
my_matrix <- cbind(vector_1, vector_2, vector_3)
my_matrix
##      vector_1 vector_2 vector_3
## [1,]        1      101       -5
## [2,]        2      102       -4
## [3,]        3      103       -3
## [4,]        4      104       -2
## [5,]        5      105       -1
  • You can use matrix function creating matrix from single vector (sequence):
# Sequence 1, 2, ..., 15
vector_4 <- 1:15
# By default the values are put into the matrix column by column
my_matrix_2 <- matrix(vector_4, nrow = 5, ncol = 3)
my_matrix_2
# byrow = TRUE puts the values into the matrix row by row
my_matrix_2 <- matrix(vector_4, nrow = 5, ncol = 3, byrow = TRUE)
my_matrix_2

Matrix: column names and selection

Column names and selection of values from matrix

Create FAANG matrix (Facebook, Amazon, Apple, Netflix, Google):
Hint: Copy and run the below code to download the data frames (unless you did it previously and have not changed the files):

# Base address of the course data directory (note the trailing "/").
url <- "http://coin.wne.uw.edu.pl/gzakrzewski/data_for_students/"

# One price table per company. read.csv() uses header = TRUE, sep = ","
# and dec = "." by default, so no extra arguments are needed for
# comma-separated files with a decimal point.
fb   <- read.csv(paste0(url, "FB.csv"))
amzn <- read.csv(paste0(url, "AMZN.csv"))
aapl <- read.csv(paste0(url, "AAPL.csv"))
nflx <- read.csv(paste0(url, "NFLX.csv"))
goog <- read.csv(paste0(url, "GOOG.csv"))

Create a matrix consisting of the first 10 observations of the Close price of the FAANG companies.

# Matrix of closing prices: rows 1 to 10 of each company's Close vector,
# one column per company.
faang <- cbind(
  fb$Close[seq_len(10)],
  amzn$Close[seq_len(10)],
  aapl$Close[seq_len(10)],
  nflx$Close[seq_len(10)],
  goog$Close[seq_len(10)]
)
faang

Name the column of the matrix:

# Assign a name to each of the five columns, in column order
colnames(faang) <- c("fb", "amzn", "aapl", "nflx", "goog")
faang

Selecting:

  • column: faang[ , 3], faang[ , "aapl"]
  • row: faang[4, ]
  • element: intersection of n row and m column faang[n, m]
# Fifth column, selected by position
faang[ , 5]
# Column selected by its name
faang[ , "goog"]
# Single element: row 3, column 4
faang[3, 4]

Inspecting the matrix:

  • head(matrix) - prints the beginning of the matrix
  • tail(matrix) - prints the last rows of the matrix
# First rows of the matrix
head(faang)
# Last rows of the matrix
tail(faang)

Matrix simple operations

Matrix multiplication %*%

Assume there are two matrices (v1 and m1) defined, e.g. v1 <- matrix(1:4, nrow = 1) and m1 <- matrix(1:20, nrow = 4):

v1 
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
m1
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    5    9   13   17
## [2,]    2    6   10   14   18
## [3,]    3    7   11   15   19
## [4,]    4    8   12   16   20

Use %*% to multiply the matrices:

# %*% is the matrix product: the 1 x 4 matrix v1 times the 4 x 5 matrix m1
# gives a 1 x 5 matrix
v_m <- v1 %*% m1
v_m
##      [,1] [,2] [,3] [,4] [,5]
## [1,]   30   70  110  150  190

Correlation

For correlation calculation one may use the cor() function. Example of use for two variables (we will use the faang_rr data frame created at the beginning of the lesson):

# Inspect the first rows of faang_rr:
head(faang_rr)
##         date           fb          amzn         aapl          nflx
## 1 2014-02-24           NA            NA           NA            NA
## 2 2014-02-25 -0.013139319  0.0185911877 -0.010406600  1.348999e-02
## 3 2014-02-26 -0.008446614  0.0041303331 -0.009021976 -9.359307e-03
## 4 2014-02-27 -0.004620271  0.0009172235  0.019947855  7.665109e-03
## 5 2014-02-28 -0.006962619  0.0054702496 -0.002710124 -1.459433e-02
## 6 2014-03-03 -0.015337351 -0.0064070891  0.002888484 -8.975607e-05
##            goog
## 1            NA
## 2  0.0061772106
## 3  0.0001393794
## 4 -0.0007868263
## 5 -0.0029199160
## 6 -0.0106609383
# Data frame with the two vectors of interest, the Facebook and Apple rates
# of return, selected by column name. The row index -1 drops the first row:
# "-" before a column or row number removes that column / row.
df <- faang_rr[-1, c("fb", "aapl")]
# Plot the rates of return against each other to assess the correlation
# visually:
plot(df)

# Correlation matrix of the two series:
cor(df)
##             fb      aapl
## fb   1.0000000 0.4361795
## aapl 0.4361795 1.0000000

Example of use for multi variables:

# Create a data frame with the rates of return of all the FAANG companies:
# remove the first row and the first column of faang_rr.
df <- faang_rr[-1, -1] # "-" before a column or row number removes that column / row
  
# Plot the return series against each other
plot(df)

# Correlation matrix of all the return series
cor(df)
##             fb      amzn      aapl      nflx      goog
## fb   1.0000000 0.5538736 0.4361795 0.4122462 0.6027777
## amzn 0.5538736 1.0000000 0.4557642 0.4701487 0.6460855
## aapl 0.4361795 0.4557642 1.0000000 0.3474320 0.4968930
## nflx 0.4122462 0.4701487 0.3474320 1.0000000 0.4761786
## goog 0.6027777 0.6460855 0.4968930 0.4761786 1.0000000

Exercise 12

  1. Calculate correlations between log rate of returns of FAANG companies.
  2. Create vector containing correlations of Apple with other FAANG companies.
# Log rates of return of all the FAANG companies: remove the first row and
# the first (date) column of faang_log_rr ("-" before a column or row number
# removes that column / row).
df <- faang_log_rr[-1, -1]

# plot(df)

# 1. Correlation matrix, computed once and reused below
cor_matrix <- cor(df)
cor_matrix

# 2. Correlations of Apple. aapl is the third column of df, and a
# correlation matrix is symmetric, so the three selections below return the
# same values (including the correlation of aapl with itself, equal to 1):
cor_matrix[, 3]
cor_matrix[3, ]
# Store the result as a vector, as the exercise asks:
aapl_cor <- cor_matrix["aapl", ]
aapl_cor