For those who have not installed tidyverse previously: please install the package and load the library, i.e. run the two lines below:
install.packages("tidyverse")
library(tidyverse)
Note that installation may take a while (up to 25 minutes on the class computers)
For those who have installed tidyverse previously: please load the library, i.e. run the line below (you should do this each time you open a new R session):
library(tidyverse)
Creating data frame
Create FAANG data frame (Facebook, Amazon, Apple, Netflix, Google) that contains:
- Date column (taken from fb data frame)
- Close column for each company (closing price)
Get the data files: copy and run the code below to download the separate data frames for each company:
# Base address of the course data files; there is one CSV file per company.
url <- "http://coin.wne.uw.edu.pl/gzakrzewski/data_for_students/"
# Each file has a header row, "," as the field separator and "." as the
# decimal mark - these are the defaults of read.csv().
fb   <- read.csv(paste0(url, "FB.csv"))
amzn <- read.csv(paste0(url, "AMZN.csv"))
aapl <- read.csv(paste0(url, "AAPL.csv"))
nflx <- read.csv(paste0(url, "NFLX.csv"))
goog <- read.csv(paste0(url, "GOOG.csv"))
# Build the FAANG data frame from the first ten rows of every company file:
# the Date column is taken from fb, followed by one Close column per company.
faang <- data.frame(
  date = fb$Date[seq_len(10)],
  fb   = fb$Close[seq_len(10)],
  amzn = amzn$Close[seq_len(10)],
  aapl = aapl$Close[seq_len(10)],
  nflx = nflx$Close[seq_len(10)],
  goog = goog$Close[seq_len(10)]
)
Calculating rate of return
Arithmetic rate of return:
\(rr = \frac{price_t-price_{t-1}}{price_{t-1}}\)
Helpful functions:
lag(x) - returns the previous value in a vector
lead(x) - returns the next value in a vector
(both come from dplyr, which is loaded by library(tidyverse))
Example: create a vector of returns based on the Close vector in the goog data frame:
# Preview the first rows of the goog data frame.
head(goog)
# Arithmetic rate of return: the change of Close relative to the Close of the
# previous row. lag() shifts the vector by one position.
goog_rr <- with(goog, (Close - lag(Close)) / lag(Close))
Example
If you need the returns in the data frame, create a new vector in faang using the goog vector that already exists in faang. First inspect the faang data frame using head().
head(faang)
## date fb amzn aapl nflx goog
## 1 2014-02-24 70.78 351.78 75.36429 63.85714 602.3374
## 2 2014-02-25 69.85 358.32 74.58000 64.71858 606.0582
## 3 2014-02-26 69.26 359.80 73.90714 64.11285 606.1426
## 4 2014-02-27 68.94 360.13 75.38143 64.60429 605.6657
## 5 2014-02-28 68.46 362.10 75.17714 63.66143 603.8972
## 6 2014-03-03 67.41 359.78 75.39429 63.65572 597.4591
Then create new vector called goog_rr
containing rate of return:
# Add Google's arithmetic rate of return to faang as a new column; inside
# with(), goog refers to the goog column of faang.
faang$goog_rr <- with(faang, (goog - lag(goog)) / lag(goog))
# Preview the data frame together with the added column.
head(faang)
## date fb amzn aapl nflx goog goog_rr
## 1 2014-02-24 70.78 351.78 75.36429 63.85714 602.3374 NA
## 2 2014-02-25 69.85 358.32 74.58000 64.71858 606.0582 0.0061772106
## 3 2014-02-26 69.26 359.80 73.90714 64.11285 606.1426 0.0001393794
## 4 2014-02-27 68.94 360.13 75.38143 64.60429 605.6657 -0.0007868263
## 5 2014-02-28 68.46 362.10 75.17714 63.66143 603.8972 -0.0029199160
## 6 2014-03-03 67.41 359.78 75.39429 63.65572 597.4591 -0.0106609383
Exercise 1
Calculate the logarithmic rate of return for Apple: \(rr = \ln\left(\frac{price_t}{price_{t-1}}\right)\)
# Logarithmic rate of return for Apple: the natural log of the ratio of each
# price to the previous one (aapl is the Apple column of faang).
aapl_rr <- with(faang, log(aapl / lag(aapl)))
Exercise 2
Create data frame faang_rr containing:
- Date as a first vector
- arithmetic rates of return for Facebook, Amazon, Apple and Netflix
faang_rr <- data.frame(date = fb$Date,
fb = (fb$Close - lag(fb$Close)) / lag(fb$Close),
amzn = (amzn$Close - lag(amzn$Close)) / lag(amzn$Close),
aapl = (aapl$Close - lag(aapl$Close)) / lag(aapl$Close),
nflx = (nflx$Close - lag(nflx$Close)) / lag(nflx$Close))
Excercise 2a
Add to data frame faang_rr
rates of returns (arithmetic) for Google
faang_rr$goog <- (goog$Close - lag(goog$Close)) / lag(goog$Close)
Exercise 3
Create data frame faang_log_rr containing:
- Date as a first vector
- logarithmic rates of return for Facebook, Amazon, Apple, Netflix and Google
faang_log_rr <- data.frame(date = fb$Date,
fb = log(fb$Close / lag(fb$Close)),
amzn = log(amzn$Close / lag(amzn$Close)),
aapl = log(aapl$Close / lag(aapl$Close)),
nflx = log(nflx$Close / lag(nflx$Close)),
goog = log(goog$Close / lag(goog$Close)))
dnorm(x, mean = 0, sd = 1, log = FALSE)
pnorm(q, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
qnorm(p, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
rnorm(n, mean = 0, sd = 1)
The normal distribution density: \(f(x) = \frac{1}{\sqrt{2 πσ^2} } * e^{-\frac{(x - μ)^2}{2σ^2}}\)
# X ~ N(0.01, (0.05)^2)
mu_x <- 0.01
sigma_x <- 0.05
# Pr(X < -0.10)
pnorm(-0.10, mean = mu_x, sd = sigma_x)
# Pr(X > 0.10): request the upper tail directly. This gives the same
# probability as 1 - pnorm(...) but does not lose precision through
# subtraction when the tail probability is very small.
pnorm(0.10, mean = mu_x, sd = sigma_x, lower.tail = FALSE)
# Pr(-0.05 < X < 0.15)
pnorm(0.15, mean = mu_x, sd = sigma_x) - pnorm(-0.05, mean = mu_x, sd = sigma_x)
# Quantiles: 1%, 5%, 10%, 90%, 95% and 99%
qnorm(c(0.01, 0.05, 0.1, 0.9, 0.95, 0.99), mean = mu_x, sd = sigma_x)
quantile(x, q)
# Draw 100 values from the normal distribution with mean = 0 and sd = 1;
# the values are sorted only to make the sample easier to inspect.
sample <- sort(rnorm(n = 100, mean = 0, sd = 1))
# Empirical 10% quantile of the sample
quantile(sample, probs = 0.1)
Drawing the distribution:
- create a vector x, e.g. from -5 to 5
- use plot() to draw the distribution
x <- seq(-5, 5, by = 0.01)
# Normal densities evaluated on the grid x. Each density is drawn against x
# itself (not against the element index), so the horizontal axis shows the
# actual values and the effect of mean and sd is visible on the chart.
x_norm <- dnorm(x, mean = 0, sd = 1)
plot(x, x_norm, type = "l")
x_norm <- dnorm(x, mean = 0, sd = 2)
plot(x, x_norm, type = "l")
x_norm <- dnorm(x, mean = 0.05, sd = 0.25)
plot(x, x_norm, type = "l")
# Random normal variables mean = 0, sd = 1
y_norm <- rnorm(1000)
plot(y_norm)
# plot(rnorm(100))
Calculate \(\int_0^1 x^3 \, dx\)
Monte Carlo solution:
use the runif() function.
## [1] 0.235
# Hit-or-miss Monte Carlo: draw n random points (x, y) in the unit square.
n <- 100
x <- runif(n)
y <- runif(n)
# (y < x^3) is a logical vector with one element per point: TRUE (counted as
# 1) when the point lies below the curve, FALSE (counted as 0) otherwise.
# The share of TRUE values is the estimate of the integral.
integral <- sum(y < x^3) / n
integral
Exercise 4
Calculate \(\int_0^{10} \left(5 + 4 \cdot \frac{x^3}{e^x} \cdot \sin(x)\right) dx\)
# Hit-or-miss Monte Carlo on the box [0, 10] x [0, 10]: x covers the
# integration interval, y covers the heights used for the hit test.
n <- 1000
x <- runif(n, 0, 10)
y <- runif(n, 0, 10)
# The share of points below the curve is only a fraction of the box, so it is
# multiplied by the box area (10 * 10) to obtain the value of the integral.
# The result changes slightly from run to run because the points are random.
integral <- 10 * 10 * sum(y < 5 + 4 * x^3 / exp(x) * sin(x)) / n
integral
## [1] 47.1
Writing simple functions
Calculate PV of CF = 1000 paid at the end of 5th year. Discount rate is 8%.
\(PV = \frac{CF}{(1+i)^t}\)
# Present value of 1000 paid at the end of the 5th year, discounted at 8%.
pv <- 1000 / (1 + 0.08)^5
pv
## [1] 680.5832
How to automate calculations in order not to duplicate work: write a function calculating PV
function_name <- function(par1, par2, ...) {body of the function}
Body part contains actual computations the function should perform.
# Present value of a single payment.
#   cf - cash flow
#   i  - discount rate
#   t  - time, defined as the number of years
get_PV <- function(cf, i, t) {
  discount_factor <- (1 + i)^t
  cf / discount_factor
}
get_PV(1000, 0.08, 5)
## [1] 680.5832
get_PV(1000, 0.06, 5)
## [1] 747.2582
Excercise 5
Write function calculating present value of a yearly payments. Function should take two arguments:
cf
, eg. cf <- c(12, 12, 12, 12, 12, 112)
i
, eg. i = 0.06
Note: length(x)
returns the length of vector x (number of elements)
# the vector for sequence of payments:
cf <- c(12, 12, 12, 12, 12, 112)
# the vector of years: one index per payment. seq_along(cf) gives the same
# result as seq(1:length(cf)) here, but it also works for an empty vector
# (seq(1:length(cf)) would return 1 2 in that case).
t <- seq_along(cf)
t
## [1] 1 2 3 4 5 6
Instead of t
we use directly seq(1:length(cf))
in the formula:
# Present value of a sequence of yearly payments.
#   cf - numeric vector of cash flows; element k is discounted over k years
#   i  - yearly discount rate
# Returns a single number: the sum of the discounted cash flows.
get_PV_annual_CF <- function(cf, i) {
  # seq_along(cf) is the safe equivalent of seq(1:length(cf)): 1, 2, ..., one
  # index per payment, and an empty sequence when cf is empty.
  years <- seq_along(cf)
  sum(cf / (1 + i)^years)
}
get_PV_annual_CF(cf, 0.06)
## [1] 129.5039
Excercise 6
Write function calculating present value of a payments with different frequency of capitalisation (m). Function should take arguments:
cf
, eg. cf <- c(3, 3, 3, 3, 3, 3, 3, 3, 112)
i
, eg. i = 0.06
m, e.g. m = 4 for quarterly payments, m = 12 for monthly payments.
# the function for sequence of payments:
# Present value of a sequence of payments with m capitalisation periods per
# year.
#   cf - numeric vector of cash flows
#   i  - yearly discount rate
#   m  - frequency of capitalisation per year (default 1, i.e. annual)
# Element k of cf is discounted by (1 + i / m)^(m * k).
# NOTE(review): with this exponent the cash flows are still spaced one year
# apart and only the compounding frequency changes - confirm that this is the
# intended treatment of quarterly / monthly payments.
get_PV_CF <- function(cf, i, m = 1) {
  # seq_along(cf) is the safe equivalent of seq(1:length(cf)).
  periods <- m * seq_along(cf)
  sum(cf / (1 + i / m)^periods)
}
cf <- c(3, 3, 3, 3, 3, 3, 3, 3, 112)
get_PV_CF(cf, 0.015, 4)
## [1] 120.3304
Note that you may define a default value for an argument: in the get_PV_CF function, m is set to 1 by default. It means that if you do not pass the m argument when calling the function, R will calculate for m = 1 (i.e. annual payments). Define function get_PV_CF and try for:
cf <- c(3, 3, 3, 3, 3, 3, 3, 3, 112)
get_PV_CF(cf, 0.015)
## [1] 120.4121
curve()
Simple way of plotting a function:
curve(f(x), from = , to = ) draws a curve corresponding to a function over the interval [from, to]
Exercise 7
Plot \(f(x) = 2x^2 - 3x + 4\) in the interval [-5, 10]
curve(2 * x ^ 2 - 3 * x + 4, -5, 10)
Exercise 8
Plot \(f(x) = \cos(x) + \tan(x)\) in the interval \([-10, 3\pi]\)
curve(cos(x)+tan(x), -10, 3 * pi)
Example
To plot two functions on one chart use argument add = TRUE
in the curve()
function.
curve(dnorm(x, mean = 0, sd = 0.5), -5, 5)
curve(dnorm(x, mean = 0, sd = 1), -5, 5, add = TRUE, col = "red")
curve(dnorm(x, mean = 0, sd = 2), -5, 5, add = TRUE, col = "green")
Excercise 9
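Plot the PV of CF = 1000 paid at the end of 15th year for discount rates ranging from 0% up to 15%. Add to the plot the PV of the same cash flow paid at the end of the 8th year (blue) and at the end of the 4th year (green):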
curve(get_PV(1000, x, 15), 0, 0.15)
curve(get_PV(1000, x, 8), 0, 0.15, add = TRUE, col = "blue")
curve(get_PV(1000, x, 4), 0, 0.15, add = TRUE, col = "green")
plot() function
plot() is a basic way to plot objects. To create a user-defined layout use arguments (for more details see ?plot):
- type: visualisation type
- col: colour of series
- ylab: y label
- ylim: y axis scale
head(faang)
## date fb amzn aapl nflx goog goog_rr
## 1 2014-02-24 70.78 351.78 75.36429 63.85714 602.3374 NA
## 2 2014-02-25 69.85 358.32 74.58000 64.71858 606.0582 0.0061772106
## 3 2014-02-26 69.26 359.80 73.90714 64.11285 606.1426 0.0001393794
## 4 2014-02-27 68.94 360.13 75.38143 64.60429 605.6657 -0.0007868263
## 5 2014-02-28 68.46 362.10 75.17714 63.66143 603.8972 -0.0029199160
## 6 2014-03-03 67.41 359.78 75.39429 63.65572 597.4591 -0.0106609383
plot(x = fb$Close,
type = "l",
ylab = "stock price",
col = "red",
ylim = c(50, 250))
lines(aapl$Close, type = "l", col = "blue", lwd = 2)
Excercise 10
Plot rate of return of Apple.
Hint: use existing faang_rr
data frame
# Line chart of Apple's rate of return taken from the faang_rr data frame.
# The y axis shows rates of return, so it is labelled accordingly (the label
# "stock price" belonged to the price chart).
plot(x = faang_rr$aapl,
     type = "l",
     ylab = "rate of return",
     col = "red",
     main = "Apple stock RR")
For solving simple equations we can use uniroot(f, lower = , upper = )
function. The function uniroot searches the interval from lower to upper for a root (i.e., zero) of the function f with respect to its first argument.
Examples:
Find solutions for:
# Linear example: f(x) = 2x - 4.
f <- function(x) 2 * x - 4
# Draw f on [0, 4] to see where it crosses zero.
curve(f, from = 0, to = 4)
# Search the interval [-2, 4] for the root of f.
uniroot(f, interval = c(-2, 4))$root
## [1] 2
# Non-linear example: f(x) = 3 * sqrt(ln(x)) - 4.
f <- function(x) 3 * sqrt(log(x)) - 4
# Draw f on [1, 10] to see where it crosses zero.
curve(f, from = 1, to = 10)
# Search the interval [1, 10] for the root of f.
uniroot(f, interval = c(1, 10))$root
## [1] 5.916694
Excercise 11
Find x that solves the equation:
\(-97+3*x^{-1}+3*x^{-2}+103*x^{-3}=0\)
# Left-hand side of the equation -97 + 3/x + 3/x^2 + 103/x^3 = 0.
f <- function(x) -97 + 3 * x^-1 + 3 * x^-2 + 103 * x^-3
# Draw f on [0.8, 1.8] to see where it crosses zero.
curve(f, from = 0.8, to = 1.8)
# Search the interval [1, 10] for the root of f.
uniroot(f, interval = c(1, 10))$root
Matrix as a special case of a two-dimensional array
A matrix is the special case of a two-dimensional array, where all the objects are of the same type (note that in a data frame the vectors may be of different data types: numeric, character, …)
How to create matrix:
cbind function:
vector_1 <- seq(1, 5, by = 1)
vector_2 <- seq(101, 105, by = 1)
vector_3 <- seq(-5, -1, by = 1)
my_matrix <- cbind(vector_1, vector_2, vector_3)
my_matrix
## vector_1 vector_2 vector_3
## [1,] 1 101 -5
## [2,] 2 102 -4
## [3,] 3 103 -3
## [4,] 4 104 -2
## [5,] 5 105 -1
matrix function creating matrix from single vector (sequence):
vector_4 <- seq(1, 15)
my_matrix_2 <- matrix(data = vector_4, nrow = 5, ncol = 3) # put values by column
my_matrix_2
my_matrix_2 <- matrix(data = vector_4, nrow = 5, ncol = 3, byrow = TRUE) # put values by row
my_matrix_2
Column names and selection of values from matrix
Create FAANG matrix (Facebook, Amazon, Apple, Netflix, Google):
Hint: Copy and run the below code to download the data frames (unless you did it previously and have not changed the files):
# Base address of the course data files; there is one CSV file per company.
url <- "http://coin.wne.uw.edu.pl/gzakrzewski/data_for_students/"
# Each file has a header row, "," as the field separator and "." as the
# decimal mark - these are the defaults of read.csv().
fb   <- read.csv(paste0(url, "FB.csv"))
amzn <- read.csv(paste0(url, "AMZN.csv"))
aapl <- read.csv(paste0(url, "AAPL.csv"))
nflx <- read.csv(paste0(url, "NFLX.csv"))
goog <- read.csv(paste0(url, "GOOG.csv"))
Create matrix consisting of the first 10 observations of Close price of FAANG companies.
faang <- cbind(fb$Close[1:10],
amzn$Close[1:10],
aapl$Close[1:10],
nflx$Close[1:10],
goog$Close[1:10])
faang
Name the column of the matrix:
colnames(faang) <- c("fb", "amzn", "aapl", "nflx", "goog")
faang
Selecting:
faang[ , 3]
, faang[ , "aapl"]
faang[4, ]
n
row and m
column faang[n, m]
faang[ , 5]
faang[ , "goog"]
faang[3, 4]
Inspecting the matrix:
head(matrix) - prints the beginning of the matrix
tail(matrix) - prints the last rows of the matrix
head(faang)
tail(faang)
Matrix multiplication %*%
Assume there are two matrices (v1 and m1) defined:
v1
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
m1
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 5 9 13 17
## [2,] 2 6 10 14 18
## [3,] 3 7 11 15 19
## [4,] 4 8 12 16 20
Use %*%
to multiply the matrices:
v_m <- v1 %*% m1
v_m
## [,1] [,2] [,3] [,4] [,5]
## [1,] 30 70 110 150 190
For correlation calculation one may use the cor() function. Example of use for two variables (we will use the faang_rr data frame created at the beginning of the lesson):
# Inspect faang_rr:
head(faang_rr)
## date fb amzn aapl nflx
## 1 2014-02-24 NA NA NA NA
## 2 2014-02-25 -0.013139319 0.0185911877 -0.010406600 1.348999e-02
## 3 2014-02-26 -0.008446614 0.0041303331 -0.009021976 -9.359307e-03
## 4 2014-02-27 -0.004620271 0.0009172235 0.019947855 7.665109e-03
## 5 2014-02-28 -0.006962619 0.0054702496 -0.002710124 -1.459433e-02
## 6 2014-03-03 -0.015337351 -0.0064070891 0.002888484 -8.975607e-05
## goog
## 1 NA
## 2 0.0061772106
## 3 0.0001393794
## 4 -0.0007868263
## 5 -0.0029199160
## 6 -0.0106609383
# Keep only the two vectors of interest (Facebook and Apple rates of return),
# selected by column name. The "-1" row index removes the first row.
df <- faang_rr[-1, c("fb", "aapl")]
# Plot the rates of return against each other to assess the correlation
# visually:
plot(df)
# Calculate correlations:
cor(df)
## fb aapl
## fb 1.0000000 0.4361795
## aapl 0.4361795 1.0000000
Example of use for multi variables:
# Create data frame consisting of all companies (this time keep rates of return for all the FAANG comapnies but remove first row and first column):
df <- faang_rr[-1, -1] # "-" before the column or row number remove the column / row
plot(df)
cor(df)
## fb amzn aapl nflx goog
## fb 1.0000000 0.5538736 0.4361795 0.4122462 0.6027777
## amzn 0.5538736 1.0000000 0.4557642 0.4701487 0.6460855
## aapl 0.4361795 0.4557642 1.0000000 0.3474320 0.4968930
## nflx 0.4122462 0.4701487 0.3474320 1.0000000 0.4761786
## goog 0.6027777 0.6460855 0.4968930 0.4761786 1.0000000
Excercise 12
# Create data frame with the logarithmic rates of return for all the FAANG
# companies ("-" before the column or row number removes the column / row:
# here the first row and the first column are dropped):
df <- faang_log_rr[-1, -1]
# plot(df)
# Compute the correlation matrix once and reuse it for every selection below.
cor_log_rr <- cor(df)
cor_log_rr
# select results for Apple (third column, third row, or row by name):
cor_log_rr[, 3]
cor_log_rr[3, ]
cor_log_rr["aapl", ]