# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# World Bank Data using WDI Package
# path: ~/ownCloud/
# file_name:
# files_used:
# files_created
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#
rm(list = ls(all = TRUE))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# packages
#install.packages("httr")
#install.packages("XML")
#install.packages('WDI')
#install.packages("magrittr")
#install.packages("tidyverse")
#install.packages("quantmod")
#install.packages("PerformanceAnalytics")
#install.packages("tidyquant")
library("httr")
library("XML")
library("magrittr")
library("tidyverse")
library("WDI")
library("quantmod")
library("PerformanceAnalytics")
#library("tidyquant")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#
system("ls")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# using quantmod to collect data
# selecting the date while downloading
MCD <- quantmod::getSymbols("MCD",src = "yahoo",
auto.assign=FALSE,
from = "2007-06-01",
to = "2012-07-01" )
head(MCD)
tail(MCD)
# or download the available data and selecting later
MCD <- quantmod::getSymbols("MCD",src = "yahoo",
auto.assign=FALSE )
head(MCD)
tail(MCD)
MCD <- MCD["2007-06-01/2012-07-01"]
head(MCD)
tail(MCD)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# download monthly data
MCD <- quantmod::getSymbols("MCD",src = "yahoo",
auto.assign=FALSE,
from = "2007-06-01",
to = "2012-07-01",
periodicity = "monthly" )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# alternative ways
#stock_list <- c( "IBM", "MCD" )
#
#stocks_weekly <- tq_get(stock_list,
# from = start_date,
# to = end_date,
# periodicity = "weekly")
#stocks_weekly
#install.packages("BatchGetSymbols")
#library(BatchGetSymbols)
#stocks <- BatchGetSymbols( c( "IBM", "MCD" ),
# first.date = "2007-06-01",
# last.date = "2012-07-01",
# freq.data = "monthly",
# how.to.aggregate = 'first')
#stocks
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# for now lets use the example time series
IBM <- read.csv("IBM.csv")
MCD <- read.csv("MCD.csv")
head(IBM)
IBM$Adj.Close
MCD$Adj.Close
data <- data.frame(IBM$Adj.Close,MCD$Adj.Close)
data[,3] <- NA
data[,4] <- NA
data
log( IBM$Adj.Close[2] / IBM$Adj.Close[1] )
# log computes logarithms, by default natural logarithms,
# log10 computes common (i.e., base 10) logarithms, and
# log2 computes binary (i.e., base 2) logarithms.
for(i in 2: length(data[,1]) ){
data[i,3] <- ( log( data[i,1] / data[i-1,1] ) )
}
data
for(i in 2: length(data[,2]) ){
data[i,4] <- ( log( data[i,2] / data[i-1,2] ) )
}
data
colnames(data)[3] <- "IBM.Adj.roc"
colnames(data)[4] <- "MCD.Adj.roc"
head( data )
#in comparison: the ROC function from the quantmod package
# computes the standard deviation of the values in x. If na.rm is TRUE
# then missing values are removed before computation proceeds.
ROC( data$IBM.Adj.Close )
str( as.numeric( data$IBM.Adj.roc[2] ) )
str( as.numeric( ROC( data$IBM.Adj.Close )[2] ) )
as.numeric( ROC( data$IBM.Adj.Close ) ) == as.numeric( data$IBM.Adj.roc )
as.numeric( ROC( data$IBM.Adj.Close )[2] ) == as.numeric( data$IBM.Adj.roc[2] )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# why are they not exactly the same? Starting with the 16th digit the
# numbers differ.
# the continuous formulation of ROC is : roc <- diff(log(x), n, na.pad = na.pad)
# this gives the small difference compared to what we calculated first
# log( t / (t-1) )
# lets try the same
diff( log( data$IBM.Adj.Close ) )
ROC( data$IBM.Adj.Close )[2:length(data$IBM.Adj.Close)] ==
diff( log( data$IBM.Adj.Close ) )
# now it is exactly the same, but for practical reasons such
# minuscule differences do not really matter.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# if we use the first 5 digits the numbers are exactly the same
print( ROC( data$IBM.Adj.Close )[2], digits = 20 )
print( as.numeric( data$IBM.Adj.roc )[2], digits = 20 )
round( as.numeric( data$IBM.Adj.roc ), 5)
round( as.numeric( ROC( data$IBM.Adj.Close ) ), 5 )
round( as.numeric( data$IBM.Adj.roc ), 5) ==
round( as.numeric( ROC( data$IBM.Adj.Close ) ), 5 )
#options( digits = 10 ) # Modify global options
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# the data preparation phase is somewhat more intensive if using
# a programming tool like R but in the long run i think
# i pays off by having more accessible data that can be processed in a
# much faster way especially if it comes to big data applications
summary( data )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculating the mean with a function and out of the data
mean( na.omit( data[,3] ) )
mean( na.omit( data[,4] ) )
sum( na.omit( data[,3] ) ) / length( na.omit( data[,3] ) )
sum( na.omit( data[,4] ) ) / length( na.omit( data[,4] ) )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex formulas
# Mean = \frac{1}{N} \sum_{i=1}^{N} r_i # population and sample mean
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#
var( na.omit( data[,3] ) )
data[,3] - mean( na.omit( data[,3] ) ) # demeaning
( data[,3] - mean( na.omit( data[,3] ) ) )^2 # power of 2
sum( na.omit( ( data[,3] - mean( na.omit( data[,3] ) ) )^2 ) ) # sum with drop NA
sum( na.omit( ( data[,3] - mean( na.omit( data[,3] ) ) )^2 ) ) /
( length( na.omit( data[,3] ) ) - 1 )
# NA is also omitted for length and we use the sample variance
# therefore we divide by (n-1)
var( na.omit( data[,4] ) )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex formulas
# Var.p = \frac{1}{N} \sum_{i=1}^{N} (r_i-\bar{r})^2
# Var.s = \frac{1}{N-1} \sum_{i=1}^{N} (r_i-\bar{r})^2
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#
# stats:: calls the package directly
# this is not really necessary unless there are functions with the
# same name in different packages. Then one might be masked be the other
# SD IBM
stats::sd( data[,3] , na.rm = TRUE )
sd( data[,3] , na.rm = TRUE )
# the squqre root of the variance gives us the standard deviation
sqrt( var( na.omit( data[,3] ) ) )
# the long way:
sqrt(
sum( na.omit( ( data[,3] - mean( na.omit( data[,3] ) ) )^2 ) ) /
( length( na.omit( data[,3] ) ) - 1 )
)
# SD MCD
stats::sd( data[,4] , na.rm = TRUE )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex formulas standard deviation
#Stdev.p = \sqrt{ \frac{1}{N} \sum_{i=1}^{N} (r_i-\bar{r})^2 }
#Stdev.s = \sqrt{ \frac{1}{N-1} \sum_{i=1}^{N} (r_i-\bar{r})^2 }
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculate covariance
cov( na.omit(data$IBM.Adj.roc), na.omit(data$MCD.Adj.roc) )
# this gives the sample covariance
# calculation by hand
na.omit( data$IBM.Adj.roc ) - mean( na.omit( data$IBM.Adj.roc ) )
na.omit( data$MCD.Adj.roc ) - mean( na.omit( data$MCD.Adj.roc ) )
( na.omit( data$IBM.Adj.roc ) - mean( na.omit( data$IBM.Adj.roc ) ) )*
( na.omit( data$MCD.Adj.roc ) - mean( na.omit( data$MCD.Adj.roc ) ) )
sum(
( na.omit( data$IBM.Adj.roc ) - mean( na.omit( data$IBM.Adj.roc ) ) )*
( na.omit( data$MCD.Adj.roc ) - mean( na.omit( data$MCD.Adj.roc ) ) )
) /
length( na.omit( data$IBM.Adj.roc ) ) * 60/59
# length( na.omit( data$IBM.Adj.roc ) ) == 60
# Population covariance
# * (n-1)/n
cov( na.omit(data$IBM.Adj.roc), na.omit(data$MCD.Adj.roc) ) * (60-1)/60
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex formulas Covariances
# Covariance.p = \frac{1}{N} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)(r_{jt}-\bar{r}_j)
# Covariance.s = \frac{1}{N-1} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)(r_{jt}-\bar{r}_j)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculating correlation - sample correlation
cor( na.omit(data$IBM.Adj.roc), na.omit(data$MCD.Adj.roc) )
cov( na.omit(data$IBM.Adj.roc), na.omit(data$MCD.Adj.roc) ) /
( sqrt( var( na.omit( data[,3] ) ) ) * sqrt( var( na.omit( data[,4] ) ) ) )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex formulas Correlation
# Correl(i,j) = \frac{Covar.p(i,j)}{Stdev.p(i)*Stdev.p(j)}
# the full formula
# Correl(i,j) = \frac{Covariance.p = \frac{1}{N} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)(r_{jt}-\bar{r}_j)}{Stdev.p_{(i)} = \sqrt{ \frac{1}{N} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)^2 }*Stdev.p_{(j)} = \sqrt{ \frac{1}{N} \sum_{t=1}^{N} (r_{jt}-\bar{r}_j)^2 }}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# ploting return series
plot( na.omit(data$IBM.Adj.roc), na.omit(data$MCD.Adj.roc),
xlab = "IBM",
ylab = "MCD", frame.plot = FALSE,
axes = TRUE,
xgap.axis = 4,
ygap.axis = 4,pch=2)
model <- lm( na.omit(data$IBM.Adj.roc) ~ na.omit(data$MCD.Adj.roc) )
abline(model$coefficients, col="red")
abline( h=0, v=0, lty = 3 )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculating portfolio performance
data[,3:4]
ratio.ibm <- 0.5
ratio.mcd <- (1 - ratio.ibm)
data[,5] <- NA
data
for(i in 2: length(data[,3]) ){
data[i,5] <- ( data[i,3] * ratio.ibm ) + ( data[i,4] * ratio.mcd )
}
colnames(data)[5] <- "portfolio.performance"
data
head( data )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# portfolio performance values
mean( na.omit( data$portfolio.performance ) )
var( na.omit( data$portfolio.performance ) )
sqrt( var( na.omit( data$portfolio.performance ) ) )
# sd( na.omit( data$portfolio.performance ) )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculating different portfolios
ratios <- seq( 0, 1, by = 0.1 )
result <- NULL
for(i in 1:length(ratios) ) {
ratio.ibm <- ratios[i]
ratio.mcd <- (1 - ratio.ibm)
data[,5] <- NA
data
for(i in 2: length(data[,3]) ){
data[i,5] <- ( data[i,3] * ratio.ibm ) + ( data[i,4] * ratio.mcd )
}
colnames(data)[5] <- "portfolio.performance"
m <- mean( na.omit( data$portfolio.performance ) )
s <- sqrt( var( na.omit( data$portfolio.performance ) ) )
frame <- data.frame( m, s )
result <- data.frame( rbind( result, frame ) )
}
result
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# ploting values
plot( result$s, result$m,
type = "b",
xlab = "PF-SD",
ylab = "PF-Return", frame.plot = FALSE,
axes = TRUE,
xgap.axis = 4,
ygap.axis = 4,pch=2)
#model <- lm( result$m ~ result$s )
#abline(model$coefficients, col="red")
plot( result$s, result$m,
type = "b",
xlim = c(0.035,0.06),
ylim = c(0.0117,0.0122),
xlab = "PF-SD",
ylab = "PF-Return", frame.plot = FALSE,
axes = TRUE,
xgap.axis = 4,
ygap.axis = 4,
pch=2)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculating more portfolios values including the option for shorting
ratios <- seq( -3, 3, by = 0.1 )
result <- NULL
for(i in 1:length(ratios) ) {
ratio.ibm <- ratios[i]
ratio.mcd <- (1 - ratio.ibm)
data[,5] <- NA
data
for(i in 2: length(data[,3]) ){
data[i,5] <- ( data[i,3] * ratio.ibm ) + ( data[i,4] * ratio.mcd )
}
colnames(data)[5] <- "portfolio.performance"
m <- mean( na.omit( data$portfolio.performance ) )
s <- sqrt( var( na.omit( data$portfolio.performance ) ) )
frame <- data.frame( m, s, ratio.ibm, ratio.mcd )
result <- data.frame( rbind( result, frame ) )
}
result
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# ploting values
plot( result$s, result$m,
type = "b",
xlab = "PF-SD",
ylab = "PF-Return", frame.plot = FALSE,
axes = TRUE,
xgap.axis = 4,
ygap.axis = 4,pch=2)
which( round( result$s,4) == round(min( result$s ),4) )
sep.line <- result[which( round( result$s,4) == round(min( result$s ),4) ),1]
abline( h = sep.line , lty = 3, col = "red")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# The next step would be to calculate values for n asset portfolio
min( result$s )
dplyr::filter( result, s == min( result$s ) )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex vector notation
# x=\begin{bmatrix} x_1 \\ x_2 \\ ... \\ x_N \end{bmatrix}