Category Archives: Code

Programming code for different programming languages.

Calculating simple portfolio statistics with R

02/26/2023 · Allgemein, Code, Data Analysis, Programming, R, Statistics · Martin Stoppacher

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Calculating simple portfolio statistics with R (IBM / MCD example)
# path: ~/ownCloud/
# file_name: 
# files_used: 
# files_created

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#

# Start from an empty workspace: remove every object, including hidden ones
# whose names start with a dot (all.names = TRUE).
# NOTE(review): this also wipes the workspace of whoever sources this file;
# running the script in a fresh R session avoids the need for it.
rm(list = ls(all.names = TRUE))

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# packages

#install.packages("httr")
#install.packages("XML")
#install.packages('WDI')
#install.packages("magrittr")
#install.packages("tidyverse")
#install.packages("quantmod")
#install.packages("PerformanceAnalytics")
#install.packages("tidyquant")

library("httr")
library("XML")
library("magrittr")
library("tidyverse")
library("WDI")
library("quantmod")
library("PerformanceAnalytics")
#library("tidyquant")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#

# Show the files in the working directory (IBM.csv and MCD.csv are read from
# here further down). list.files() works on every platform, whereas
# system("ls") needs a Unix-like shell.
list.files()

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# using quantmod to collect data

# selecting the date while downloading

# Three ways of fetching MCD prices from Yahoo Finance with
# quantmod::getSymbols(). auto.assign = FALSE makes the call return the data
# instead of creating a variable itself; every call overwrites `MCD`.

# (1) restrict the date range in the request itself
MCD <- quantmod::getSymbols(
  "MCD",
  src = "yahoo",
  from = "2007-06-01",
  to = "2012-07-01",
  auto.assign = FALSE
)

head(MCD)
tail(MCD)

# (2) download without a date restriction and cut the window afterwards
MCD <- quantmod::getSymbols("MCD", src = "yahoo", auto.assign = FALSE)

head(MCD)
tail(MCD)

# "from/to" range string used for subsetting the downloaded series
MCD <- MCD["2007-06-01/2012-07-01"]

head(MCD)
tail(MCD)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# download monthly data

# (3) same window as in (1), but one observation per month
MCD <- quantmod::getSymbols(
  "MCD",
  src = "yahoo",
  from = "2007-06-01",
  to = "2012-07-01",
  periodicity = "monthly",
  auto.assign = FALSE
)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# alternative ways

#stock_list <- c( "IBM", "MCD" )
#
#stocks_weekly <- tq_get(stock_list, 
#                        from = start_date, 
#                        to = end_date, 
#                        periodicity = "weekly")
#stocks_weekly 

#install.packages("BatchGetSymbols")
#library(BatchGetSymbols)

#stocks <- BatchGetSymbols( c( "IBM", "MCD" ), 
#                           first.date = "2007-06-01", 
#                           last.date = "2012-07-01", 
#                           freq.data = "monthly", 
#                           how.to.aggregate = 'first')
#stocks

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# for now lets use the example time series

# Read the example price series from the working directory. Both files must
# provide an `Adj.Close` column.
# NOTE(review): the two files are assumed to cover the same periods in the
# same order - confirm, nothing below checks it.
IBM <- read.csv("IBM.csv")
MCD <- read.csv("MCD.csv")

head(IBM)
IBM$Adj.Close
MCD$Adj.Close

# Log return of the first period, computed by hand as a reference value.
log( IBM$Adj.Close[2] / IBM$Adj.Close[1] )
# log computes logarithms, by default natural logarithms,
# log10 computes common (i.e., base 10) logarithms, and
# log2 computes binary (i.e., base 2) logarithms.

# Continuously compounded return log( p_t / p_(t-1) ) of a price vector.
# The first element is NA because it has no predecessor; a vector with fewer
# than two prices yields only NAs (a `2:length(x)` loop would run backwards
# in that case).
log_return <- function(prices) {
  n <- length(prices)
  if (n < 2L) {
    return(rep(NA_real_, n))
  }
  c(NA_real_, log(prices[-1L] / prices[-n]))
}

# Columns 1-2: adjusted closing prices, columns 3-4: their log returns.
data <- data.frame(
  IBM.Adj.Close = IBM$Adj.Close,
  MCD.Adj.Close = MCD$Adj.Close
)
data$IBM.Adj.roc <- log_return(data$IBM.Adj.Close)
data$MCD.Adj.roc <- log_return(data$MCD.Adj.Close)
data

head( data )

head( data )
# in comparison: the ROC() function (from the TTR package, which quantmod loads)
# computes the rate of change of a series. With the default type = "continuous"
# it returns diff( log(x) ), padded with a leading NA.
# ROC() result for the IBM prices, computed once and reused below.
roc_ibm <- as.numeric( ROC( data$IBM.Adj.Close ) )
roc_ibm
str( as.numeric( data$IBM.Adj.roc[2] ) )
str( roc_ibm[2] )

# exact, element-wise comparison with the returns calculated by hand
roc_ibm == as.numeric( data$IBM.Adj.roc )
roc_ibm[2] == as.numeric( data$IBM.Adj.roc[2] )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# why are they not exactly the same? Starting with the 16th digit the
# numbers differ.
# the continuous formulation of ROC is : roc <- diff(log(x), n, na.pad = na.pad)
# i.e. log(t) - log(t-1), whereas we calculated log( t / (t-1) ) first.
# Both are mathematically equal but are rounded differently in floating
# point arithmetic, which gives the small difference.

# lets try the same

diff( log( data$IBM.Adj.Close ) )

roc_ibm[-1] == diff( log( data$IBM.Adj.Close ) )

# now it is exactly the same, but for practical reasons such
# minuscule differences do not really matter.
# The proper way to compare floating point numbers is with a tolerance:
isTRUE( all.equal( roc_ibm, as.numeric( data$IBM.Adj.roc ) ) )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# if we round to 5 decimal places the numbers are the same

print( roc_ibm[2], digits = 20 )
print( as.numeric( data$IBM.Adj.roc )[2], digits = 20 )

round( as.numeric( data$IBM.Adj.roc ), 5 )
round( roc_ibm, 5 )

round( as.numeric( data$IBM.Adj.roc ), 5 ) == round( roc_ibm, 5 )

#options( digits = 10 )      # Modify global options

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# the data preparation phase is somewhat more intensive when using
# a programming tool like R, but in the long run I think
# it pays off by having more accessible data that can be processed in a
# much faster way, especially when it comes to big data applications

# per-column summary of prices and returns
summary(data)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculating the mean with a function and out of the data

# built-in mean; na.rm = TRUE skips the NA in the first row of each return column
mean(data$IBM.Adj.roc, na.rm = TRUE)
mean(data$MCD.Adj.roc, na.rm = TRUE)

# by hand: sum of the non-missing returns divided by their count
sum(data$IBM.Adj.roc, na.rm = TRUE) / sum(!is.na(data$IBM.Adj.roc))
sum(data$MCD.Adj.roc, na.rm = TRUE) / sum(!is.na(data$MCD.Adj.roc))

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex formulas

# Mean = \frac{1}{N} \sum_{i=1}^{N} r_i     # population and sample mean

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# variance of the IBM returns, first built in and then step by step

var(data$IBM.Adj.roc, na.rm = TRUE)

data$IBM.Adj.roc - mean(data$IBM.Adj.roc, na.rm = TRUE)        # demeaning
(data$IBM.Adj.roc - mean(data$IBM.Adj.roc, na.rm = TRUE))^2    # power of 2
sum((data$IBM.Adj.roc - mean(data$IBM.Adj.roc, na.rm = TRUE))^2,
    na.rm = TRUE)                                              # sum without NA
sum((data$IBM.Adj.roc - mean(data$IBM.Adj.roc, na.rm = TRUE))^2,
    na.rm = TRUE) /
  (sum(!is.na(data$IBM.Adj.roc)) - 1)
# the NA is also left out of the count, and because this is the sample
# variance we divide by (n-1)

var(data$MCD.Adj.roc, na.rm = TRUE)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex formulas

# Var.p = \frac{1}{N} \sum_{i=1}^{N} (r_i-\bar{r})^2
# Var.s = \frac{1}{N-1} \sum_{i=1}^{N} (r_i-\bar{r})^2

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#

# stats::  calls the package directly
# this is not really necessary unless there are functions with the
# same name in different packages. Then one might be masked by the other

# SD IBM

stats::sd(data$IBM.Adj.roc, na.rm = TRUE)
sd(data$IBM.Adj.roc, na.rm = TRUE)

# the square root of the variance gives us the standard deviation
sqrt(var(data$IBM.Adj.roc, na.rm = TRUE))
# the long way:
sqrt(
  sum((data$IBM.Adj.roc - mean(data$IBM.Adj.roc, na.rm = TRUE))^2,
      na.rm = TRUE) /
    (sum(!is.na(data$IBM.Adj.roc)) - 1)
)

# SD MCD

stats::sd(data$MCD.Adj.roc, na.rm = TRUE)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex formulas standard deviation

#Stdev.p = \sqrt{ \frac{1}{N} \sum_{i=1}^{N} (r_i-\bar{r})^2 }
#Stdev.s = \sqrt{ \frac{1}{N-1} \sum_{i=1}^{N} (r_i-\bar{r})^2 }

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculate covariance

# Keep only the periods in which both returns are present, so the pairs stay
# aligned (dropping the NAs of each vector separately would shift one series
# against the other if their gaps differed).
paired  <- complete.cases( data$IBM.Adj.roc, data$MCD.Adj.roc )
ibm_ret <- data$IBM.Adj.roc[paired]
mcd_ret <- data$MCD.Adj.roc[paired]
n_obs   <- length( ibm_ret )   # number of paired observations

cov( ibm_ret, mcd_ret )
# this gives the sample covariance

# calculation by hand
ibm_dev <- ibm_ret - mean( ibm_ret )   # deviations from the mean
mcd_dev <- mcd_ret - mean( mcd_ret )
ibm_dev
mcd_dev

ibm_dev * mcd_dev                      # cross products

# sample covariance: sum of the cross products divided by (n-1);
# n is taken from the data instead of being hard coded
sum( ibm_dev * mcd_dev ) / ( n_obs - 1 )

# Population covariance
#  * (n-1)/n

cov( ibm_ret, mcd_ret ) * ( n_obs - 1 ) / n_obs

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex formulas Covariances

# Covariance.p = \frac{1}{N} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)(r_{jt}-\bar{r}_j)
# Covariance.s = \frac{1}{N-1} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)(r_{jt}-\bar{r}_j)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculating correlation    -    sample correlation 

# built-in sample correlation of the two return series (NAs dropped first)
with( data, cor( na.omit(IBM.Adj.roc), na.omit(MCD.Adj.roc) ) )

# the same value by hand: covariance divided by the product of the
# standard deviations
with( data, {
  ibm <- na.omit( IBM.Adj.roc )
  mcd <- na.omit( MCD.Adj.roc )
  cov( ibm, mcd ) / ( sd( ibm ) * sd( mcd ) )
} )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex formulas Correlation

# Correl(i,j) = \frac{Covar.p(i,j)}{Stdev.p(i)*Stdev.p(j)} 

# the full formula 

# Correl(i,j) = \frac{ \frac{1}{N} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)(r_{jt}-\bar{r}_j) }{ \sqrt{ \frac{1}{N} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)^2 } \cdot \sqrt{ \frac{1}{N} \sum_{t=1}^{N} (r_{jt}-\bar{r}_j)^2 } }

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# ploting return series 

# Scatter plot of the return series: IBM on the x axis, MCD on the y axis.
plot( na.omit(data$IBM.Adj.roc), na.omit(data$MCD.Adj.roc),
      xlab = "IBM",
      ylab = "MCD", frame.plot = FALSE,
      axes = TRUE,
      xgap.axis = 4,
      ygap.axis = 4, pch = 2 )
# Regress the y variable (MCD) on the x variable (IBM) so that intercept and
# slope describe a line in the coordinates of this plot. Regressing IBM on MCD
# would give the coefficients for swapped axes. lm() drops the rows
# containing NA by default.
model <- lm( MCD.Adj.roc ~ IBM.Adj.roc, data = data )
abline( model, col = "red" )
# dotted reference lines through the origin
abline( h = 0, v = 0, lty = 3 )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculating portfolio performance 

data[,3:4]

# portfolio weights; they sum to 1
ratio.ibm <- 0.5
ratio.mcd <- (1 - ratio.ibm)

# Weighted sum of the two return series, computed for all rows at once.
# The first row stays NA because both returns are NA there.
# NOTE(review): a weighted sum of log returns only approximates the log
# return of the portfolio - confirm this is accurate enough for the purpose.
data$portfolio.performance <- ( data$IBM.Adj.roc * ratio.ibm ) +
  ( data$MCD.Adj.roc * ratio.mcd )
data
head( data )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# portfolio performance values

mean( data$portfolio.performance, na.rm = TRUE )   # mean return
var( data$portfolio.performance, na.rm = TRUE )    # sample variance
sd( data$portfolio.performance, na.rm = TRUE )     # sample standard deviation
# sd( na.omit( data$portfolio.performance ) )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculating different portfolios

# IBM weights from 0 to 1 in steps of 0.1; MCD receives the remainder.
ratios <- seq( 0, 1, by = 0.1 )

# One row per weight: m = mean portfolio return, s = its sample standard
# deviation. Allocated up front instead of growing with rbind() in the loop.
result <- data.frame( m = numeric( length(ratios) ),
                      s = numeric( length(ratios) ) )

# `k` indexes the weights; the former inner loop over the rows (which reused
# the variable `i`) is replaced by a vectorised calculation.
for (k in seq_along(ratios)) {

  ratio.ibm <- ratios[k]
  ratio.mcd <- (1 - ratio.ibm)

  # return series of this mix; stored in `data` as before
  data$portfolio.performance <- ( data$IBM.Adj.roc * ratio.ibm ) +
    ( data$MCD.Adj.roc * ratio.mcd )

  result$m[k] <- mean( data$portfolio.performance, na.rm = TRUE )
  result$s[k] <- sd( data$portfolio.performance, na.rm = TRUE )

}

result

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# ploting values


# risk (standard deviation) against mean return for every portfolio mix
with( result,
      plot( s, m,
            type = "b",
            pch = 2,
            xlab = "PF-SD",
            ylab = "PF-Return",
            axes = TRUE,
            frame.plot = FALSE,
            xgap.axis = 4,
            ygap.axis = 4 ) )
#model <- lm( result$m ~ result$s )
#abline(model$coefficients, col="red")

# the same plot with fixed axis limits
with( result,
      plot( s, m,
            type = "b",
            pch = 2,
            xlim = c(0.035,0.06),
            ylim = c(0.0117,0.0122),
            xlab = "PF-SD",
            ylab = "PF-Return",
            axes = TRUE,
            frame.plot = FALSE,
            xgap.axis = 4,
            ygap.axis = 4 ) )


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# calculating more portfolios values including the option for shorting

# IBM weights from -3 to 3 in steps of 0.1. Weights below 0 or above 1 mean
# that one of the two stocks is sold short; MCD always receives 1 - weight.
ratios <- seq( -3, 3, by = 0.1 )

# One row per weight: m = mean portfolio return, s = its sample standard
# deviation, plus the two weights. Allocated up front instead of growing
# with rbind() in the loop.
result <- data.frame( m = numeric( length(ratios) ),
                      s = numeric( length(ratios) ),
                      ratio.ibm = ratios,
                      ratio.mcd = 1 - ratios )

# `k` indexes the weights; the former inner loop over the rows (which reused
# the variable `i`) is replaced by a vectorised calculation.
for (k in seq_along(ratios)) {

  ratio.ibm <- ratios[k]
  ratio.mcd <- (1 - ratio.ibm)

  # return series of this mix; stored in `data` as before
  data$portfolio.performance <- ( data$IBM.Adj.roc * ratio.ibm ) +
    ( data$MCD.Adj.roc * ratio.mcd )

  result$m[k] <- mean( data$portfolio.performance, na.rm = TRUE )
  result$s[k] <- sd( data$portfolio.performance, na.rm = TRUE )

}

result

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# ploting values


# risk (standard deviation) against mean return for every portfolio mix
plot( result$s, result$m,
      type = "b",
      xlab = "PF-SD",
      ylab = "PF-Return", frame.plot = FALSE,
      axes = TRUE,
      xgap.axis = 4,
      ygap.axis = 4, pch = 2 )

# Row of the portfolio with the smallest standard deviation. which.min()
# returns exactly one index, whereas matching on values rounded to 4 decimals
# can select several neighbouring portfolios and draw several lines.
min_risk_row <- which.min( result$s )
min_risk_row
# mean return of that portfolio; the line separates the upper branch of the
# curve from the lower one
sep.line <- result$m[min_risk_row]
abline( h = sep.line , lty = 3, col = "red" )


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# The next step would be to calculate values for n asset portfolio 

# smallest portfolio standard deviation ...
min( result$s )
# ... and the row(s) of `result` that reach it
result %>% dplyr::filter( s == min( s ) )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# latex vector notation

#  x=\begin{bmatrix} x_1 \\ x_2 \\ ... \\ x_N \end{bmatrix}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Calculating simple portfolio statistics with R (IBM / MCD example)

# path: ~/ownCloud/

# file_name:

# files_used:

# files_created

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

rm(list = ls(all = TRUE))

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# packages

#install.packages("httr")

#install.packages("XML")

#install.packages('WDI')

#install.packages("magrittr")

#install.packages("tidyverse")

#install.packages("quantmod")

#install.packages("PerformanceAnalytics")

#install.packages("tidyquant")

library("httr")

library("XML")

library("magrittr")

library("tidyverse")

library("WDI")

library("quantmod")

library("PerformanceAnalytics")

#library("tidyquant")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

system("ls")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# using quantmod to collect data

# selecting the date while downloading

MCD <- quantmod::getSymbols("MCD",src = "yahoo",

auto.assign=FALSE,

from = "2007-06-01",

to = "2012-07-01" )

head(MCD)

tail(MCD)

# or download the available data and selecting later

MCD <- quantmod::getSymbols("MCD",src = "yahoo",

auto.assign=FALSE )

head(MCD)

tail(MCD)

MCD <- MCD["2007-06-01/2012-07-01"]

head(MCD)

tail(MCD)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# download monthly data

MCD <- quantmod::getSymbols("MCD",src = "yahoo",

auto.assign=FALSE,

from = "2007-06-01",

to = "2012-07-01",

periodicity = "monthly" )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# alternative ways

#stock_list <- c( "IBM", "MCD" )

#stocks_weekly <- tq_get(stock_list,

# from = start_date,

# to = end_date,

# periodicity = "weekly")

#stocks_weekly

#install.packages("BatchGetSymbols")

#library(BatchGetSymbols)

#stocks <- BatchGetSymbols( c( "IBM", "MCD" ),

# first.date = "2007-06-01",

# last.date = "2012-07-01",

# freq.data = "monthly",

# how.to.aggregate = 'first')

#stocks

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# for now lets use the example time series

IBM <- read.csv("IBM.csv")

MCD <- read.csv("MCD.csv")

head(IBM)

IBM$Adj.Close

MCD$Adj.Close

data <- data.frame(IBM$Adj.Close,MCD$Adj.Close)

data[,3] <- NA

data[,4] <- NA

data

log( IBM$Adj.Close[2] / IBM$Adj.Close[1] )

# log computes logarithms, by default natural logarithms,

# log10 computes common (i.e., base 10) logarithms, and

# log2 computes binary (i.e., base 2) logarithms.

for(i in 2: length(data[,1]) ){

data[i,3] <- ( log( data[i,1] / data[i-1,1] ) )

}

data

for(i in 2: length(data[,2]) ){

data[i,4] <- ( log( data[i,2] / data[i-1,2] ) )

}

data

colnames(data)[3] <- "IBM.Adj.roc"

colnames(data)[4] <- "MCD.Adj.roc"

head( data )

# in comparison: the ROC() function (from the TTR package, which quantmod loads)

# computes the rate of change of a series. With the default type = "continuous"

# it returns diff( log(x) ), padded with a leading NA.

ROC( data$IBM.Adj.Close )

str( as.numeric( data$IBM.Adj.roc[2] ) )

str( as.numeric( ROC( data$IBM.Adj.Close )[2] ) )

as.numeric( ROC( data$IBM.Adj.Close ) ) == as.numeric( data$IBM.Adj.roc )

as.numeric( ROC( data$IBM.Adj.Close )[2] ) == as.numeric( data$IBM.Adj.roc[2] )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# why are they not exactly the same? Starting with the 16th digit the

# numbers differ.

# the continuous formulation of ROC is : roc <- diff(log(x), n, na.pad = na.pad)

# this gives the small difference compared to what we calculated first

# log( t / (t-1) )

# lets try the same

diff( log( data$IBM.Adj.Close ) )

ROC( data$IBM.Adj.Close )[2:length(data$IBM.Adj.Close)] ==

diff( log( data$IBM.Adj.Close ) )

# now it is exactly the same, but for practical reasons such

# minuscule differences do not really matter.

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# if we use the first 5 digits the numbers are exactly the same

print( ROC( data$IBM.Adj.Close )[2], digits = 20 )

print( as.numeric( data$IBM.Adj.roc )[2], digits = 20 )

round( as.numeric( data$IBM.Adj.roc ), 5)

round( as.numeric( ROC( data$IBM.Adj.Close ) ), 5 )

round( as.numeric( data$IBM.Adj.roc ), 5) ==

round( as.numeric( ROC( data$IBM.Adj.Close ) ), 5 )

#options( digits = 10 ) # Modify global options

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# the data preparation phase is somewhat more intensive if using

# a programming tool like R but in the long run i think

# i pays off by having more accessible data that can be processed in a

# much faster way especially if it comes to big data applications

summary( data )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# calculating the mean with a function and out of the data

mean( na.omit( data[,3] ) )

mean( na.omit( data[,4] ) )

sum( na.omit( data[,3] ) ) / length( na.omit( data[,3] ) )

sum( na.omit( data[,4] ) ) / length( na.omit( data[,4] ) )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# latex formulas

# Mean = \frac{1}{N} \sum_{i=1}^{N} r_i # population and sample mean

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

var( na.omit( data[,3] ) )

data[,3] - mean( na.omit( data[,3] ) ) # demeaning

( data[,3] - mean( na.omit( data[,3] ) ) )^2 # power of 2

sum( na.omit( ( data[,3] - mean( na.omit( data[,3] ) ) )^2 ) ) # sum with drop NA

sum( na.omit( ( data[,3] - mean( na.omit( data[,3] ) ) )^2 ) ) /

( length( na.omit( data[,3] ) ) - 1 )

# NA is also omitted for length and we use the sample variance

# therefore we divide by (n-1)

var( na.omit( data[,4] ) )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# latex formulas

# Var.p = \frac{1}{N} \sum_{i=1}^{N} (r_i-\bar{r})^2

# Var.s = \frac{1}{N-1} \sum_{i=1}^{N} (r_i-\bar{r})^2

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# stats:: calls the package directly

# this is not really necessary unless there are functions with the

# same name in different packages. Then one might be masked be the other

# SD IBM

stats::sd( data[,3] , na.rm = TRUE )

sd( data[,3] , na.rm = TRUE )

# the squqre root of the variance gives us the standard deviation

sqrt( var( na.omit( data[,3] ) ) )

# the long way:

sqrt(

sum( na.omit( ( data[,3] - mean( na.omit( data[,3] ) ) )^2 ) ) /

( length( na.omit( data[,3] ) ) - 1 )

)

# SD MCD

stats::sd( data[,4] , na.rm = TRUE )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# latex formulas standard deviation

#Stdev.p = \sqrt{ \frac{1}{N} \sum_{i=1}^{N} (r_i-\bar{r})^2 }

#Stdev.s = \sqrt{ \frac{1}{N-1} \sum_{i=1}^{N} (r_i-\bar{r})^2 }

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# calculate covariance

# Keep only the periods in which both returns are present, so the pairs stay
# aligned (dropping the NAs of each vector separately would shift one series
# against the other if their gaps differed).
paired  <- complete.cases( data$IBM.Adj.roc, data$MCD.Adj.roc )
ibm_ret <- data$IBM.Adj.roc[paired]
mcd_ret <- data$MCD.Adj.roc[paired]
n_obs   <- length( ibm_ret )   # number of paired observations

cov( ibm_ret, mcd_ret )
# this gives the sample covariance

# calculation by hand
ibm_dev <- ibm_ret - mean( ibm_ret )   # deviations from the mean
mcd_dev <- mcd_ret - mean( mcd_ret )
ibm_dev
mcd_dev

ibm_dev * mcd_dev                      # cross products

# sample covariance: sum of the cross products divided by (n-1);
# n is taken from the data instead of being hard coded
sum( ibm_dev * mcd_dev ) / ( n_obs - 1 )

# Population covariance
#  * (n-1)/n

cov( ibm_ret, mcd_ret ) * ( n_obs - 1 ) / n_obs

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# latex formulas Covariances

# Covariance.p = \frac{1}{N} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)(r_{jt}-\bar{r}_j)

# Covariance.s = \frac{1}{N-1} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)(r_{jt}-\bar{r}_j)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# calculating correlation - sample correlation

cor( na.omit(data$IBM.Adj.roc), na.omit(data$MCD.Adj.roc) )

cov( na.omit(data$IBM.Adj.roc), na.omit(data$MCD.Adj.roc) ) /

( sqrt( var( na.omit( data[,3] ) ) ) * sqrt( var( na.omit( data[,4] ) ) ) )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# latex formulas Correlation

# Correl(i,j) = \frac{Covar.p(i,j)}{Stdev.p(i)*Stdev.p(j)}

# the full formula

# Correl(i,j) = \frac{Covariance.p = \frac{1}{N} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)(r_{jt}-\bar{r}_j)}{Stdev.p_{(i)} = \sqrt{ \frac{1}{N} \sum_{t=1}^{N} (r_{it}-\bar{r}_i)^2 }*Stdev.p_{(j)} = \sqrt{ \frac{1}{N} \sum_{t=1}^{N} (r_{jt}-\bar{r}_j)^2 }}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# ploting return series

# Scatter plot of the return series: IBM on the x axis, MCD on the y axis.
plot( na.omit(data$IBM.Adj.roc), na.omit(data$MCD.Adj.roc),
      xlab = "IBM",
      ylab = "MCD", frame.plot = FALSE,
      axes = TRUE,
      xgap.axis = 4,
      ygap.axis = 4, pch = 2 )

# Regress the y variable (MCD) on the x variable (IBM) so that intercept and
# slope describe a line in the coordinates of this plot. Regressing IBM on MCD
# would give the coefficients for swapped axes. lm() drops the rows
# containing NA by default.
model <- lm( MCD.Adj.roc ~ IBM.Adj.roc, data = data )

abline( model, col = "red" )

# dotted reference lines through the origin
abline( h = 0, v = 0, lty = 3 )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# calculating portfolio performance

data[,3:4]

ratio.ibm <- 0.5

ratio.mcd <- (1 - ratio.ibm)

data[,5] <- NA

data

for(i in 2: length(data[,3]) ){

data[i,5] <- ( data[i,3] * ratio.ibm ) + ( data[i,4] * ratio.mcd )

}

colnames(data)[5] <- "portfolio.performance"

data

head( data )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# portfolio performance values

mean( na.omit( data$portfolio.performance ) )

var( na.omit( data$portfolio.performance ) )

sqrt( var( na.omit( data$portfolio.performance ) ) )

# sd( na.omit( data$portfolio.performance ) )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# calculating different portfolios

ratios <- seq( 0, 1, by = 0.1 )

result <- NULL

for(i in 1:length(ratios) ) {

ratio.ibm <- ratios[i]

ratio.mcd <- (1 - ratio.ibm)

data[,5] <- NA

data

for(i in 2: length(data[,3]) ){

data[i,5] <- ( data[i,3] * ratio.ibm ) + ( data[i,4] * ratio.mcd )

}

colnames(data)[5] <- "portfolio.performance"

m <- mean( na.omit( data$portfolio.performance ) )

s <- sqrt( var( na.omit( data$portfolio.performance ) ) )

frame <- data.frame( m, s )

result <- data.frame( rbind( result, frame ) )

}

result

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# ploting values

plot( result$s, result$m,

type = "b",

xlab = "PF-SD",

ylab = "PF-Return", frame.plot = FALSE,

axes = TRUE,

xgap.axis = 4,

ygap.axis = 4,pch=2)

#model <- lm( result$m ~ result$s )

#abline(model$coefficients, col="red")

plot( result$s, result$m,

type = "b",

xlim = c(0.035,0.06),

ylim = c(0.0117,0.0122),

xlab = "PF-SD",

ylab = "PF-Return", frame.plot = FALSE,

axes = TRUE,

xgap.axis = 4,

ygap.axis = 4,

pch=2)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# calculating more portfolios values including the option for shorting

ratios <- seq( -3, 3, by = 0.1 )

result <- NULL

for(i in 1:length(ratios) ) {

ratio.ibm <- ratios[i]

ratio.mcd <- (1 - ratio.ibm)

data[,5] <- NA

data

for(i in 2: length(data[,3]) ){

data[i,5] <- ( data[i,3] * ratio.ibm ) + ( data[i,4] * ratio.mcd )

}

colnames(data)[5] <- "portfolio.performance"

m <- mean( na.omit( data$portfolio.performance ) )

s <- sqrt( var( na.omit( data$portfolio.performance ) ) )

frame <- data.frame( m, s, ratio.ibm, ratio.mcd )

result <- data.frame( rbind( result, frame ) )

}

result

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# ploting values

# risk (standard deviation) against mean return for every portfolio mix
plot( result$s, result$m,
      type = "b",
      xlab = "PF-SD",
      ylab = "PF-Return", frame.plot = FALSE,
      axes = TRUE,
      xgap.axis = 4,
      ygap.axis = 4, pch = 2 )

# Row of the portfolio with the smallest standard deviation. which.min()
# returns exactly one index, whereas matching on values rounded to 4 decimals
# can select several neighbouring portfolios and draw several lines.
min_risk_row <- which.min( result$s )

min_risk_row

# mean return of that portfolio; the line separates the upper branch of the
# curve from the lower one
sep.line <- result$m[min_risk_row]

abline( h = sep.line , lty = 3, col = "red" )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# The next step would be to calculate values for n asset portfolio

min( result$s )

dplyr::filter( result, s == min( result$s ) )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# latex vector notation

# x=\begin{bmatrix} x_1 \\ x_2 \\ ... \\ x_N \end{bmatrix}

Data examples using World Bank Data

03/03/2021 · Allgemein, Code, Data Analysis, Programming, R, Statistics · Martin Stoppacher

Data examples using World Bank Data in combination with the ggplot package in R.

Tidyverse methods and functions were used to generate a combined data frame (tibble) for all countries and indicators.

# Total population over time for three selected countries.
# `%in%` keeps every row whose country is one of the three names; `==` would
# recycle the three names along the column and silently drop most of the
# matching rows.
countries.wb.data.map %>%
  filter( country %in% c("Afghanistan","Germany","Mexico") ) %>%
  ggplot( aes( x = `year`,
               y = `Population, total`,
               #size = `Population, total`,
               col = `country`
               #shape = `Region`
  ) ) +
  geom_point( ) +
  labs( title = "Population" )

# Total population over time for three selected countries.
# `%in%` keeps every row whose country is one of the three names; `==` would
# recycle the three names along the column and silently drop most of the
# matching rows.
countries.wb.data.map %>%
  filter( country %in% c("Afghanistan","Germany","Mexico") ) %>%
  ggplot( aes( x = `year`,
               y = `Population, total`,
               #size = `Population, total`,
               col = `country`
               #shape = `Region`
  ) ) +
  geom_point( ) +
  labs( title = "Population" )

# log(GDP) against the fertility rate; point size shows the population,
# shape the income group and colour the region.
countries.wb.data.map %>%
  # keeps the rows with 2017 < year < 2019 (i.e. 2018 if `year` holds whole numbers)
  filter(year > 2017, year < 2019) %>%
  #filter( country == c("Austria","Germany","France") ) %>%
  ggplot(aes(
    x = log(`GDP (constant 2010 US$)`),
    y = `Fertility rate, total (births per woman)`,
    size = `Population, total`,
    shape = `IncomeGroup`,
    colour = `Region`
    #shape = as.character(`year`)
  )) +
  geom_point() +
  labs(title = "log( `GDP (constant 2010 US$)` ) vs. Fertility rate, total (births per woman)")

# log(GDP) against the fertility rate; point size shows the population,
# shape the income group and colour the region.
countries.wb.data.map %>%
  # keeps the rows with 2017 < year < 2019 (i.e. 2018 if `year` holds whole numbers)
  filter(year > 2017, year < 2019) %>%
  #filter( country == c("Austria","Germany","France") ) %>%
  ggplot(aes(
    x = log(`GDP (constant 2010 US$)`),
    y = `Fertility rate, total (births per woman)`,
    size = `Population, total`,
    shape = `IncomeGroup`,
    colour = `Region`
    #shape = as.character(`year`)
  )) +
  geom_point() +
  labs(title = "log( `GDP (constant 2010 US$)` ) vs. Fertility rate, total (births per woman)")

# GDP per capita against the fertility rate; shape shows the income group,
# colour the region. Only the countries of the "Low income" group are labelled.
# (The HTML entities of the scraped version are decoded so the code parses.)
# NOTE(review): geom_label_repel() belongs to the ggrepel package; no
# library(ggrepel) call is visible in this snippet - confirm it is attached.
countries.wb.data.map %>%
  filter( year > 2017 ) %>%
  filter( year < 2019 )  %>%
  #filter( country == c("Austria","Germany","France") ) %>%
  ggplot( aes( x = `GDP per capita (constant 2010 US$)`,
               y = `Fertility rate, total (births per woman)`,
               #size = `Population, total`,
               shape = `IncomeGroup`,
               col = `Region`
               #shape = as.character(`year`)
  ) ) +
  geom_point( ) +
  labs( title = "GDP per capita (constant 2010 US$) vs Fertility rate, total (births per woman)" ) +
  geom_label_repel( aes( label = ifelse( `IncomeGroup` == "Low income",
                                         country,
                                         "" ) ),
                    hjust=0,
                    #vjust=0.5,
                    #nudge_y       = 7,
                    nudge_x       = 150000,
                    #force         = 100,
                    box.padding   = 0.35,
                    point.padding = 0.2,
                    segment.color = "lightgray",
                    direction     = "y",    # labels are only moved along the y axis
                    #color = "white",
                    arrow = arrow(
                      length = unit(0.03, "npc"),
                      type = "closed",
                      ends = "first"
                    ) ) +
  #xlim( - 100, 60000000000000)  +
  #scale_x_continuous( expand = expand_scale(mult = c( 0.4, 0.4 ))) +
  #scale_y_continuous( expand = expand_scale(mult = c( 0.1, 0.1))) +
  theme_bw()

# GDP per capita against the fertility rate; shape shows the income group,
# colour the region. Only the countries of the "Low income" group are labelled.
countries.wb.data.map %>%
  # keeps the rows with 2017 < year < 2019
  filter(year > 2017, year < 2019) %>%
  #filter( country == c("Austria","Germany","France") ) %>%
  ggplot(aes(
    x = `GDP per capita (constant 2010 US$)`,
    y = `Fertility rate, total (births per woman)`,
    #size = `Population, total`,
    shape = `IncomeGroup`,
    colour = `Region`
    #shape = as.character(`year`)
  )) +
  geom_point() +
  labs(title = "GDP per capita (constant 2010 US$) vs Fertility rate, total (births per woman)") +
  geom_label_repel(
    aes(label = ifelse(`IncomeGroup` == "Low income", country, "")),
    hjust = 0,
    #vjust=0.5,
    #nudge_y = 7,
    nudge_x = 150000,
    #force = 100,
    box.padding = 0.35,
    point.padding = 0.2,
    segment.color = "lightgray",
    direction = "y", # labels are only moved along the y axis
    #color = "white",
    arrow = arrow(
      length = unit(0.03, "npc"),
      type = "closed",
      ends = "first"
    )
  ) +
  #xlim( - 100, 60000000000000) +
  #scale_x_continuous( expand = expand_scale(mult = c( 0.4, 0.4 ))) +
  #scale_y_continuous( expand = expand_scale(mult = c( 0.1, 0.1))) +
  theme_bw()

# Fertility rate against GDP per capita on the raw (untransformed) scale,
# coloured by income group, for years strictly between 2017 and 2019.
# xlim() restricts the x scale to 0 - 50000.
countries.wb.data.map %>%
  filter( year > 2017 ) %>%
  filter( year < 2019 ) %>%
  ggplot( aes( x = `GDP per capita (constant 2010 US$)`,
               y = `Fertility rate, total (births per woman)`,
               col = `IncomeGroup` ) ) +
  geom_point() +
  geom_line() +
  geom_smooth() +
  theme_bw() +
  # x is NOT log-transformed in this plot, so the title must not claim log().
  labs( title = "GDP per capita (constant 2010 US$) - Fertility rate, total (births per woman)" ) +
  xlim( 0, 50000 )

# Fertility rate against GDP per capita on the raw (untransformed) scale,
# coloured by income group, for years strictly between 2017 and 2019.
# xlim() restricts the x scale to 0 - 50000.
countries.wb.data.map %>%
  filter( year > 2017 ) %>%
  filter( year < 2019 ) %>%
  ggplot( aes( x = `GDP per capita (constant 2010 US$)`,
               y = `Fertility rate, total (births per woman)`,
               col = `IncomeGroup` ) ) +
  geom_point() +
  geom_line() +
  geom_smooth() +
  theme_bw() +
  # x is NOT log-transformed in this plot, so the title must not claim log().
  labs( title = "GDP per capita (constant 2010 US$) - Fertility rate, total (births per woman)" ) +
  xlim( 0, 50000 )

# Fertility rate against the natural log of GDP per capita (log() is applied
# inside aes()), coloured by income group, for years strictly between 2017
# and 2019.
countries.wb.data.map %>%
  filter(year > 2017, year < 2019) %>%
  ggplot(aes(
    x      = log(`GDP per capita (constant 2010 US$)`),
    y      = `Fertility rate, total (births per woman)`,
    colour = IncomeGroup
  )) +
  geom_point() +
  geom_line() +
  geom_smooth() +
  labs(title = "log( GDP per capita (constant 2010 US$) ) - Fertility rate, total (births per woman) ") +
  theme_bw()

# Fertility rate against the natural log of GDP per capita (log() is applied
# inside aes()), coloured by income group, for years strictly between 2017
# and 2019.
countries.wb.data.map %>%
  filter(year > 2017, year < 2019) %>%
  ggplot(aes(
    x      = log(`GDP per capita (constant 2010 US$)`),
    y      = `Fertility rate, total (births per woman)`,
    colour = IncomeGroup
  )) +
  geom_point() +
  geom_line() +
  geom_smooth() +
  labs(title = "log( GDP per capita (constant 2010 US$) ) - Fertility rate, total (births per woman) ") +
  theme_bw()

# Fertility rate against log GDP per capita for years strictly between 2017
# and 2019. The line and smoother layers use the plot-level x/y mapping only;
# the point layer adds shape (income group) and colour (region). Countries
# with a fertility rate above 5 are labelled with a nudge to the right, those
# below 1.25 with a nudge to the left.
countries.wb.data.map %>%
  filter(year > 2017, year < 2019) %>%
  ggplot(aes(
    x = log(`GDP per capita (constant 2010 US$)`),
    y = `Fertility rate, total (births per woman)`
  )) +
  geom_line() +
  geom_smooth() +
  # x and y are inherited from ggplot(); only shape and colour are added here
  geom_point(aes(shape = IncomeGroup, colour = Region)) +
  geom_label_repel(
    aes(label = ifelse(`Fertility rate, total (births per woman)` > 5, country, "")),
    direction     = "y",   # labels are repelled along the y axis only
    segment.color = "lightgray",
    nudge_x       = 11,
    hjust         = 0
  ) +
  geom_label_repel(
    aes(label = ifelse(`Fertility rate, total (births per woman)` < 1.25, country, "")),
    direction     = "y",   # labels are repelled along the y axis only
    segment.color = "lightgray",
    point.padding = 0.2,
    nudge_x       = -11
  ) +
  labs(title = "GDP per capita (constant 2010 US$) vs Fertility rate, total (births per woman) 2018")

# Fertility rate against log GDP per capita for years strictly between 2017
# and 2019. The line and smoother layers use the plot-level x/y mapping only;
# the point layer adds shape (income group) and colour (region). Countries
# with a fertility rate above 5 are labelled with a nudge to the right, those
# below 1.25 with a nudge to the left.
countries.wb.data.map %>%
  filter(year > 2017, year < 2019) %>%
  ggplot(aes(
    x = log(`GDP per capita (constant 2010 US$)`),
    y = `Fertility rate, total (births per woman)`
  )) +
  geom_line() +
  geom_smooth() +
  # x and y are inherited from ggplot(); only shape and colour are added here
  geom_point(aes(shape = IncomeGroup, colour = Region)) +
  geom_label_repel(
    aes(label = ifelse(`Fertility rate, total (births per woman)` > 5, country, "")),
    direction     = "y",   # labels are repelled along the y axis only
    segment.color = "lightgray",
    nudge_x       = 11,
    hjust         = 0
  ) +
  geom_label_repel(
    aes(label = ifelse(`Fertility rate, total (births per woman)` < 1.25, country, "")),
    direction     = "y",   # labels are repelled along the y axis only
    segment.color = "lightgray",
    point.padding = 0.2,
    nudge_x       = -11
  ) +
  labs(title = "GDP per capita (constant 2010 US$) vs Fertility rate, total (births per woman) 2018")

# Fertility rate against log GDP per capita for years strictly between 1980
# and 1982. The line and smoother layers use the plot-level x/y mapping only;
# the point layer adds shape (income group) and colour (region). Countries
# with a fertility rate above 7.3 are labelled with a nudge to the right,
# those below 1.8 with a nudge to the left.
countries.wb.data.map %>%
  filter(year > 1980, year < 1982) %>%
  ggplot(aes(
    x = log(`GDP per capita (constant 2010 US$)`),
    y = `Fertility rate, total (births per woman)`
  )) +
  geom_line() +
  geom_smooth() +
  # x and y are inherited from ggplot(); only shape and colour are added here
  geom_point(aes(shape = IncomeGroup, colour = Region)) +
  geom_label_repel(
    aes(label = ifelse(`Fertility rate, total (births per woman)` > 7.3, country, "")),
    direction     = "y",   # labels are repelled along the y axis only
    segment.color = "lightgray",
    nudge_x       = 11,
    hjust         = 0
  ) +
  geom_label_repel(
    aes(label = ifelse(`Fertility rate, total (births per woman)` < 1.8, country, "")),
    direction     = "y",   # labels are repelled along the y axis only
    segment.color = "lightgray",
    point.padding = 0.2,
    nudge_x       = -11
  ) +
  labs(title = "GDP per capita (constant 2010 US$) vs Fertility rate, total (births per woman) 1981")

# Fertility rate against log GDP per capita for years strictly between 1980
# and 1982. The line and smoother layers use the plot-level x/y mapping only;
# the point layer adds shape (income group) and colour (region). Countries
# with a fertility rate above 7.3 are labelled with a nudge to the right,
# those below 1.8 with a nudge to the left.
countries.wb.data.map %>%
  filter(year > 1980, year < 1982) %>%
  ggplot(aes(
    x = log(`GDP per capita (constant 2010 US$)`),
    y = `Fertility rate, total (births per woman)`
  )) +
  geom_line() +
  geom_smooth() +
  # x and y are inherited from ggplot(); only shape and colour are added here
  geom_point(aes(shape = IncomeGroup, colour = Region)) +
  geom_label_repel(
    aes(label = ifelse(`Fertility rate, total (births per woman)` > 7.3, country, "")),
    direction     = "y",   # labels are repelled along the y axis only
    segment.color = "lightgray",
    nudge_x       = 11,
    hjust         = 0
  ) +
  geom_label_repel(
    aes(label = ifelse(`Fertility rate, total (births per woman)` < 1.8, country, "")),
    direction     = "y",   # labels are repelled along the y axis only
    segment.color = "lightgray",
    point.padding = 0.2,
    nudge_x       = -11
  ) +
  labs(title = "GDP per capita (constant 2010 US$) vs Fertility rate, total (births per woman) 1981")

# Fertility rate against log GDP per capita for Austria, Germany and France,
# one colour per country, for years strictly between 1960 and 2019. Each
# country gets its own label layer showing the year; the layers differ only
# in the country they label and in nudge / repel direction.
countries.wb.data.map %>%
  # %in% tests set membership row by row. `country == c( ... )` would recycle
  # the three names down the column and silently drop every row whose country
  # does not line up with the recycled position.
  filter( country %in% c( "Austria", "Germany", "France" ) ) %>%
  filter( year > 1960 ) %>%
  filter( year < 2019 ) %>%
  ggplot( aes( x = log( `GDP per capita (constant 2010 US$)` ),
               y = `Fertility rate, total (births per woman)`,
               col = `country` ) ) +
  geom_line() +
  geom_smooth() +
  geom_point() +
  geom_label_repel( aes( label = ifelse( `country` == "France", `year`, "" ) ),
                    nudge_y       = 1,
                    segment.color = "lightgray",
                    direction     = "y" ) +   # repel labels along the y axis only
  geom_label_repel( aes( label = ifelse( `country` == "Austria", `year`, "" ) ),
                    nudge_x       = -3,
                    segment.color = "lightgray",
                    direction     = "x" ) +   # repel labels along the x axis only
  geom_label_repel( aes( label = ifelse( `country` == "Germany", `year`, "" ) ),
                    nudge_y       = -4,
                    segment.color = "lightgray",
                    direction     = "y" ) +   # repel labels along the y axis only
  theme_bw() +
  labs( title = "log( `GDP per capita (constant 2010 US$)` ) - Fertility rate, total (births per woman) 1961 - 2018" )

# Fertility rate against log GDP per capita for Austria, Germany and France,
# one colour per country, for years strictly between 1960 and 2019. Each
# country gets its own label layer showing the year; the layers differ only
# in the country they label and in nudge / repel direction.
countries.wb.data.map %>%
  # %in% tests set membership row by row. `country == c( ... )` would recycle
  # the three names down the column and silently drop every row whose country
  # does not line up with the recycled position.
  filter( country %in% c( "Austria", "Germany", "France" ) ) %>%
  filter( year > 1960 ) %>%
  filter( year < 2019 ) %>%
  ggplot( aes( x = log( `GDP per capita (constant 2010 US$)` ),
               y = `Fertility rate, total (births per woman)`,
               col = `country` ) ) +
  geom_line() +
  geom_smooth() +
  geom_point() +
  geom_label_repel( aes( label = ifelse( `country` == "France", `year`, "" ) ),
                    nudge_y       = 1,
                    segment.color = "lightgray",
                    direction     = "y" ) +   # repel labels along the y axis only
  geom_label_repel( aes( label = ifelse( `country` == "Austria", `year`, "" ) ),
                    nudge_x       = -3,
                    segment.color = "lightgray",
                    direction     = "x" ) +   # repel labels along the x axis only
  geom_label_repel( aes( label = ifelse( `country` == "Germany", `year`, "" ) ),
                    nudge_y       = -4,
                    segment.color = "lightgray",
                    direction     = "y" ) +   # repel labels along the y axis only
  theme_bw() +
  labs( title = "log( `GDP per capita (constant 2010 US$)` ) - Fertility rate, total (births per woman) 1961 - 2018" )

Basic R commands – 1.2 – permutations and matrix functions

05/05/2020 | Allgemein, Code, Programming, R | Martin Stoppacher

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Basic Commands and Statistics with R - 1.2 - permutations
# path: ~/ownCloud/STA_Statistics/Basic_Statistics/
# file_name: statistic_basics3.R
# files_used:

#install.packages( 'gtools' )
library( gtools )
#install.packages( "combinat" )
library( combinat )

#install.packages( "magrittr" )
library( "magrittr" )

# - - - - - - - - - - - - - - - - 
#

# Factorial of a single number, then the running factorials 1!, 2!, ..., x!.
x <- 6

factorial( x )
# [1] 720

# seq_len( x ) is used instead of 1:x so that x = 0 gives an empty loop
# rather than iterating over c( 1, 0 ).
for( i in seq_len( x ) ) print( factorial( i ) )
# [1] 1
# [1] 2
# [1] 6
# [1] 24
# [1] 120
# [1] 720

# - - - - - - - - - - - - - - - - 
#

# All orderings of a small character vector. permn() returns a list with one
# element per permutation (3! = 6 entries, as recorded below).
x <- c( "a", "b", "c" )

combinat::permn( x )

# [[1]]
# [1] "a" "b" "c"
# [[2]]
# [1] "a" "c" "b"
# [[3]]
# [1] "c" "a" "b"
# [[4]]
# [1] "c" "b" "a"
# [[5]]
# [1] "b" "c" "a"
# [[6]]
# [1] "b" "a" "c"

# - - - - - - - - - - - - - - - - 
# string conversion using combinat::

# Split a string into single characters and permute those characters.
x <- "ABCDEFG"
x <- strsplit( x, "" )[[1]]
y <- combinat::permn( x )
y

x <- LETTERS[1:5]
y <- combinat::permn( x )

# Collapse each permutation back into a single string.
paste( y[[1]], collapse = "" )
# seq_along( y ) instead of 1:length( y ): stays correct even for an empty y.
for( i in seq_along( y ) ) print( paste( y[[i]], collapse = "" ) )

# - - - - - - - - - - - - - - - - 
# using gtools:: for permutations

# x is LETTERS[1:5] at this point, so the rows consist of "A" ... "E".
# The recorded output below was adjusted accordingly (expected first rows).
gtools::permutations( 5, 5, x )
# [,1] [,2] [,3] [,4] [,5]
# [1,] "A"  "B"  "C"  "D"  "E" 
# [2,] "A"  "B"  "C"  "E"  "D" 
# [3,] "A"  "B"  "D"  "C"  "E" 
# [4,] "A"  "B"  "D"  "E"  "C" 
# [5,] "A"  "B"  "E"  "C"  "D"
# ...

# repeats.allowed = TRUE: every position may hold any of the five letters.
gtools::permutations( 5, 5, x, repeats.allowed = TRUE )
# [,1] [,2] [,3] [,4] [,5]
# [1,] "A"  "A"  "A"  "A"  "A" 
# [2,] "A"  "A"  "A"  "A"  "B" 
# [3,] "A"  "A"  "A"  "A"  "C" 
# [4,] "A"  "A"  "A"  "A"  "D" 
# [5,] "A"  "A"  "A"  "A"  "E" 
# [6,] "A"  "A"  "A"  "B"  "A" 
# [7,] "A"  "A"  "A"  "B"  "B" 
# [8,] "A"  "A"  "A"  "B"  "C" 
# [9,] "A"  "A"  "A"  "B"  "D" 
# [10,] "A"  "A"  "A"  "B"  "E" 
# ...

# - - - - - - - - - - - - - - - - 
# using gtools:: for combinations

# combinations( n, r, v ): unordered selections of r elements out of n.
# x still holds LETTERS[1:5] here. NOTE(review): for n = 4 and n = 2 with a
# five-element x, presumably only the first n elements are used - confirm
# against the gtools documentation.
gtools::combinations( 4, 2, x ) 

gtools::combinations( 5, 2, x )

gtools::combinations( 5, 3, x )

# repeats.allowed = TRUE lets an element be picked more than once; presumably
# this is what makes r = 5 > n = 2 acceptable in the last call - verify.
gtools::combinations( 4, 2, x, repeats.allowed = TRUE )

gtools::combinations( 2, 5, x, repeats.allowed = TRUE )

# Martin Stoppacher                                                             #
# office@martinstoppacher.com                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#################################################################################

100

101

102

103

104

105

106

107

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Basic Commands and Statistics with R - 1.2 - permutations

# path: ~/ownCloud/STA_Statistics/Basic_Statistics/

# file_name: statistic_basics3.R

# files_used:

#install.packages( 'gtools' )

library( gtools )

#install.packages( "combinat" )

library( combinat )

#install.packages( "magrittr" )

library( "magrittr" )

# - - - - - - - - - - - - - - - -

# Factorial of a single number, then the running factorials 1!, 2!, ..., x!.
x <- 6

factorial( x )
# [1] 720

# seq_len( x ) is used instead of 1:x so that x = 0 gives an empty loop
# rather than iterating over c( 1, 0 ).
for( i in seq_len( x ) ) print( factorial( i ) )
# [1] 1
# [1] 2
# [1] 6
# [1] 24
# [1] 120
# [1] 720

# - - - - - - - - - - - - - - - -

# All orderings of a small character vector. permn() returns a list with one
# element per permutation (3! = 6 entries, as recorded below).
x <- c( "a", "b", "c" )

combinat::permn( x )

# [[1]]
# [1] "a" "b" "c"
# [[2]]
# [1] "a" "c" "b"
# [[3]]
# [1] "c" "a" "b"
# [[4]]
# [1] "c" "b" "a"
# [[5]]
# [1] "b" "c" "a"
# [[6]]
# [1] "b" "a" "c"

# - - - - - - - - - - - - - - - -
# string conversion using combinat::

# Split a string into single characters and permute those characters.
x <- "ABCDEFG"
x <- strsplit( x, "" )[[1]]
y <- combinat::permn( x )

x <- LETTERS[1:5]
y <- combinat::permn( x )

# Collapse each permutation back into a single string.
paste( y[[1]], collapse = "" )
# seq_along( y ) instead of 1:length( y ): stays correct even for an empty y.
for( i in seq_along( y ) ) print( paste( y[[i]], collapse = "" ) )

# - - - - - - - - - - - - - - - -

# using gtools:: for permutations

# x is LETTERS[1:5] at this point, so the rows consist of "A" ... "E".
# The recorded output below was adjusted accordingly (expected first rows).
gtools::permutations( 5, 5, x )

# [,1] [,2] [,3] [,4] [,5]

# [1,] "A" "B" "C" "D" "E"

# [2,] "A" "B" "C" "E" "D"

# [3,] "A" "B" "D" "C" "E"

# [4,] "A" "B" "D" "E" "C"

# [5,] "A" "B" "E" "C" "D"

# ...

# repeats.allowed = TRUE: every position may hold any of the five letters.
gtools::permutations( 5, 5, x, repeats.allowed = TRUE )

# [,1] [,2] [,3] [,4] [,5]

# [1,] "A" "A" "A" "A" "A"

# [2,] "A" "A" "A" "A" "B"

# [3,] "A" "A" "A" "A" "C"

# [4,] "A" "A" "A" "A" "D"

# [5,] "A" "A" "A" "A" "E"

# [6,] "A" "A" "A" "B" "A"

# [7,] "A" "A" "A" "B" "B"

# [8,] "A" "A" "A" "B" "C"

# [9,] "A" "A" "A" "B" "D"

# [10,] "A" "A" "A" "B" "E"

# ...

# - - - - - - - - - - - - - - - -

# using gtools:: for combinations

# combinations( n, r, v ): unordered selections of r elements out of n.
# x still holds LETTERS[1:5] here. NOTE(review): for n = 4 and n = 2 with a
# five-element x, presumably only the first n elements are used - confirm
# against the gtools documentation.
gtools::combinations( 4, 2, x )

gtools::combinations( 5, 2, x )

gtools::combinations( 5, 3, x )

# repeats.allowed = TRUE lets an element be picked more than once; presumably
# this is what makes r = 5 > n = 2 acceptable in the last call - verify.
gtools::combinations( 4, 2, x, repeats.allowed = TRUE )

gtools::combinations( 2, 5, x, repeats.allowed = TRUE )

# Martin Stoppacher #

# office@martinstoppacher.com #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

#################################################################################

# matrix algebra

# matrix() fills column by column, so the first column of m1 is 1, 2, 3
# (see the printed m1 below).
m1 <- matrix( c( 1, 2, 3, 4, 5, 6, 7, 8, 9 ) , 3, 3 )
m1

# m2 is a 3 x 3 matrix in which every entry is 2.
m2 <- matrix( rep( 2, 9 ), 3, 3 )
m2

# printed m1:
#       [,1] [,2] [,3]
# [1,]    1    4    7
# [2,]    2    5    8
# [3,]    3    6    9

m1 * m1  # element-wise multiplication (each entry squared)

#       [,1] [,2] [,3]
# [1,]    1   16   49
# [2,]    4   25   64
# [3,]    9   36   81

# element-wise product with m2: every entry of m1 is doubled
m1 * m2

#       [,1] [,2] [,3]
# [1,]    2    8   14
# [2,]    4   10   16
# [3,]    6   12   18


# Matrix Multiplication
m1 %*% m1

#       [,1] [,2] [,3]
# [1,]   30   66  102
# [2,]   36   81  126
# [3,]   42   96  150

# Transpose
t( m1 )

#       [,1] [,2] [,3]
# [1,]    1    2    3
# [2,]    4    5    6
# [3,]    7    8    9

# Outer Product
# %o% multiplies every element of the left operand with every element of the
# right operand.

m1[ 1 , ]
m1[ , 1 ] 
# first row of m1 times first column of m1
m1[ 1 , ] %o%  m1[ , 1 ] 

# For two 3 x 3 matrices the result has four indices; as the recorded slices
# below show, slice [ , , k, l ] equals m1 * m1[ k, l ].
m1 %o% m1

# , , 1, 1
# 
#       [,1] [,2] [,3]
# [1,]    1    4    7
# [2,]    2    5    8
# [3,]    3    6    9
# 
# , , 2, 1
# 
#       [,1] [,2] [,3]
# [1,]    2    8   14
# [2,]    4   10   16
# [3,]    6   12   18
# 
# , , 3, 1
# 
#       [,1] [,2] [,3]
# [1,]    3   12   21
# [2,]    6   15   24
# [3,]    9   18   27
# 
# , , 1, 2
# 
#       [,1] [,2] [,3]
# [1,]    4   16   28
# [2,]    8   20   32
# [3,]   12   24   36
# 
# , , 2, 2
# 
#       [,1] [,2] [,3]
# [1,]    5   20   35
# [2,]   10   25   40
# [3,]   15   30   45
# 
# , , 3, 2
# 
#       [,1] [,2] [,3]
# [1,]    6   24   42
# [2,]   12   30   48
# [3,]   18   36   54
# 
# , , 1, 3
# 
#       [,1] [,2] [,3]
# [1,]    7   28   49
# [2,]   14   35   56
# [3,]   21   42   63
# 
# , , 2, 3
# 
#       [,1] [,2] [,3]
# [1,]    8   32   56
# [2,]   16   40   64
# [3,]   24   48   72
# 
# , , 3, 3
# 
#       [,1] [,2] [,3]
# [1,]    9   36   63
# [2,]   18   45   72
# [3,]   27   54   81

# Kronecker product

m1 %x% m1

#       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
# [1,]    1    4    7    4   16   28    7   28   49
# [2,]    2    5    8    8   20   32   14   35   56
# [3,]    3    6    9   12   24   36   21   42   63
# [4,]    2    8   14    5   20   35    8   32   56
# [5,]    4   10   16   10   25   40   16   40   64
# [6,]    6   12   18   15   30   45   24   48   72
# [7,]    3   12   21    6   24   42    9   36   63
# [8,]    6   15   24   12   30   48   18   45   72
# [9,]    9   18   27   18   36   54   27   54   81

m1 %x% m2

#       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
# [1,]    2    2    2    8    8    8   14   14   14
# [2,]    2    2    2    8    8    8   14   14   14
# [3,]    2    2    2    8    8    8   14   14   14
# [4,]    4    4    4   10   10   10   16   16   16
# [5,]    4    4    4   10   10   10   16   16   16
# [6,]    4    4    4   10   10   10   16   16   16
# [7,]    6    6    6   12   12   12   18   18   18
# [8,]    6    6    6   12   12   12   18   18   18
# [9,]    6    6    6   12   12   12   18   18   18

# cross Product 
# crossprod( a, b ) computes t( a ) %*% b.

m1[ , 1 ]
m1[ , 2 ] 

# m2[ , 1 ] is c( 2, 2, 2 ), so each result entry is twice a column sum of m1.
# NOTE(review): the output previously recorded here (32, 77, 122) is what
# crossprod( m1, m1[ , 2 ] ) returns - confirm which call was intended.
crossprod( m1, m2[ , 1 ]  )

#       [,1]
# [1,]   12
# [2,]   30
# [3,]   48

crossprod( m1 )

#      [,1] [,2] [,3]
# [1,]   14   32   50
# [2,]   32   77  122
# [3,]   50  122  194

crossprod( m1, m2 )

#       [,1] [,2] [,3]
# [1,]   12   12   12
# [2,]   30   30   30
# [3,]   48   48   48

# Diagonal Matrix with elements of x in the diagonal

diag( nrow = 3 )

# [,1] [,2] [,3]
# [1,]    1    0    0
# [2,]    0    1    0
# [3,]    0    0    1

diag( 3,  nrow = 3 )

# [,1] [,2] [,3]
# [1,]    3    0    0
# [2,]    0    3    0
# [3,]    0    0    3

# returns a vector of the principal diagonal elements
m1
diag( m1 )

# [1] 1 5 9

# diagonal matrix with the elements of m1[ , 1 ] on the diagonal
# (not an identity matrix: the diagonal is 1, 2, 3)
diag( m1[ , 1 ] )

# [,1] [,2] [,3]
# [1,]    1    0    0
# [2,]    0    2    0
# [3,]    0    0    3

# simple combinations

cbind( m1, m2 )
rbind( m1, m2 )

# means and sums

rowMeans( m1 )
rowSums( m1 )

colMeans( m1 )
colSums( m1 )

# eigenvalues, eigenvectors

eigen( m1 )
e <- eigen( m1 )
e$values
e$vectors

# eigen() decomposition
# $values
# [1]  1.611684e+01 -1.116844e+00 -5.700691e-16
#
# $vectors
# [,1]       [,2]       [,3]
# [1,] -0.4645473 -0.8829060  0.4082483
# [2,] -0.5707955 -0.2395204 -0.8164966
# [3,] -0.6770438  0.4038651  0.4082483

# singular value decomposition

svd( m1 )

# $d    singular values of m1
# [1] 1.684810e+01 1.068370e+00 5.543107e-16
#
# $u    left singular vectors of m1
# [,1]        [,2]       [,3]
# [1,] -0.4796712  0.77669099  0.4082483
# [2,] -0.5723678  0.07568647 -0.8164966
# [3,] -0.6650644 -0.62531805  0.4082483
#
# $v    right singular vectors of m1
# [,1]       [,2]       [,3]
# [1,] -0.2148372 -0.8872307  0.4082483
# [2,] -0.5205874 -0.2496440 -0.8164966
# [3,] -0.8263375  0.3879428  0.4082483


# QR decomposition 

qr( m1 )

# $qr
# [,1]      [,2]          [,3]
# [1,] -3.7416574 -8.552360 -1.336306e+01
# [2,]  0.5345225  1.963961  3.927922e+00
# [3,]  0.8017837  0.988693  1.776357e-15
#
# $rank
# [1] 2
#
# $qraux  
# [1] 1.267261e+00 1.149954e+00 1.776357e-15
#
# $pivot
# [1] 1 2 3
#
# attr(,"class")
# [1] "qr"


# inverse 

# m1 is singular (the recorded qr( m1 ) result reports rank 2), so solve()
# signals an error. try() still prints that error but lets a source()d
# script continue with the statements that follow.
try( solve( m1 ) )

# Error in solve.default(m1) : 
#   Lapack routine dgesv: system is exactly singular: U[3,3] = 0

m3 <- matrix( c( 2, 6, 1, 4 ) , 2, 2 )
m3

# [,1] [,2]
# [1,]    2    1
# [2,]    6    4

sm3 <- solve( m3 )

# [,1] [,2]
# [1,]    2 -0.5
# [2,]   -3  1.0

m3 %*% sm3

# [,1] [,2]
# [1,]    1    0
# [2,]    0    1

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

# matrix algebra

m1 <- matrix( c( 1, 2, 3, 4, 5, 6, 7, 8, 9 ) , 3, 3 )

m2 <- matrix( rep( 2, 9 ), 3, 3 )

# [,1] [,2] [,3]

# [1,] 1 4 7

# [2,] 2 5 8

# [3,] 3 6 9

m1 * m1 # element-wise multiplication (each entry squared)

# [,1] [,2] [,3]

# [1,] 1 16 49

# [2,] 4 25 64

# [3,] 9 36 81

m1 * m2

# [,1] [,2] [,3]

# [1,] 2 8 14

# [2,] 4 10 16

# [3,] 6 12 18

# Matrix Multiplication

m1 %*% m1

# [,1] [,2] [,3]

# [1,] 30 66 102

# [2,] 36 81 126

# [3,] 42 96 150

# Transpose

t( m1 )

# [,1] [,2] [,3]

# [1,] 1 2 3

# [2,] 4 5 6

# [3,] 7 8 9

# Outer Product

m1[ 1 , ]

m1[ , 1 ]

m1[ 1 , ] %o% m1[ , 1 ]

m1 %o% m1

# , , 1, 1

# [,1] [,2] [,3]

# [1,] 1 4 7

# [2,] 2 5 8

# [3,] 3 6 9

# , , 2, 1

# [,1] [,2] [,3]

# [1,] 2 8 14

# [2,] 4 10 16

# [3,] 6 12 18

# , , 3, 1

# [,1] [,2] [,3]

# [1,] 3 12 21

# [2,] 6 15 24

# [3,] 9 18 27

# , , 1, 2

# [,1] [,2] [,3]

# [1,] 4 16 28

# [2,] 8 20 32

# [3,] 12 24 36

# , , 2, 2

# [,1] [,2] [,3]

# [1,] 5 20 35

# [2,] 10 25 40

# [3,] 15 30 45

# , , 3, 2

# [,1] [,2] [,3]

# [1,] 6 24 42

# [2,] 12 30 48

# [3,] 18 36 54

# , , 1, 3

# [,1] [,2] [,3]

# [1,] 7 28 49

# [2,] 14 35 56

# [3,] 21 42 63

# , , 2, 3

# [,1] [,2] [,3]

# [1,] 8 32 56

# [2,] 16 40 64

# [3,] 24 48 72

# , , 3, 3

# [,1] [,2] [,3]

# [1,] 9 36 63

# [2,] 18 45 72

# [3,] 27 54 81

# Kronecker product

m1 %x% m1

# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]

# [1,] 1 4 7 4 16 28 7 28 49

# [2,] 2 5 8 8 20 32 14 35 56

# [3,] 3 6 9 12 24 36 21 42 63

# [4,] 2 8 14 5 20 35 8 32 56

# [5,] 4 10 16 10 25 40 16 40 64

# [6,] 6 12 18 15 30 45 24 48 72

# [7,] 3 12 21 6 24 42 9 36 63

# [8,] 6 15 24 12 30 48 18 45 72

# [9,] 9 18 27 18 36 54 27 54 81

m1 %x% m2

# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]

# [1,] 2 2 2 8 8 8 14 14 14

# [2,] 2 2 2 8 8 8 14 14 14

# [3,] 2 2 2 8 8 8 14 14 14

# [4,] 4 4 4 10 10 10 16 16 16

# [5,] 4 4 4 10 10 10 16 16 16

# [6,] 4 4 4 10 10 10 16 16 16

# [7,] 6 6 6 12 12 12 18 18 18

# [8,] 6 6 6 12 12 12 18 18 18

# [9,] 6 6 6 12 12 12 18 18 18

# cross Product
# crossprod( a, b ) computes t( a ) %*% b.

m1[ , 1 ]

m1[ , 2 ]

# m2[ , 1 ] is c( 2, 2, 2 ), so each result entry is twice a column sum of m1.
# NOTE(review): the output previously recorded here (32, 77, 122) is what
# crossprod( m1, m1[ , 2 ] ) returns - confirm which call was intended.
crossprod( m1, m2[ , 1 ] )

# [,1]

# [1,] 12

# [2,] 30

# [3,] 48

crossprod( m1 )

# [,1] [,2] [,3]

# [1,] 14 32 50

# [2,] 32 77 122

# [3,] 50 122 194

crossprod( m1, m2 )

# [,1] [,2] [,3]

# [1,] 12 12 12

# [2,] 30 30 30

# [3,] 48 48 48

# Diagonal Matrix with elements of x in the diagonal

diag( nrow = 3 )

# [,1] [,2] [,3]

# [1,] 1 0 0

# [2,] 0 1 0

# [3,] 0 0 1

diag( 3, nrow = 3 )

# [,1] [,2] [,3]

# [1,] 3 0 0

# [2,] 0 3 0

# [3,] 0 0 3

# returns a vector of the principal diagonal elements

diag( m1 )

# [1] 1 5 9

# diagonal matrix with the elements of m1[ , 1 ] on the diagonal
# (not an identity matrix: the diagonal is 1, 2, 3)

diag( m1[ , 1 ] )

# [,1] [,2] [,3]

# [1,] 1 0 0

# [2,] 0 2 0

# [3,] 0 0 3

# simple combinations

cbind( m1, m2 )

rbind( m1, m2 )

# means and sums

rowMeans( m1 )

rowSums( m1 )

colMeans( m1 )

colSums( m1 )

# eigenvalues, eigenvectors

eigen( m1 )

e <- eigen( m1 )

e$values

e$vectors

# eigen() decomposition

# $values

# [1] 1.611684e+01 -1.116844e+00 -5.700691e-16

# $vectors

# [,1] [,2] [,3]

# [1,] -0.4645473 -0.8829060 0.4082483

# [2,] -0.5707955 -0.2395204 -0.8164966

# [3,] -0.6770438 0.4038651 0.4082483

# singular value decomposition

svd( m1 )

# $d singular values of m1

# [1] 1.684810e+01 1.068370e+00 5.543107e-16

# $u left singular vectors of m1

# [,1] [,2] [,3]

# [1,] -0.4796712 0.77669099 0.4082483

# [2,] -0.5723678 0.07568647 -0.8164966

# [3,] -0.6650644 -0.62531805 0.4082483

# $v right singular vectors of m1

# [,1] [,2] [,3]

# [1,] -0.2148372 -0.8872307 0.4082483

# [2,] -0.5205874 -0.2496440 -0.8164966

# [3,] -0.8263375 0.3879428 0.4082483

# QR decomposition

qr( m1 )

# $qr

# [,1] [,2] [,3]

# [1,] -3.7416574 -8.552360 -1.336306e+01

# [2,] 0.5345225 1.963961 3.927922e+00

# [3,] 0.8017837 0.988693 1.776357e-15

# $rank

# [1] 2

# $qraux

# [1] 1.267261e+00 1.149954e+00 1.776357e-15

# $pivot

# [1] 1 2 3

# attr(,"class")

# [1] "qr"

# inverse

# m1 is singular (the recorded qr( m1 ) result reports rank 2), so solve()
# signals an error. try() still prints that error but lets a source()d
# script continue with the statements that follow.
try( solve( m1 ) )

# Error in solve.default(m1) :

# Lapack routine dgesv: system is exactly singular: U[3,3] = 0

m3 <- matrix( c( 2, 6, 1, 4 ) , 2, 2 )

# [,1] [,2]

# [1,] 2 1

# [2,] 6 4

sm3 <- solve( m3 )

# [,1] [,2]

# [1,] 2 -0.5

# [2,] -3 1.0

m3 %*% sm3

# [,1] [,2]

# [1,] 1 0

# [2,] 0 1

Basic R commands – 1

04/21/2020 | Allgemein, Code, Data Analysis, Programming, R, Statistics | Martin Stoppacher

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Basic Commands and Statistics with R

# Start from a clean session: remove every object in the global environment,
# including hidden ones (names starting with a dot).
rm(list = ls(all = TRUE))
getwd()
#system("ls")
# NOTE(review): machine-specific path - this call fails unless the directory
# exists for the current user.
setwd("~/ownCloud/STA_Statistics/basicR/")

# list the attached packages and environments
search()

# The two scipen calls demonstrate both settings; the second one overrides
# the first, so the session continues with scipen = 0.
options(scipen=100)   # scientific off
options(scipen=0)     # scientific on
options(digits = 3)     

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# https://stat.ethz.ch/R-manual/R-patched/library/datasets/html/mtcars.html

# attach() adds mtcars to the search path and detach() removes it again;
# the search() calls show the path after each step.
attach(mtcars)
search()
detach(mtcars)
search()
mtcars         # The data was extracted from the 1974 Motor Trend US magazine
# ?mtcars

class(mtcars)  # determine the class of an object

str(mtcars)    # Compactly display the internal structure of an R object

# 'data.frame':	32 obs. of  11 variables:

#                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
# Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
# Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
# Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
# Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
# Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
# Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
# Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
# Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
# Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
# Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
# Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
# Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
# Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
# Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
# Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
# Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
# Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
# Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
# Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
# Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
# Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
# Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
# AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
# Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
# Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
# Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
# Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
# Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
# Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
# Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
# Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
# Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

# A data frame with 32 observations on 11 (numeric) variables.

#[, 1]	mpg	Miles/(US) gallon
#[, 2]	cyl	Number of cylinders
#[, 3]	disp	Displacement (cu.in.)
#[, 4]	hp	Gross horsepower
#[, 5]	drat	Rear axle ratio
#[, 6]	wt	Weight (1000 lbs)
#[, 7]	qsec	1/4 mile time
#[, 8]	vs	Engine (0 = V-shaped, 1 = straight)
#[, 9]	am	Transmission (0 = automatic, 1 = manual)
#[,10]	gear	Number of forward gears
#[,11]	carb	Number of carburetors

colnames(mtcars) <- c("mpg","cyl","disp","hp","drat","wt",
                      "qsec","vs","am","gear","carb")

# colnames(mtcars) <- c("miles-per-gallon","cylinders","displacement",
#                       "horsepower","rear-axle-ratio","weight","qsec",
#                       "engine-vs","Transmissionam","gear","carburetors")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# data structures

letters
l <- letters
str(l)
# chr [1:26]  "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" 
#             "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # 
# Vectors

t <- c(1:3,"Hello",NA,FALSE,TRUE)
t
# [1] "1"     "2"     "3"     "Hello" NA      "FALSE" "TRUE" 
str(t)
# chr [1:7] "1" "2" "3" "Hello" NA "FALSE" "TRUE"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Factors

# Turn the numeric transmission indicator into a labelled factor.
# Coding according to ?mtcars: am = 0 -> automatic, am = 1 -> manual.
mtcars$am[mtcars$am == 1]
which(mtcars$am == 1)

# ifelse() recodes the whole vector in one step. Assigning a string into
# the numeric vector element-wise would silently coerce it to character
# half-way through the recoding.
am.names <- ifelse(mtcars$am == 1, "Manual", "Automatic")
am.names

am.names.factor <- factor(am.names)
am.names.factor
# [1] Manual    Manual    Manual    Automatic Automatic ...
# Levels: Automatic Manual

# internally stored as integer codes that index the table of levels:
# 1 Automatic
# 2 Manual

# Automatic and Manual are the levels of the factor
levels(am.names.factor)

# relevel() only changes which level comes first (the reference level);
# the data themselves stay the same
relevel(am.names.factor, ref = "Manual")
# [1] Manual    Manual    Manual    Automatic Automatic ...
# Levels: Manual Automatic

am.names.factor[1]
as.character(am.names.factor[1])  # the label of the first observation
as.numeric(am.names.factor[1])    # the internal integer code, not the label
levels(am.names.factor)[1]        # the label that belongs to code 1

table(am.names.factor)

is.factor(am.names.factor)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # 
# Data Frames

# A data frame is a list of equally long column vectors.
mtcars
str(mtcars)
# 'data.frame':	32 obs. of  11 variables:

# stringsAsFactors is spelled out: its default changed from TRUE to FALSE
# in R 4.0.0, and the factor demonstration below depends on it being TRUE.
df <- data.frame(1:20, 60:41, letters[1:20], stringsAsFactors = TRUE)
df
colnames(df) <- c("n1", "n2", "letters")
df
t(df)           # transposing coerces the data frame to a character matrix

df$letters      # get the vectors from the data frame
df$n1
df[, 2]
df[1, 3]
df[[3]]
levels(df$letters)
is.factor(df[, 3])   # TRUE: the character column was converted to a factor

df <- data.frame(1:20, 60:41, letters[1:20], stringsAsFactors = FALSE)
df
is.factor(df[, 3])   # FALSE: the column stays a character vector

trees
mtcars
lattice::USMortality  # ships with lattice; the prefix avoids library(lattice)
data()    # list of data sets of all packages currently in the search path

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Matrix

# Matrices: two-dimensional, and all elements share one mode
# (numeric, character, ...).
as.matrix(mtcars)
mtcars.mat <- as.matrix(mtcars)

# as.vector() drops the dimensions; the values come out column by column
x <- as.vector(mtcars.mat)
x

# The argument name is written out in full (ncol) instead of relying on
# partial matching of the abbreviation "nco".
mat <- matrix(1:100, ncol = 10)
mat
as.vector(mat)

mat44 <- matrix(1:(4 * 4), ncol = 4)
mat44
as.vector(mat44)

dimnames(mtcars.mat)   # list holding the row names and the column names

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Arrays

# Two dimensions: the values 1..10000 arranged as a 100 x 100 matrix
x <- matrix(1:10000, nrow = 100, ncol = 100)
x

# Three dimensions: an array generalises the matrix beyond rows and columns
x <- array(1:1000, dim = c(10, 10, 10))
x

# A small 2 x 3 x 3 array that is easy to inspect
x <- array(1:(2 * 3 * 3), dim = c(2, 3, 3))
x

str(x)
# int [1:2, 1:3, 1:3] 1 2 3 4 5 6 7 8 9 10 ...

# The same shape filled with character data
x <- array(letters[1:(2 * 3 * 3)], dim = c(2, 3, 3))
x
str(x)
# chr [1:2, 1:3, 1:3] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l"
#                     "m" "n" "o" "p" "q" "r"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Lists - collect different types of data objects

data()  # overview of the data sets available in the attached packages

# A list can bundle several independent objects - here five data frames.
# All of them except mtcars ship with the lattice package; the lattice::
# prefix makes them reachable without a prior library(lattice).
l <- list(mtcars, lattice::barley, lattice::environmental,
          lattice::ethanol, lattice::melanoma)
str(l)

# List of 5
# $ :'data.frame':	32 obs. of  11 variables:
# ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
# ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ...
# ..$ disp: num [1:32] 160 160 108 258 360 ...
# ..$ hp  : num [1:32] 110 110 93 110 175 105 245 62 95 123 ...
# ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
# ..$ wt  : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
# ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ...
# ..$ vs  : num [1:32] 0 0 1 1 0 1 0 1 1 1 ...
# ..$ am  : num [1:32] 1 1 1 0 0 0 0 0 0 0 ...
# ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ...
# ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ...
# $ :'data.frame':	120 obs. of  4 variables:
# ..$ yield  : num [1:120] 27 48.9 27.4 39.9 33 ...
# ..$ variety: Factor w/10 levels "Svansota","No.462",..: 3 3 3 3 3 3 7 7 7 7...
# ..$ year   : Factor w/2 levels "1932","1931": 2 2 2 2 2 2 2 2 2 2...
# ..$ site   : Factor w/6 levels "Grand Rapids",..: 3 6 4 5 1 2 3 6 4 5...
# $ :'data.frame':	111 obs. of  4 variables:
# ..$ ozone      : num [1:111] 41 36 12 18 23 19 8 16 11 14 ...
# ..$ radiation  : num [1:111] 190 118 149 313 299 99 19 256 290 274 ...
# ..$ temperature: num [1:111] 67 72 74 62 65 59 61 69 66 68 ...
# ..$ wind       : num [1:111] 7.4 8 12.6 11.5 8.6 13.8 20.1 9.7 9.2 10.9 ...
# $ :'data.frame':	88 obs. of  3 variables:
# ..$ NOx: num [1:88] 3.74 2.29 1.5 2.88 0.76 ...
# ..$ C  : num [1:88] 12 12 12 12 12 9 9 9 12 12 ...
# ..$ E  : num [1:88] 0.907 0.761 1.108 1.016 1.189 ...
# $ :'data.frame':	37 obs. of  2 variables:
# ..$ year     : num [1:37] 1936 1937 1938 1939 1940 ...
# ..$ incidence: num [1:37] 0.9 0.8 0.8 1.3 1.4 1.2 1.7 1.8 1.6 1.5 ...

l[[1]]
l[[2]]
l[[3]]

l[[1]]

model <- lm(mpg~wt,data=l[[1]])   # building a model from a list object
names(model) 
str(model)                        # the model itselfe is returned as a list

model$coefficients
model[[1]]

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# some basic R functions

mtcars$mpg
# [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 ...
mtcars$mpg^2
# [1]  441.00  441.00 519.84 457.96 349.69 327.61 204.49  595.36  519.84  ...
sqrt(mtcars$mpg^2)
# [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 ...

pi
# [1] 3.141593
90*pi/180 # radiant conversion
sin(90*pi/180)
cos(90*pi/180)
cos(0*pi/180)

celsius <- 20
9/5*celsius+32

celsius <- -20:40
fahrenheit <- 9/5*celsius+32
plot(celsius,fahrenheit,type="s")

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Basic Commands and Statistics with R

rm(list = ls(all = TRUE))

getwd()

#system("ls")

setwd("~/ownCloud/STA_Statistics/basicR/")

search()

options(scipen=100) # scientific off

options(scipen=0) # scientific on

options(digits = 3)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# https://stat.ethz.ch/R-manual/R-patched/library/datasets/html/mtcars.html

attach(mtcars)

search()

detach(mtcars)

search()

mtcars # The data was extracted from the 1974 Motor Trend US magazine

# ?mtcars

class(mtcars) # determine the class of an object

str(mtcars) # Compactly display the internal structure of an R object

# 'data.frame': 32 obs. of 11 variables:

# mpg cyl disp hp drat wt qsec vs am gear carb

# Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4

# Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4

# Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1

# Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1

# Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2

# Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1

# Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4

# Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2

# Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2

# Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4

# Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4

# Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3

# Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3

# Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3

# Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4

# Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4

# Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4

# Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1

# Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2

# Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1

# Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1

# Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2

# AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2

# Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4

# Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2

# Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1

# Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2

# Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2

# Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4

# Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6

# Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8

# Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2

# A data frame with 32 observations on 11 (numeric) variables.

#[, 1] mpg Miles/(US) gallon

#[, 2] cyl Number of cylinders

#[, 3] disp Displacement (cu.in.)

#[, 4] hp Gross horsepower

#[, 5] drat Rear axle ratio

#[, 6] wt Weight (1000 lbs)

#[, 7] qsec 1/4 mile time

#[, 8] vs Engine (0 = V-shaped, 1 = straight)

#[, 9] am Transmission (0 = automatic, 1 = manual)

#[,10] gear Number of forward gears

#[,11] carb Number of carburetors

colnames(mtcars) <- c("mpg","cyl","disp","hp","drat","wt",

"qsec","vs","am","gear","carb")

# colnames(mtcars) <- c("miles-per-gallon","cylinders","displacement",

# "horsepower","rear-axle-ratio","weight","qsec",

# "engine-vs","Transmissionam","gear","carburetors")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# data structures

letters

l <- letters

str(l)

# chr [1:26] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p"

# "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Vectors

t <- c(1:3,"Hello",NA,FALSE,TRUE)

# [1] "1" "2" "3" "Hello" NA "FALSE" "TRUE"

str(t)

# chr [1:7] "1" "2" "3" "Hello" NA "FALSE" "TRUE"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Factors

# Turn the numeric transmission indicator into a labelled factor.
# Coding according to ?mtcars: am = 0 -> automatic, am = 1 -> manual.
mtcars$am[mtcars$am == 1]

which(mtcars$am == 1)

# ifelse() recodes the whole vector in one step. Assigning a string into
# the numeric vector element-wise would silently coerce it to character
# half-way through the recoding.
am.names <- ifelse(mtcars$am == 1, "Manual", "Automatic")

am.names

am.names.factor <- factor(am.names)

am.names.factor

# [1] Manual Manual Manual Automatic Automatic ...

# Levels: Automatic Manual

# internally stored as integer codes that index the table of levels:

# 1 Automatic

# 2 Manual

# Automatic and Manual are the levels of the factor

levels(am.names.factor)

# relevel() only changes which level comes first (the reference level)

relevel(am.names.factor, ref = "Manual")

# [1] Manual Manual Manual Automatic Automatic ...

# Levels: Manual Automatic

am.names.factor[1]

as.character(am.names.factor[1]) # the label of the first observation

as.numeric(am.names.factor[1]) # the internal integer code, not the label

levels(am.names.factor)[1] # the label that belongs to code 1

table(am.names.factor)

is.factor(am.names.factor)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Data Frames

mtcars

str(mtcars)

# 'data.frame': 32 obs. of 11 variables:

df <- data.frame(1:20,60:41,letters[1:20])

colnames(df) <- c("n1","n2","letters")

t(df)

df$letters # get the vectors from the dataframe

df$n1

df[,2]

df[1,3]

df[[3]]

levels(df$letters)

is.factor(df[,3])

df <- data.frame(1:20,60:41,letters[1:20], stringsAsFactors = FALSE)

is.factor(df[,3])

trees

mtcars

USMortality

data() # list of data associated with all current packages in the serch path

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Matrix

as.matrix(mtcars)

mtcars.mat <- as.matrix(mtcars) # all elements of a matrix have the same mode

#(numeric, character)

x <- as.vector(mtcars.mat) # all in order in the vector X

mat <- matrix(1:100,nco=10)

mat

as.vector(mat)

mat44 <- matrix(1:(4*4),nco=4)

mat44

as.vector(mat44)

dimnames(mtcars.mat)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Arrays

x <- 1:10000 # this is a matrix

dim(x) <- c(100,100)

x <- 1:1000 # array - a matrix with more than 2 dim

dim(x) <- c(10,10,10)

x <- 1:(2*3*3) # array - a matrix with more than 2 dim

dim(x) <- c(2,3,3)

str(x)

# int [1:2, 1:3, 1:3] 1 2 3 4 5 6 7 8 9 10 ...

x <- letters[1:(2*3*3)] # array - a matrix with more than 2 dim

dim(x) <- c(2,3,3)

str(x)

# chr [1:2, 1:3, 1:3] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l"

# "m" "n" "o" "p" "q" "r"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Lists - collect different types of data objects

data()

l <- list(mtcars,barley,environmental,ethanol,melanoma)

# most data from lattice package

str(l)

# List of 5

# $ :'data.frame': 32 obs. of 11 variables:

# ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...

# ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ...

# ..$ disp: num [1:32] 160 160 108 258 360 ...

# ..$ hp : num [1:32] 110 110 93 110 175 105 245 62 95 123 ...

# ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...

# ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...

# ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ...

# ..$ vs : num [1:32] 0 0 1 1 0 1 0 1 1 1 ...

# ..$ am : num [1:32] 1 1 1 0 0 0 0 0 0 0 ...

# ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ...

# ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ...

# $ :'data.frame': 120 obs. of 4 variables:

# ..$ yield : num [1:120] 27 48.9 27.4 39.9 33 ...

# ..$ variety: Factor w/10 levels "Svansota","No.462",..: 3 3 3 3 3 3 7 7 7 7...

# ..$ year : Factor w/2 levels "1932","1931": 2 2 2 2 2 2 2 2 2 2...

# ..$ site : Factor w/6 levels "Grand Rapids",..: 3 6 4 5 1 2 3 6 4 5...

# $ :'data.frame': 111 obs. of 4 variables:

# ..$ ozone : num [1:111] 41 36 12 18 23 19 8 16 11 14 ...

# ..$ radiation : num [1:111] 190 118 149 313 299 99 19 256 290 274 ...

# ..$ temperature: num [1:111] 67 72 74 62 65 59 61 69 66 68 ...

# ..$ wind : num [1:111] 7.4 8 12.6 11.5 8.6 13.8 20.1 9.7 9.2 10.9 ...

# $ :'data.frame': 88 obs. of 3 variables:

# ..$ NOx: num [1:88] 3.74 2.29 1.5 2.88 0.76 ...

# ..$ C : num [1:88] 12 12 12 12 12 9 9 9 12 12 ...

# ..$ E : num [1:88] 0.907 0.761 1.108 1.016 1.189 ...

# $ :'data.frame': 37 obs. of 2 variables:

# ..$ year : num [1:37] 1936 1937 1938 1939 1940 ...

# ..$ incidence: num [1:37] 0.9 0.8 0.8 1.3 1.4 1.2 1.7 1.8 1.6 1.5 ...

l[[1]]

l[[2]]

l[[3]]

l[[1]]

model <- lm(mpg~wt,data=l[[1]]) # building a model from a list object

names(model)

str(model) # the model itselfe is returned as a list

model$coefficients

model[[1]]

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# some basic R functions

mtcars$mpg

# [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 ...

mtcars$mpg^2

# [1] 441.00 441.00 519.84 457.96 349.69 327.61 204.49 595.36 519.84 ...

sqrt(mtcars$mpg^2)

# [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 ...

# [1] 3.141593

90*pi/180 # radiant conversion

sin(90*pi/180)

cos(90*pi/180)

cos(0*pi/180)

celsius <- 20

9/5*celsius+32

celsius <- -20:40

fahrenheit <- 9/5*celsius+32

plot(celsius,fahrenheit,type="s")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sum(mtcars$mpg)
# [1] 642.9
cumsum(mtcars$mpg)
# [1]  21.0  42.0  64.8  86.2 104.9 123.0 137.3 161.7
cumprod(mtcars$mpg)
# [1] 2.100000e+01 4.410000e+02 1.005480e+04 2.151727e+05 
#     4.023730e+06 7.282951e+07

c(1,2,3,4,5)    # vectors # concatenating function
c(1:5)
c(TRUE,FALSE)
c("TRUE","FALSE") # character vector

x <- runif(5)
x
# [1] 0.832 0.101 0.926 0.253 0.619
sort(x)

order(x)
# [1] 2 4 5 1 3      (order() returns the sorting permutation, i.e. indices)
x[order(x)]
# [1] 0.101 0.253 0.619 0.832 0.926

x <- c(1:5)
x
y <- c("TRUE","FALSE",NA) 
y
z <- c(x,y)
z
# [1] "1"     "2"     "3"     "4"     "5"     "TRUE"  "FALSE" NA    
rev(z)
# [1] NA      "FALSE" "TRUE"  "5"     "4"     "3"     "2"     "1"   

z[2]
z[2:5]
# [1] "2" "3" "4" "5"
z[z<4]
# [1] "1" "2" "3" NA 
z[z>4]
# [1] "5"     "TRUE"  "FALSE" NA 
z[z>=4]
# [1] "4"     "5"     "TRUE"  "FALSE" NA  
z[-c(3,4)]
# [1] "1"     "2"     "5"     "TRUE"  "FALSE" NA 
z[c(3,4)]
# [1] "3" "4"

z[is.na(z)]
z[is.na(z)]<-0
z
z[z==TRUE]<-1
z[z==FALSE]<-0
z
# [1] "1" "2" "3" "4" "5" "1" "0" "0"

plot(z)
plot(z,type="b")

x <- runif(length(z))*5

plot(x,z,type="b")
plot(x~z,type="b")

plot(z,x,type="b")
lines(z~x,type="b",col="red")

plot(sin((1:360)*pi/180),type="l")
plot(sin((1:360)*pi/180),cos((1:360)*pi/180),type="l")

plot(sin((1:360)*pi/18),cos((1:360)*pi/10),type="l")

plot(sin((1:360)*pi/18),cos((1:360)*pi/10),type="l")

plot(tan((1:360)*pi/180),cos((1:360)*pi/180),type="l")
plot(tan((1:360)*pi/180),sin((1:360)*pi/180),type="l")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

sum(mtcars$mpg)

# [1] 642.9

cumsum(mtcars$mpg)

# [1] 21.0 42.0 64.8 86.2 104.9 123.0 137.3 161.7

cumprod(mtcars$mpg)

# [1] 2.100000e+01 4.410000e+02 1.005480e+04 2.151727e+05

# 4.023730e+06 7.282951e+07

c(1,2,3,4,5) # vectors # concatenating function

c(1:5)

c(TRUE,FALSE)

c("TRUE","FALSE") # character vector

x <- runif(5)

# [1] 0.832 0.101 0.926 0.253 0.619

sort(x)

order(x)

# [1] 2 4 5 1 3 (order() returns the sorting permutation, i.e. indices)

x[order(x)]

# [1] 0.101 0.253 0.619 0.832 0.926

x <- c(1:5)

y <- c("TRUE","FALSE",NA)

z <- c(x,y)

# [1] "1" "2" "3" "4" "5" "TRUE" "FALSE" NA

rev(z)

# [1] NA "FALSE" "TRUE" "5" "4" "3" "2" "1"

z[2]

z[2:5]

# [1] "2" "3" "4" "5"

z[z<4]

# [1] "1" "2" "3" NA

z[z>4]

# [1] "5" "TRUE" "FALSE" NA

z[z>=4]

# [1] "4" "5" "TRUE" "FALSE" NA

z[-c(3,4)]

# [1] "1" "2" "5" "TRUE" "FALSE" NA

z[c(3,4)]

# [1] "3" "4"

z[is.na(z)]

z[is.na(z)]<-0

z[z==TRUE]<-1

z[z==FALSE]<-0

# [1] "1" "2" "3" "4" "5" "1" "0" "0"

plot(z)

plot(z,type="b")

x <- runif(length(z))*5

plot(x,z,type="b")

plot(x~z,type="b")

plot(z,x,type="b")

lines(z~x,type="b",col="red")

plot(sin((1:360)*pi/180),type="l")

plot(sin((1:360)*pi/180),cos((1:360)*pi/180),type="l")

plot(sin((1:360)*pi/18),cos((1:360)*pi/10),type="l")

plot(tan((1:360)*pi/180),cos((1:360)*pi/180),type="l")

plot(tan((1:360)*pi/180),sin((1:360)*pi/180),type="l")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# some loops

for(i in 1:360){
    plot(cos((1:360)*pi/i),sin((1:360)*pi/i),type="l")
    print(i)
    Sys.sleep(0.1)
}

for(i in 360:1){
  plot(cos((1:360)*pi/i),sin((1:360)*pi/i),type="l")
  print(i)
  Sys.sleep(0.1)
}

for(i in 1:360){
  for(e in 1:360){
  plot(cos((1:360)*pi/i),sin((1:360)*pi/e),type="l")
  print(i)
  Sys.sleep(0.1)
  }
}

for(i in 1:360){
  for(e in 1:360){
    plot(sin((1:360)*pi/i),cos((1:360)*pi/e),type="l")
    print(i)
    Sys.sleep(0.1)
  }
}

for(i in 1:10){
  for(e in 1:10){
    plot(sin((1:360)*pi/i),cos((1:360)*pi/e),type="l")
    mtext(paste("i:",i,"e:",e), side=3, outer=TRUE, line=-3)
    print(paste("i:",i,"e:",e))
    Sys.sleep(0.1)
  }
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# function collection

# A tour of frequently used base R functions. Every call is given real
# arguments so the section runs from top to bottom without errors.

letters
# "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r"
# "s" "t" "u" "v" "w" "x" "y" "z"
LETTERS
# "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R"
# "S" "T" "U" "V" "W" "X" "Y" "Z"
letters[1:15]
# [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o"

plot(mtcars$mpg)
print(mtcars$mpg)

# frequency tables
table(mtcars$mpg)
table(mtcars$cyl)
#   4   6   8
#   11  7   14

table(mtcars[, 9:10])
str(table(mtcars[, 9:10])) # table with 2 dimensions
table(mtcars[, 9:11])
str(table(mtcars[, 9:11]))
# 'table' int [1:2, 1:3, 1:6] 3 0 0 4 0 0 4 0 2 2 ...
# - attr(*, "dimnames")=List of 3
# ..$ am  : chr [1:2] "0" "1"
# ..$ gear: chr [1:3] "3" "4" "5"
# ..$ carb: chr [1:6] "1" "2" "3" "4" ...

# simple summaries of a vector
length(mtcars$mpg)
cat(mtcars$mpg)
mean(mtcars$mpg)
median(mtcars$mpg)
range(mtcars$mpg)
unique(mtcars$mpg)

rep(mtcars$mpg, 10)

# names of an object (these functions need an object to work on)
names(mtcars)
colnames(mtcars)
rownames(mtcars)

# differences between consecutive elements
diff(mtcars$mpg)
plot(diff(mtcars$mpg), type = "h", xlab = "", ylab = "difference")
points(diff(mtcars$mpg), col = "red")

# sort() returns the sorted values, order() the permutation of indices
sort(mtcars$mpg)
order(mtcars$mpg)
rev(mtcars$mpg)
rev(sort(mtcars$mpg))

cumsum(mtcars$mpg)
cumprod(mtcars$mpg)

rank(mtcars$vs) # Returns the sample ranks of the values in a vector.
(r1 <- rank(x1 <- c(3, 1, 4, 15, 92)))
rank(mtcars$vs, ties.method = "first")  # first occurrence wins
                                        ## ranks without averaging
rank(mtcars$vs, ties.method = "last")   #  last occurrence wins
                                        ## ranks without averaging
rank(mtcars$vs, ties.method = "random") # ties broken at random
                                        ## ranks without averaging
rank(mtcars$vs, ties.method = "random") # and again
                                        ## ranks without averaging

# set operations and matching
1:10
7:20
intersect(1:10, 7:20)

match(1:10, 7:20)   # match returns a vector of the positions of (first)
                    # matches of its first argument in its second.

1:10 %in% c(1, 3, 5, 9)
sstr <- c("c", "ab", "B", "bba", "c", NA, "@", "bla", "a", "Ba", "%")
sstr %in% c(letters, LETTERS)
sstr[sstr %in% c(letters, LETTERS)]

# apply family and grouped summaries
apply(as.matrix(mtcars), 2, max)       # column-wise maximum of a matrix
sapply(mtcars, mean)                   # mean of every column
aggregate(mpg ~ cyl, data = mtcars, FUN = mean)
aggregate(state.x77, list(Region = state.region), mean)
tapply(mtcars$mpg, mtcars$cyl, mean)   # mean mpg per number of cylinders

# two vectors share no column, so merge() returns all combinations
merge(mtcars$vs, c(1, 2))

# reading delimited text files: write a temporary file and read it back
csv.file <- tempfile(fileext = ".csv")
write.csv(mtcars, csv.file)
read.csv(csv.file, row.names = 1)
read.table(csv.file, header = TRUE, sep = ",", row.names = 1)
unlink(csv.file)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# histograms

hist(mtcars$mpg)

# looping with R: one histogram per column on a 3 x 4 grid.
# seq_along() iterates over the columns directly and stays correct for a
# data frame without columns; [[i]] extracts column i as a plain vector.
par(mfrow = c(3, 4))
for (i in seq_along(mtcars)) {
  hist(mtcars[[i]],
       main = paste("Data: MTcars", colnames(mtcars)[i]),
       xlab = paste(colnames(mtcars)[i]))
}
par(mfrow = c(1, 1))   # back to a single plot per page

hist(mtcars$mpg, main = "Data: MTcars - mpg Miles per gallon",
     xlab = "mpg Miles per gallon")

# breaks is a suggestion for the number of cells
hist(mtcars$mpg, breaks = 10, main = "Data: MTcars - mpg Miles per gallon",
     xlab = "mpg Miles per gallon")

hist(mtcars$mpg, breaks = length(mtcars$mpg),
     main = "Data: MTcars - mpg Miles per gallon",
     xlab = "mpg Miles per gallon")

# probability densities instead of counts (freq = FALSE)
hist(mtcars$mpg, breaks = length(mtcars$mpg), freq = FALSE,
     main = "Data: MTcars - mpg Miles per gallon", xlab = "mpg Miles per gallon")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#  Boxplot with Scatterplot

# Scatterplot of mpg against weight, framed by marginal boxplots.
# The columns are referenced through the data frame (data = mtcars,
# mtcars$...) because mtcars is not attached at this point of the script.
model <- lm( mpg ~ wt, data = mtcars )   # fitted once, reused below

# First panel: no new = TRUE, so the scatterplot starts on a clean page
# instead of being drawn over whatever plot is currently on the device.
par( fig = c( 0, 0.8, 0, 0.8 ) )
plot( mtcars$wt, mtcars$mpg, xlab = "Car Weight",
      ylab = "Miles Per Gallon" )
abline( model, col = "red" )                             # regression line (y~x)
lines( lowess( mtcars$wt, mtcars$mpg ), col = "blue" )   # lowess line (x,y)
                                         # LOWESS smoother which uses
                                         # locally-weighted
                                         # polynomial regression
text( 4, 32, paste( "Intercept", model$coefficients[1] ) )
text( 4, 30, paste( "wt", model$coefficients[2] ) )

# Second panel (top margin): new = TRUE adds to the existing page
par( fig = c( 0, 0.8, 0.55, 1 ), new = TRUE )
boxplot( mtcars$wt, horizontal = TRUE, axes = FALSE,
         col = "green", notch = TRUE )
m.wt <- mean( mtcars$wt )
mtext( paste( "mean wt", m.wt ), side = 3, outer = TRUE, line = -8 )

# Third panel (right margin)
par( fig = c( 0.65, 1, 0, 0.8 ), new = TRUE )
boxplot( mtcars$mpg, axes = FALSE, col = "green", notch = TRUE )
m.mpg <- mean( mtcars$mpg )
mtext( paste( "mean mpg", m.mpg ), side = 2, outer = TRUE, line = -28 )

mtext( "Miles per gallon vs Car Weight", side = 3, outer = TRUE, line = -3 )

par( mfrow = c( 1, 1 ) )   # reset the layout to one full-size plot

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# some loops

for(i in 1:360){

plot(cos((1:360)*pi/i),sin((1:360)*pi/i),type="l")

print(i)

Sys.sleep(0.1)

}

for(i in 360:1){

plot(cos((1:360)*pi/i),sin((1:360)*pi/i),type="l")

print(i)

Sys.sleep(0.1)

}

for(i in 1:360){

for(e in 1:360){

plot(cos((1:360)*pi/i),sin((1:360)*pi/e),type="l")

print(i)

Sys.sleep(0.1)

}

for(i in 1:360){

for(e in 1:360){

plot(sin((1:360)*pi/i),cos((1:360)*pi/e),type="l")

print(i)

Sys.sleep(0.1)

}

for(i in 1:10){

for(e in 1:10){

plot(sin((1:360)*pi/i),cos((1:360)*pi/e),type="l")

mtext(paste("i:",i,"e:",e), side=3, outer=TRUE, line=-3)

print(paste("i:",i,"e:",e))

Sys.sleep(0.1)

}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# function collection

letters

# "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r"

# "s" "t" "u" "v" "w" "x" "y" "z"

LETTERS

# "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R"

# "S" "T" "U" "V" "W" "X" "Y" "Z"

letters[1:15]

# [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o"

plot(mtcars$mpg)

print(mtcars$mpg)

table(mtcars$mpg)

table(mtcars$cyl)

# 4 6 8

# 11 7 14

table(mtcars[,9:10])

str(table(mtcars[,9:10])) # table with 2 dimmensions

table(mtcars[,9:11])

str(table(mtcars[,9:11]))

# 'table' int [1:2, 1:3, 1:6] 3 0 0 4 0 0 4 0 2 2 ...

# - attr(*, "dimnames")=List of 3

# ..$ am : chr [1:2] "0" "1"

# ..$ gear: chr [1:3] "3" "4" "5"

# ..$ carb: chr [1:6] "1" "2" "3" "4" ...

length(mtcars$mpg)

cat(mtcars$mpg)

mean(mtcars$mpg)

median(mtcars$mpg)

range(mtcars$mpg)

unique(mtcars$mpg)

rep(mtcars$mpg,10)

names()

colnames()

rownames()

diff(mtcars$mpg)

plot(diff(mtcars$mpg),type="h",xlab="",ylab="difference")

points(diff(mtcars$mpg),col="red")

sort(mtcars$mpg)

order(mtcars$mpg)

rev(mtcars$mpg)

rev(sort(mtcars$mpg))

cumsum(mtcars$mpg)

cumprod(mtcars$mpg)

rank(mtcars$vs) # Returns the sample ranks of the values in a vector.

(r1 <- rank(x1 <- c(3, 1, 4, 15, 92)))

rank(mtcars$vs, ties.method= "first") # first occurrence wins

## ranks without averaging

rank(mtcars$vs, ties.method= "last") # last occurrence wins

## ranks without averaging

rank(mtcars$vs, ties.method= "random") # ties broken at random

## ranks without averaging

rank(mtcars$vs, ties.method= "random") # and again

## ranks without averaging

1:10

7:20

intersect(1:10, 7:20)

match(1:10,7:20) # match returns a vector of the positions of (first)

# matches of its first argument in its second.

1:10 %in% c(1,3,5,9)

sstr <- c("c","ab","B","bba","c",NA,"@","bla","a","Ba","%")

sstr %in% c(letters, LETTERS)

sstr[sstr %in% c(letters, LETTERS)]

apply()

sapply()

aggregate()

aggregate(state.x77, list(Region = state.region), mean)

tapply()

merge(mtcars$vs,c(1,2))

read.csv()

read.table()

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# histograms

hist(mtcars$mpg)

# looping with R

par(mfrow=c(3,4))

for(i in 1:length(mtcars[1,])){

hist(mtcars[,i],main=paste("Data: MTcars",colnames(mtcars)[i]),

xlab=paste(colnames(mtcars)[i]))

}

par(mfrow=c(1,1))

hist(mtcars$mpg, main="Data: MTcars - mpg Miles per gallon",

xlab="mpg Miles per gallon")

hist(mtcars$mpg, breaks=10, main="Data: MTcars - mpg Miles per gallon",

xlab="mpg Miles per gallon")

hist(mtcars$mpg, breaks=length(mtcars$mpg),

main="Data: MTcars - mpg Miles per gallon",

xlab="mpg Miles per gallon")

# probability densities, component

hist(mtcars$mpg, breaks=length(mtcars$mpg), freq=FALSE,

main="Data: MTcars - mpg Miles per gallon", xlab="mpg Miles per gallon")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Boxplot with Scatterplot

# Main panel: scatterplot in the lower-left region of the device.
par(fig = c(0, 0.8, 0, 0.8), new = TRUE)
plot(mtcars$wt, mtcars$mpg, xlab = "Car Weight",
     ylab = "Miles Per Gallon")

# Fit the regression once and name the data explicitly. The original used
# bare `mpg` and `wt`, which only resolve if mtcars happens to be attached
# (and can be shadowed by other objects called `mpg`).
model <- lm(mpg ~ wt, data = mtcars)

abline(model, col = "red")                          # regression line (y~x)
lines(lowess(mtcars$wt, mtcars$mpg), col = "blue")  # lowess line (x,y):
                                                    # locally-weighted
                                                    # polynomial regression

text(4, 32, paste("Intercept", model$coefficients[1]))
text(4, 30, paste("wt", model$coefficients[2]))

# Top panel: horizontal boxplot of weight above the scatterplot.
par(fig = c(0, 0.8, 0.55, 1), new = TRUE)
boxplot(mtcars$wt, horizontal = TRUE, axes = FALSE,
        col = "green", notch = TRUE)
m.wt <- mean(mtcars$wt)
mtext(paste("mean wt", m.wt), side = 3, outer = TRUE, line = -8)

# Right panel: vertical boxplot of mpg beside the scatterplot.
par(fig = c(0.65, 1, 0, 0.8), new = TRUE)
boxplot(mtcars$mpg, axes = FALSE, col = "green", notch = TRUE)
m.mpg <- mean(mtcars$mpg)
mtext(paste("mean mpg", m.mpg), side = 2, outer = TRUE, line = -28)

mtext("Miles per gallon vs Car Weight", side = 3, outer = TRUE, line = -3)

par(mfrow = c(1, 1))

# Martin Stoppacher                                                             #
# office@martinstoppacher.com                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#################################################################################

# Martin Stoppacher #

# office@martinstoppacher.com #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

#################################################################################

Using trigonometric functions in R

06/18/2018Code, Data Analysis, Programming, RMartin Stoppacher

R expects angles in radians as input for its trigonometric functions.

# calculating rad
pi / 2
90 * pi / 180 # transforming degrees into radians
degr <- 0:360
degr
# The original multiplied the undefined `grad`; the degree vector is `degr`.
rad <- degr * (pi / 180)
rad

# calculating rad
pi / 2
90 * pi / 180 # transforming degrees into radians
degr <- 0:360
degr
# The original multiplied the undefined `grad`; the degree vector is `degr`.
rad <- degr * (pi / 180)
rad

Now we can plot the function.

sin(rad)
plot(sin(rad))

1 2	sin(rad) plot(sin(rad))

And by playing with the functions we get a funny graphic output.

# asin(sin(x)) as points, then again as a line that serves as the canvas
# for the overlays below.
plot(asin(sin(rad)))
plot(asin(sin(rad)), type = "l")

# Sine and its mirror image.
points(sin(rad))
points(-sin(rad), col = "green")

# Mirror image of the asin(sin(x)) curve.
lines(-asin(sin(rad)))

# Reciprocal of the sine and its mirror image.
lines(1 / sin(rad), col = "red")
lines(-1 / sin(rad), col = "blue")

# Mark positions 91 and 271 on the zero line.
points(x = c(91, 271), y = c(0, 0))

# asin(sin(x)) as points, then again as a line that serves as the canvas
# for the overlays below.
plot(asin(sin(rad)))
plot(asin(sin(rad)), type = "l")

# Sine and its mirror image.
points(sin(rad))
points(-sin(rad), col = "green")

# Mirror image of the asin(sin(x)) curve.
lines(-asin(sin(rad)))

# Reciprocal of the sine and its mirror image.
lines(1 / sin(rad), col = "red")
lines(-1 / sin(rad), col = "blue")

# Mark positions 91 and 271 on the zero line.
points(x = c(91, 271), y = c(0, 0))

And if we include the tangent, the graphic looks like this:

# Print the raw tangent values.
tan(rad)

# Working copy of the tangent with elements 91 and 271 overwritten by 0
# before plotting.
tangens <- tan(rad)
tangens[c(91, 271)] <- 0

# Tangent as the base plot, sine and cosine on top.
plot(tangens, type = "l")
lines(sin(rad), col = "red", type = "l")
lines(cos(rad), col = "green", type = "l")

# Print the raw tangent values.
tan(rad)

# Working copy of the tangent with elements 91 and 271 overwritten by 0
# before plotting.
tangens <- tan(rad)
tangens[c(91, 271)] <- 0

# Tangent as the base plot, sine and cosine on top.
plot(tangens, type = "l")
lines(sin(rad), col = "red", type = "l")
lines(cos(rad), col = "green", type = "l")

# Overlays for the tangent plot. lines() adds to the plot that is currently
# open, so these calls need a preceding plot() call.
# Tangent in red, its reciprocal (1/tangens) in green.
lines(tangens,type="l",col="red")
lines(1/tangens,type="l",col="green")
# dividing by 10 for visual reasons
lines(tangens/10,type="l",col="red")
lines(1/tangens/10,type="l",col="green")

# Same overlay calls again (second copy of the listing above).
lines(tangens,type="l",col="red")

lines(1/tangens,type="l",col="green")

# dividing by 10 for visual reasons

lines(tangens/10,type="l",col="red")

lines(1/tangens/10,type="l",col="green")

# Fresh plots of the tangent with its reciprocal on top: first in the
# default colour, then with the reciprocal in green.
plot(tangens,type="l")
lines(1/tangens,type="l")
plot(tangens,type="l")
lines(1/tangens,type="l",col="green")

# Same two plots again (second copy of the listing above).
plot(tangens,type="l")

lines(1/tangens,type="l")

plot(tangens,type="l")

lines(1/tangens,type="l",col="green")

Audio file conversion with afconvert (mac)

10/02/2017Code, Engineering, Programming, SoundengineeringMartin Stoppacher

I was looking for a simple and elegant way to convert a large number of audio files from one format (.caf) to another (.aif). The solution I found is a very elegant one and also comes included with your operating system – if you are using a Mac.

afconvert -f AIFF -d BEI24@48000 "Wow Bass 03.caf" "Wow Bass 03.aif"

1	afconvert -f AIFF -d BEI24@48000 "Wow Bass 03.caf" "Wow Bass 03.aif"

And now here is the most amazing part. It is super easy to execute the conversion of multiple files by just one command line.

for i in *.caf; do afconvert -f AIFF -d BEI24@48000 "$i" "${i%.caf}.aif"; done

1	for i in *.caf; do afconvert -f AIFF -d BEI24@48000 "$i" "${i%.caf}.aif"; done

or to run through subdirectories:

# Each directory is visited in a subshell with a quoted name: names containing
# spaces work, and a failed cd can no longer be followed by a stray "cd ..".
# 1) Print the path of every sub-directory.
for d in */ ; do ( cd "$d" && pwd ); done
# 2) Dry run: list every .caf file together with the directory it is in.
for d in */ ; do ( cd "$d" && for i in *.caf; do echo "$i"; pwd; done ); done
# 3) Convert every .caf file in every sub-directory.
for d in */ ; do ( cd "$d" && for i in *.caf; do afconvert -f AIFF -d BEI24@48000 "$i" "${i%.caf}.aif"; done ); done

# Each directory is visited in a subshell with a quoted name: names containing
# spaces work, and a failed cd can no longer be followed by a stray "cd ..".
# 1) Print the path of every sub-directory.
for d in */ ; do ( cd "$d" && pwd ); done

# 2) Dry run: list every .caf file together with the directory it is in.
for d in */ ; do ( cd "$d" && for i in *.caf; do echo "$i"; pwd; done ); done

# 3) Convert every .caf file in every sub-directory.
for d in */ ; do ( cd "$d" && for i in *.caf; do afconvert -f AIFF -d BEI24@48000 "$i" "${i%.caf}.aif"; done ); done

or with recursion by using find:

# BSD find (macOS) requires an explicit start directory, hence "find .".
find . -name "*.caf"
# NUL-separated names plus "IFS= read -r -d ''" keep file names with spaces or
# leading/trailing blanks intact.
find . -type f -iname '*.caf' -print0 | while IFS= read -r -d '' name; do cp "$name" "/.../..."; done
# The target keeps the relative sub-directory of the source file, so that
# directory is created first; otherwise afconvert has nowhere to write.
find . -type f -iname '*.caf' -print0 | while IFS= read -r -d '' name; do out="/Volumes/Daten/Logic/test/${name%.*}.aif"; mkdir -p "$(dirname "$out")"; afconvert -f AIFF -d BEI24@48000 "$name" "$out"; done

# BSD find (macOS) requires an explicit start directory, hence "find .".
find . -name "*.caf"

# NUL-separated names plus "IFS= read -r -d ''" keep file names with spaces or
# leading/trailing blanks intact.
find . -type f -iname '*.caf' -print0 | while IFS= read -r -d '' name; do cp "$name" "/.../..."; done

# The target keeps the relative sub-directory of the source file, so that
# directory is created first; otherwise afconvert has nowhere to write.
find . -type f -iname '*.caf' -print0 | while IFS= read -r -d '' name; do out="/Volumes/Daten/Logic/test/${name%.*}.aif"; mkdir -p "$(dirname "$out")"; afconvert -f AIFF -d BEI24@48000 "$name" "$out"; done

Key	linear PCM format
LE	Little Endian
BE	Big Endian
F	Floating point
I	Integer
UI	Unsigned integer
8/16/24/32/64	Number of bits

Number of bits	Information Size
8	256
16	65536
24	16777216
32	4294967296
64	18446744073709551616

afconvert -hf

1	afconvert -hf

Audio file and data formats:	data_formats:
‘3gpp’ = 3GP Audio (.3gp)	‘Qclp’ ‘aac ‘ ‘aace’ ‘aach’ ‘aacl’ ‘aacp’ ‘samr’
‘3gp2’ = 3GPP-2 Audio (.3g2)	‘Qclp’ ‘aac ‘ ‘aace’ ‘aach’ ‘aacl’ ‘aacp’ ‘samr’
‘adts’ = AAC ADTS (.aac, .adts)	‘aac ‘ ‘aach’ ‘aacp’
‘ac-3’ = AC3 (.ac3)	‘ac-3’
‘AIFC’ = AIFC (.aifc, .aiff, .aif)	I8 BEI16 BEI24 BEI32 BEF32 BEF64 UI8 ‘ulaw’ ‘alaw’ ‘MAC3’ ‘MAC6’ ‘ima4’ ‘QDMC’ ‘QDM2’ ‘Qclp’ ‘agsm’
‘AIFF’ = AIFF (.aiff, .aif)	I8 BEI16 BEI24 BEI32
‘amrf’ = AMR (.amr)	‘samr’
‘m4af’ = Apple MPEG-4 Audio (.m4a, .m4r)	‘aac ‘ ‘aace’ ‘aach’ ‘aacl’ ‘aacp’ ‘alac’
‘caff’ = CAF (.caf)	‘.mp1’ ‘.mp2’ ‘.mp3’ ‘QDM2’ ‘QDMC’ ‘Qclp’ ‘Qclq’ ‘aac ‘ ‘aace’ ‘aach’ ‘aacl’ ‘aacp’ ‘alac’ ‘alaw’ ‘dvi8’ ‘ilbc’ ‘ima4’ I8 BEI16 BEI24 BEI32 BEF32 BEF64 LEI16 LEI24 LEI32 LEF32 LEF64 ‘ms\x00\x02’ ‘ms\x00\x11’ ‘ms\x001’ ‘paac’ ‘samr’ ‘ulaw’
‘MPG1’ = MPEG Layer 1 (.mp1, .mpeg, .mpa)	‘.mp1’
‘MPG2’ = MPEG Layer 2 (.mp2, .mpeg, .mpa)	‘.mp2’
‘MPG3’ = MPEG Layer 3 (.mp3, .mpeg, .mpa)	‘.mp3’
‘mp4f’ = MPEG-4 Audio (.mp4)	data_formats: ‘aac ‘ ‘aace’ ‘aach’ ‘aacl’ ‘aacp’
‘NeXT’ = NeXT/Sun (.snd, .au)	I8 BEI16 BEI24 BEI32 BEF32 BEF64 ‘ulaw’
‘Sd2f’ = Sound Designer II (.sd2)	I8 BEI16 BEI24 BEI32
‘WAVE’ = WAVE (.wav)	UI8 LEI16 LEI24 LEI32 LEF32 LEF64 ‘ulaw’ ‘alaw’

Supported Audio File and Data Formats in OS X

ARCH/GARCH modelling

10/31/2015Code, Data Analysis, Econometrics, Economics, Programming, RMartin Stoppacher

library(quantmod)
library(rugarch)
library("PerformanceAnalytics")

# Download prices and try several return series. Each assignment to spyRets
# replaces the previous one; the two plots show the IBM returns.
getSymbols("SPY", from = "1900-01-01")
getSymbols("^GDAXI", from = "1900-01-01")
spyRets <- na.omit(ROC(Cl(GDAXI)))

getSymbols("^GSPC", from = "1900-01-01")
spyRets <- na.omit(ROC(Cl(GSPC)))

getSymbols("IBM", from = "1900-01-01")
spyRets <- na.omit(ROC(Cl(IBM)))

plot(cumsum(spyRets["2008"]))
plot(cumsum(spyRets["2014"]))

# The back test below runs on SPY returns.
spyRets <- na.trim(ROC(Cl(SPY)))
#spyRets = na.trim( diff( Cl( SPY ) ) )

# Sample selection. The original set `ss` to 2006/2008 and then straight
# away to 2011/2014, yet still ran the 2008 indicator and performance lines
# afterwards; by then ss["2008"] was empty and those lines failed. The window
# and the forecast year are now chosen once and used consistently.
train_window <- "2011/2014" # alternative: "2006/2008"
test_year <- "2014"         # alternative: "2008"

ss <- spyRets[train_window]
outOfSample <- NROW(ss[test_year])

spec <- ugarchspec(
  variance.model = list(garchOrder = c(1, 1)),
  mean.model = list(armaOrder = c(1, 1), include.mean = TRUE),
  distribution.model = "sged")

# Fit on everything except the last `outOfSample` rows, then roll one-step
# forecasts through the held-back rows.
fit <- ugarchfit(spec = spec, data = ss, out.sample = outOfSample)
fore <- ugarchforecast(fit, n.ahead = 1, n.roll = outOfSample)

# Build some sort of indicator based on the forecasts
ind <- xts(head(as.array(fore)[, 2, ], -1), order.by = index(ss[test_year]))

# NOTE(review): this maps a negative forecast to +1 and everything else to
# -1; confirm that the sign convention is intended.
ind <- ifelse(ind < 0, 1, -1)

# Compute the performance
mm <- merge(ss[test_year], ind, all = FALSE)
mod <- mm[, 1] * mm[, 2]
charts.PerformanceSummary(merge(spyRets[test_year], mod))

plot(cumsum(mod))

# Scratch calculation kept from the original (prints 3).
cumprod(1 + 1 * 2)

tail(cumprod(mm[, 1] * mm[, 2] + 1))
plot(cumprod(mm[, 1] * mm[, 2] + 1))
# Output (last line): 2005-12-30  1.129232
# (that quoted line carries a 2005 date, so it was not produced by the
# sample selected above)


library(quantmod)
library(fArma)

# Get S&P 500
getSymbols("^GSPC", from = "2000-01-01")

# Compute the daily returns
GSPC.rets <- diff(log(Cl(GSPC)))

# Use only the last two years of returns
GSPC.tail <- as.ts(tail(GSPC.rets, 500))

# Fit the model
GSPC.arma <- armaFit(formula = ~ arma(2, 2), data = GSPC.tail)

# Single fit to read off the AIC. The original used `xx` without defining
# it; it is set here to the return window prepared above.
xx <- GSPC.tail
xxArma <- armaFit(xx ~ arma(5, 1), data = xx)
xxArma@fit$aic

# Search ARMA(p, q) orders for the fit with the lowest AIC.
#
# Arguments:
#   xx        series handed to armaFit() as `data`
#   minOrder  c(p, q) lower bounds of the search grid
#   maxOrder  c(p, q) upper bounds of the search grid
#   trace     if TRUE, print one line per order that was tried
#
# Returns list(aic, fit, model) for the best fit, where `model` is c(p, q),
# or FALSE when no order produced a usable fit.
#
# Orders whose fit raises an error or a warning are skipped. Compared with
# the original, the unused `len` is gone, a fit whose AIC is NA/NaN is
# skipped instead of crashing the comparison, and the best fit is tracked
# through NULL/Inf instead of the magic number 1e9.
findBestArma <- function(xx, minOrder = c(0, 0), maxOrder = c(5, 5),
                         trace = FALSE) {
  bestAic <- Inf
  bestFit <- NULL
  bestModel <- NULL

  for (p in minOrder[1]:maxOrder[1]) {
    for (q in minOrder[2]:maxOrder[2]) {
      # ARMA(0, 0) is not a candidate.
      if (p == 0 && q == 0) {
        next
      }

      formula <- as.formula(paste0("xx ~ arma(", p, ",", q, ")"))

      fit <- tryCatch(armaFit(formula, data = xx),
                      error = function(err) FALSE,
                      warning = function(warn) FALSE)

      if (is.logical(fit)) {
        if (trace) {
          print(paste0("(", p, ",", q, "): None"))
        }
        next
      }

      fitAic <- fit@fit$aic
      if (trace) {
        print(paste0("(", p, ",", q, "): AIC = ", fitAic))
      }

      if (length(fitAic) == 1L && is.finite(fitAic) && fitAic < bestAic) {
        bestAic <- fitAic
        bestFit <- fit
        bestModel <- c(p, q)
      }
    }
  }

  if (is.null(bestFit)) {
    return(FALSE)
  }

  list(aic = bestAic, fit = bestFit, model = bestModel)
}

library(quantmod)
library(fArma)

# Fit ARMA(0, 2) to the last 500 log returns of SPY's adjusted close and
# print the one-step-ahead forecast without plotting.
getSymbols("SPY", from = "1900-01-01")
SPY.rets <- diff(log(Ad(SPY)))
SPY.arma <- armaFit(~ arma(0, 2), data = as.ts(tail(SPY.rets, 500)))
predict(SPY.arma, n.ahead = 1, doplot = FALSE)

# Now, to build an indicator for back testing, one can walk the
# daily return series and at each point perform the steps we covered so
# far. The original loop was pseudocode: it called findBestArma() without
# data and passed its result list straight to predict(). It is written here
# as a function, so the script can be sourced without `dailyRetSeries` and
# `history` being defined.
#
# Arguments:
#   dailyRetSeries  daily return series
#   history         number of trailing observations used for each fit
#
# Returns a list with one entry per step from `history` to the end of the
# series: the one-step-ahead forecast of the best ARMA fit, or NULL where
# findBestArma() found no model (it returns FALSE in that case).
walkForwardArma <- function(dailyRetSeries, history) {
  stopifnot(history >= 1, history <= length(dailyRetSeries))

  steps <- history:length(dailyRetSeries)
  forecasts <- vector("list", length(steps))

  for (k in seq_along(steps)) {
    ii <- steps[k]
    # The `history` observations ending at position ii.
    tt <- as.ts(tail(head(dailyRetSeries, ii), history))
    ttArma <- findBestArma(tt)
    if (is.list(ttArma)) {
      forecasts[[k]] <- predict(ttArma$fit, n.ahead = 1, doplot = FALSE)
    }
  }

  forecasts
}

library(quantmod)
library(fGarch)

# Fit ARMA(0, 2) + GARCH(1, 1) to the last 500 log returns of SPY's adjusted
# close and print the one-step-ahead forecast without plotting.
getSymbols("SPY", from = "1900-01-01")
SPY.rets <- diff(log(Ad(SPY)))
SPY.garch <- garchFit(~ arma(0, 2) + garch(1, 1),
                      data = as.ts(tail(SPY.rets, 500)))
predict(SPY.garch, n.ahead = 1, doplot = FALSE)

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

library(quantmod)
library(rugarch)
library("PerformanceAnalytics")

# Download prices and try several return series. Each assignment to spyRets
# replaces the previous one; the two plots show the IBM returns.
getSymbols("SPY", from = "1900-01-01")
getSymbols("^GDAXI", from = "1900-01-01")
spyRets <- na.omit(ROC(Cl(GDAXI)))

getSymbols("^GSPC", from = "1900-01-01")
spyRets <- na.omit(ROC(Cl(GSPC)))

getSymbols("IBM", from = "1900-01-01")
spyRets <- na.omit(ROC(Cl(IBM)))

plot(cumsum(spyRets["2008"]))
plot(cumsum(spyRets["2014"]))

# The back test below runs on SPY returns.
spyRets <- na.trim(ROC(Cl(SPY)))
#spyRets = na.trim( diff( Cl( SPY ) ) )

# Sample selection. The original set `ss` to 2006/2008 and then straight
# away to 2011/2014, yet still ran the 2008 indicator and performance lines
# afterwards; by then ss["2008"] was empty and those lines failed. The window
# and the forecast year are now chosen once and used consistently.
train_window <- "2011/2014" # alternative: "2006/2008"
test_year <- "2014"         # alternative: "2008"

ss <- spyRets[train_window]
outOfSample <- NROW(ss[test_year])

spec <- ugarchspec(
  variance.model = list(garchOrder = c(1, 1)),
  mean.model = list(armaOrder = c(1, 1), include.mean = TRUE),
  distribution.model = "sged")

# Fit on everything except the last `outOfSample` rows, then roll one-step
# forecasts through the held-back rows.
fit <- ugarchfit(spec = spec, data = ss, out.sample = outOfSample)
fore <- ugarchforecast(fit, n.ahead = 1, n.roll = outOfSample)

# Build some sort of indicator based on the forecasts
ind <- xts(head(as.array(fore)[, 2, ], -1), order.by = index(ss[test_year]))

# NOTE(review): this maps a negative forecast to +1 and everything else to
# -1; confirm that the sign convention is intended.
ind <- ifelse(ind < 0, 1, -1)

# Compute the performance
mm <- merge(ss[test_year], ind, all = FALSE)
mod <- mm[, 1] * mm[, 2]
charts.PerformanceSummary(merge(spyRets[test_year], mod))

plot(cumsum(mod))

# Scratch calculation kept from the original (prints 3).
cumprod(1 + 1 * 2)

tail(cumprod(mm[, 1] * mm[, 2] + 1))
plot(cumprod(mm[, 1] * mm[, 2] + 1))
# Output (last line): 2005-12-30 1.129232
# (that quoted line carries a 2005 date, so it was not produced by the
# sample selected above)

library(quantmod)
library(fArma)

# Get S&P 500
getSymbols("^GSPC", from = "2000-01-01")

# Compute the daily returns
GSPC.rets <- diff(log(Cl(GSPC)))

# Use only the last two years of returns
GSPC.tail <- as.ts(tail(GSPC.rets, 500))

# Fit the model
GSPC.arma <- armaFit(formula = ~ arma(2, 2), data = GSPC.tail)

# Single fit to read off the AIC. The original used `xx` without defining
# it; it is set here to the return window prepared above.
xx <- GSPC.tail
xxArma <- armaFit(xx ~ arma(5, 1), data = xx)
xxArma@fit$aic

# Search ARMA(p, q) orders for the fit with the lowest AIC.
#
# Arguments:
#   xx        series handed to armaFit() as `data`
#   minOrder  c(p, q) lower bounds of the search grid
#   maxOrder  c(p, q) upper bounds of the search grid
#   trace     if TRUE, print one line per order that was tried
#
# Returns list(aic, fit, model) for the best fit, where `model` is c(p, q),
# or FALSE when no order produced a usable fit.
#
# This copy of the listing had lost the `next` that skips ARMA(0, 0) and
# several closing braces, so it did not parse; both are restored. Orders
# whose fit raises an error or a warning are skipped, as is a fit whose AIC
# is NA/NaN. The best fit is tracked through NULL/Inf instead of the magic
# number 1e9, and the unused `len` is gone.
findBestArma <- function(xx, minOrder = c(0, 0), maxOrder = c(5, 5),
                         trace = FALSE) {
  bestAic <- Inf
  bestFit <- NULL
  bestModel <- NULL

  for (p in minOrder[1]:maxOrder[1]) {
    for (q in minOrder[2]:maxOrder[2]) {
      # ARMA(0, 0) is not a candidate.
      if (p == 0 && q == 0) {
        next
      }

      formula <- as.formula(paste0("xx ~ arma(", p, ",", q, ")"))

      fit <- tryCatch(armaFit(formula, data = xx),
                      error = function(err) FALSE,
                      warning = function(warn) FALSE)

      if (is.logical(fit)) {
        if (trace) {
          print(paste0("(", p, ",", q, "): None"))
        }
        next
      }

      fitAic <- fit@fit$aic
      if (trace) {
        print(paste0("(", p, ",", q, "): AIC = ", fitAic))
      }

      if (length(fitAic) == 1L && is.finite(fitAic) && fitAic < bestAic) {
        bestAic <- fitAic
        bestFit <- fit
        bestModel <- c(p, q)
      }
    }
  }

  if (is.null(bestFit)) {
    return(FALSE)
  }

  list(aic = bestAic, fit = bestFit, model = bestModel)
}

library(quantmod)
library(fArma)

# Fit ARMA(0, 2) to the last 500 log returns of SPY's adjusted close and
# print the one-step-ahead forecast without plotting.
getSymbols("SPY", from = "1900-01-01")
SPY.rets <- diff(log(Ad(SPY)))
SPY.arma <- armaFit(~ arma(0, 2), data = as.ts(tail(SPY.rets, 500)))
predict(SPY.arma, n.ahead = 1, doplot = FALSE)

# Now, to build an indicator for back testing, one can walk the
# daily return series and at each point perform the steps we covered so
# far. The original loop was pseudocode: it called findBestArma() without
# data and passed its result list straight to predict(). It is written here
# as a function, so the script can be sourced without `dailyRetSeries` and
# `history` being defined.
#
# Arguments:
#   dailyRetSeries  daily return series
#   history         number of trailing observations used for each fit
#
# Returns a list with one entry per step from `history` to the end of the
# series: the one-step-ahead forecast of the best ARMA fit, or NULL where
# findBestArma() found no model (it returns FALSE in that case).
walkForwardArma <- function(dailyRetSeries, history) {
  stopifnot(history >= 1, history <= length(dailyRetSeries))

  steps <- history:length(dailyRetSeries)
  forecasts <- vector("list", length(steps))

  for (k in seq_along(steps)) {
    ii <- steps[k]
    # The `history` observations ending at position ii.
    tt <- as.ts(tail(head(dailyRetSeries, ii), history))
    ttArma <- findBestArma(tt)
    if (is.list(ttArma)) {
      forecasts[[k]] <- predict(ttArma$fit, n.ahead = 1, doplot = FALSE)
    }
  }

  forecasts
}

library(quantmod)
library(fGarch)

# Fit ARMA(0, 2) + GARCH(1, 1) to the last 500 log returns of SPY's adjusted
# close and print the one-step-ahead forecast without plotting.
getSymbols("SPY", from = "1900-01-01")
SPY.rets <- diff(log(Ad(SPY)))
SPY.garch <- garchFit(~ arma(0, 2) + garch(1, 1),
                      data = as.ts(tail(SPY.rets, 500)))
predict(SPY.garch, n.ahead = 1, doplot = FALSE)

Creating sounds out of financial data.

04/09/2015Allgemein, Code, Data Analysis, Engineering, Programming, R, Soundengineering, StatisticsMartin Stoppacher

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#

# Install seewave and tuneR only when they are missing, then attach them.
# library() stops with an error if a package cannot be loaded, whereas
# require() only returns FALSE and lets the script fail later on.
for (pkg in c("seewave", "tuneR")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

#rm(list = ls(all = TRUE)) # clear current workspace #
# The image and wav files below use relative names, so they are written
# into this directory.
setwd("/Users/martinstoppacher/R Analysis/3_Index Sounds/")

library("quantmod")

# NOTE(review): `from = 1900` is a number, not a date string. Presumably it
# is read as a day count from 1970-01-01 (about March 1975), which would match
# the 1975 start in the chart names below; confirm, or spell the date out.
getSymbols("^GSPC", from = 1900)

head(GSPC)

tail(GSPC)

# Titles use a literal "&"; the scraped listing carried the HTML entity
# "&amp;", which would have been printed verbatim in the charts.
jpeg(filename = "SP500.jpg1975-2015.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC), main = "S&P 500 Index (closing prices)")
dev.off()

summary(Cl(GSPC))

jpeg(filename = "SP500.jpg1975-1985.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1975/1985"], main = "S&P 500 Index 1975-1985 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1985-1995.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1985/1995"], main = "S&P 500 Index 1985-1995 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1995-2005.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1995/2005"], main = "S&P 500 Index 1995-2005 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg2005-2015.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["2005/2015"], main = "S&P 500 Index 2005-2015 (closing prices)")
dev.off()

# Four ten-year windows drawn over each other against the trading-day index.
# The last window is 2005/2014, matching the legend and the other three
# windows (the original drew 2005/2015 here).
jpeg(filename = "SP500.jpg 1975-2005 4decades-new.jpg", width = 880, height = 880, res = 100)
plot(as.numeric(Cl(GSPC)["1975/1984"]),
     main = "S&P 500 Index 1975-1985 - 4 decades (closing prices)",
     ylim = c(0, 2100), type = "l", ylab = "index values",
     xlab = "days (10 year)")
lines(as.numeric(Cl(GSPC)["1985/1994"]), col = "red")
lines(as.numeric(Cl(GSPC)["1995/2004"]), col = "blue")
lines(as.numeric(Cl(GSPC)["2005/2014"]), col = "green")
legend("topleft", legend = c("1975-1984", "1985-1994", "1995-2004", "2005-2014"),
       lty = 1, col = c("black", "red", "blue", "green"))
dev.off()

# Closing prices of a window divided by the first close of that window.
rebase <- function(window) {
  px <- as.numeric(Cl(GSPC)[window])
  px / px[1]
}

# Same four windows as relative change since the first day of each window.
jpeg(filename = "SP500.jpg 1975-2005 4decades percent-new2.jpg", width = 880, height = 880, res = 100)
plot(rebase("1975/1984") - 1,
     main = "S&P 500 Index 1975-1985 - 4 decades (percent changes)",
     ylim = c(-0.4, 2.3), type = "l", ylab = "index values",
     xlab = "days (10 year)")
lines(rebase("1985/1994") - 1, col = "red")
lines(rebase("1995/2004") - 1, col = "blue")
lines(rebase("2005/2014") - 1, col = "green")
legend("topleft", legend = c("1975-1984", "1985-1994", "1995-2004", "2005-2014"),
       lty = 1, col = c("black", "red", "blue", "green"))
dev.off()

# Alternative split along calendar decades, each rebased to 1 at its start.
jpeg(filename = "SP500.jpg 1975-2005 4otherdecades percent2-new.jpg", width = 880, height = 880, res = 100)
plot(rebase("1980/1989"),
     main = "S&P 500 Index 1975-2015 - new truncation - (percent changes)",
     ylim = c(0.6, 4.2), type = "l", ylab = "index values",
     xlab = "days (10 year)", col = "yellow")
lines(rebase("1975/1979"), col = "red")
lines(rebase("1990/1999"), col = "black")
lines(rebase("2000/2009"), col = "blue")
lines(rebase("2010/2014"), col = "green")
legend("topleft",
       legend = c("1975-1979", "1980-1989", "1990-1999", "2000-2009", "2010-2014"),
       lty = 1, col = c("red", "yellow", "black", "blue", "green"))
dev.off()

library("PerformanceAnalytics")

# 2010-2015 closes relative to the first close of 2005 (printed only).
Cl(GSPC)["2010/2015"] / as.numeric(Cl(GSPC)["2005/2015"][1])

# charts.PerformanceSummary() takes a return series as its first argument;
# the original call passed none and stopped with a missing-argument error.
# NOTE(review): the 2010/2015 window is taken from the line above and may
# not be the one the author had in mind.
gspc.returns <- na.omit(ROC(Cl(GSPC)["2010/2015"], type = "discrete"))
charts.PerformanceSummary(gspc.returns, main = "", xlab = "")

# percent
# Each window is divided by its own first close. Titles use a literal "&"
# instead of the HTML entity "&amp;" left over in the scraped listing.

jpeg(filename = "SP500.jpg1975-1985-percent.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1975/1985"] / as.numeric(Cl(GSPC)["1975/1985"][1]),
     main = "S&P 500 Index 1975-1985 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1985-1995-percent.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1985/1995"] / as.numeric(Cl(GSPC)["1985/1995"][1]),
     main = "S&P 500 Index 1985-1995 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1995-2005-percent.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1995/2005"] / as.numeric(Cl(GSPC)["1995/2005"][1]),
     main = "S&P 500 Index 1995-2005 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg2005-2015-percent.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["2005/2015"] / as.numeric(Cl(GSPC)["2005/2015"][1]),
     main = "S&P 500 Index 2005-2015 (closing prices)")
dev.off()

# The scraped listing had HTML entities (&lt; &gt; &amp;) in place of
# "<", ">" and "&", which R cannot parse; they are decoded here.

# First differences of the closing prices.
GSPC.cl.close <- diff(Cl(GSPC))
tail(GSPC.cl.close, 20)
jpeg(filename = "SP500-first-difference-example.jpg", width = 880, height = 880, res = 100)
plot(tail(GSPC.cl.close, 200), main = "S&P 500 first difference")
dev.off()

##

GSPC.cl.close.roc <- ROC(Cl(GSPC))
tail(GSPC.cl.close.roc, 5)

#prices <- Cl(GSPC) # ROC is log diff!
#log_returns <- diff(log(prices), lag=1)
#tail(log_returns)

jpeg(filename = "SP500-first-roc-example.jpg", width = 880, height = 880, res = 100)
plot(tail(GSPC.cl.close.roc, 200), main = "S&P 500 first difference")
dev.off()

##

# Rate of change times 100 (the variable is called dax.roc but holds GSPC).
dax.roc <- na.omit(ROC(Cl(GSPC))) * 100
plot(head(dax.roc, 20))
plot(dax.roc)

# standard
# Scale by the largest absolute value so that the series lies in [-1, 1].
# This equals the original if/else on abs(max) versus abs(min).
dax.roc.standard <- as.numeric(dax.roc) / max(abs(dax.roc))

plot(dax.roc.standard, type = "l")

# Write the scaled series as a wav file and play it at three sampling rates.
w <- dax.roc.standard
f <- 41000
savewav(w, f = f, filename = "xyz.wav")
aw <- readWave("xyz.wav")
play(aw)

f <- 10000
savewav(w, f = f, filename = "xyz.wav")
aw <- readWave("xyz.wav")
play(aw)

f <- 5000
savewav(w, f = f, filename = "xyz.wav")
aw <- readWave("xyz.wav")
play(aw)

# Insert the midpoint of every pair of neighbouring values between them,
# giving a one-column matrix x1, m1, x2, m2, ..., xn.
# The original grew two objects with rbind() inside 1:length() loops
# (quadratic copying) and stored the result in a variable called `lines`,
# shadowing graphics::lines(); the same values are computed here in one step.
dax.roc <- as.numeric(dax.roc)

# Midpoint of each value and its successor; the last value has no successor,
# so its midpoint is NA.
dax.roc2 <- (dax.roc + c(dax.roc[-1], NA)) / 2

# rbind() stacks values over midpoints; reading that 2 x n matrix column by
# column interleaves them. na.omit() drops the trailing NA row.
dax.roc <- na.omit(matrix(rbind(dax.roc, dax.roc2), ncol = 1))
tail(dax.roc)

# Alternative input series; every assignment replaces the previous one, so
# the last line of each pair is the one that takes effect.
dax.roc <- na.omit(ROC(SMA(Cl(GSPC), n = 500))) * 100

# This script never downloads GDAXI, so fetch it when it is not already in
# the workspace.
if (!exists("GDAXI")) {
  getSymbols("^GDAXI")
}
dax.roc <- na.omit(ROC(Cl(GDAXI))) * 100

dax.roc.standard <- as.numeric(dax.roc / max(dax.roc))
# Dividing by the minimum flips the sign when the minimum is negative.
dax.roc.standard <- as.numeric(dax.roc / min(dax.roc))

hist(dax.roc.standard)

w <- na.omit(SMA(dax.roc.standard, n = 100))
w <- dax.roc.standard

# Repeat the signal 2^5 = 32 times (the original doubled it five times).
w <- rep(w, times = 2^5)

f <- 32000
savewav(w, f = f, filename = "xyz.wav")
aw <- readWave("xyz.wav")
play(aw)

# Martin Stoppacher #
# office@martinstoppacher.com #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#################################################################################

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Install seewave and tuneR only when they are missing, then attach them.
# library() stops with an error if a package cannot be loaded, whereas
# require() only returns FALSE and lets the script fail later on.
for (pkg in c("seewave", "tuneR")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

#rm(list = ls(all = TRUE)) # clear current workspace #
# The image and wav files below use relative names, so they are written
# into this directory.
setwd("/Users/martinstoppacher/R Analysis/3_Index Sounds/")

library("quantmod")

# NOTE(review): `from = 1900` is a number, not a date string. Presumably it
# is read as a day count from 1970-01-01 (about March 1975), which would match
# the 1975 start in the chart names below; confirm, or spell the date out.
getSymbols("^GSPC", from = 1900)

head(GSPC)

tail(GSPC)

jpeg(filename = "SP500.jpg1975-2015.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC), main = "S&P 500 Index (closing prices)")
dev.off()

summary(Cl(GSPC))

jpeg(filename = "SP500.jpg1975-1985.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1975/1985"], main = "S&P 500 Index 1975-1985 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1985-1995.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1985/1995"], main = "S&P 500 Index 1985-1995 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1995-2005.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1995/2005"], main = "S&P 500 Index 1995-2005 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg2005-2015.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["2005/2015"], main = "S&P 500 Index 2005-2015 (closing prices)")
dev.off()

# Four ten-year windows drawn over each other against the trading-day index.
# The last window is 2005/2014, matching the legend and the other three
# windows (the original drew 2005/2015 here).
jpeg(filename = "SP500.jpg 1975-2005 4decades-new.jpg", width = 880, height = 880, res = 100)
plot(as.numeric(Cl(GSPC)["1975/1984"]),
     main = "S&P 500 Index 1975-1985 - 4 decades (closing prices)",
     ylim = c(0, 2100), type = "l", ylab = "index values",
     xlab = "days (10 year)")
lines(as.numeric(Cl(GSPC)["1985/1994"]), col = "red")
lines(as.numeric(Cl(GSPC)["1995/2004"]), col = "blue")
lines(as.numeric(Cl(GSPC)["2005/2014"]), col = "green")
legend("topleft", legend = c("1975-1984", "1985-1994", "1995-2004", "2005-2014"),
       lty = 1, col = c("black", "red", "blue", "green"))
dev.off()

# Closing prices of a window divided by the first close of that window.
rebase <- function(window) {
  px <- as.numeric(Cl(GSPC)[window])
  px / px[1]
}

# Same four windows as relative change since the first day of each window.
jpeg(filename = "SP500.jpg 1975-2005 4decades percent-new2.jpg", width = 880, height = 880, res = 100)
plot(rebase("1975/1984") - 1,
     main = "S&P 500 Index 1975-1985 - 4 decades (percent changes)",
     ylim = c(-0.4, 2.3), type = "l", ylab = "index values",
     xlab = "days (10 year)")
lines(rebase("1985/1994") - 1, col = "red")
lines(rebase("1995/2004") - 1, col = "blue")
lines(rebase("2005/2014") - 1, col = "green")
legend("topleft", legend = c("1975-1984", "1985-1994", "1995-2004", "2005-2014"),
       lty = 1, col = c("black", "red", "blue", "green"))
dev.off()

# Alternative split along calendar decades, each rebased to 1 at its start.
jpeg(filename = "SP500.jpg 1975-2005 4otherdecades percent2-new.jpg", width = 880, height = 880, res = 100)
plot(rebase("1980/1989"),
     main = "S&P 500 Index 1975-2015 - new truncation - (percent changes)",
     ylim = c(0.6, 4.2), type = "l", ylab = "index values",
     xlab = "days (10 year)", col = "yellow")
lines(rebase("1975/1979"), col = "red")
lines(rebase("1990/1999"), col = "black")
lines(rebase("2000/2009"), col = "blue")
lines(rebase("2010/2014"), col = "green")
legend("topleft",
       legend = c("1975-1979", "1980-1989", "1990-1999", "2000-2009", "2010-2014"),
       lty = 1, col = c("red", "yellow", "black", "blue", "green"))
dev.off()

library("PerformanceAnalytics")

# 2010-2015 closes relative to the first close of 2005 (printed only).
Cl(GSPC)["2010/2015"] / as.numeric(Cl(GSPC)["2005/2015"][1])

# charts.PerformanceSummary() takes a return series as its first argument;
# the original call passed none and stopped with a missing-argument error.
# NOTE(review): the 2010/2015 window is taken from the line above and may
# not be the one the author had in mind.
gspc.returns <- na.omit(ROC(Cl(GSPC)["2010/2015"], type = "discrete"))
charts.PerformanceSummary(gspc.returns, main = "", xlab = "")

# percent
# Each window is divided by its own first close.

jpeg(filename = "SP500.jpg1975-1985-percent.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1975/1985"] / as.numeric(Cl(GSPC)["1975/1985"][1]),
     main = "S&P 500 Index 1975-1985 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1985-1995-percent.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1985/1995"] / as.numeric(Cl(GSPC)["1985/1995"][1]),
     main = "S&P 500 Index 1985-1995 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1995-2005-percent.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["1995/2005"] / as.numeric(Cl(GSPC)["1995/2005"][1]),
     main = "S&P 500 Index 1995-2005 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg2005-2015-percent.jpg", width = 880, height = 880, res = 100)
plot(Cl(GSPC)["2005/2015"] / as.numeric(Cl(GSPC)["2005/2015"][1]),
     main = "S&P 500 Index 2005-2015 (closing prices)")
dev.off()

GSPC.cl.close <- diff(Cl(GSPC))

tail(GSPC.cl.close,20)

jpeg(filename = "SP500-first-difference-example.jpg", width=880,height=880,res=100)

plot(tail(GSPC.cl.close,200),main="S&P 500 first difference")

dev.off()

# Rate of change of the S&P 500 close. With its default settings ROC()
# returns the log difference, i.e. the commented-out lines below compute
# the same series by hand.
GSPC.cl.close.roc <- ROC(Cl(GSPC))

tail(GSPC.cl.close.roc, 5)

#prices <- Cl(GSPC) # ROC is log diff!

#log_returns <- diff(log(prices), lag=1)

#tail(log_returns)

# Save a chart of the 200 most recent values. The title used to be a
# copy of the first-difference chart's title although this plot shows the
# rate of change.
jpeg(filename = "SP500-first-roc-example.jpg", width = 880, height = 880, res = 100)

plot(
  tail(GSPC.cl.close.roc, 200),
  main = "S&P 500 rate of change (log returns)"
)

dev.off()

# Rate of change of the S&P 500 close in percent, leading NA removed.
# (The object is called dax.roc although it is built from GSPC here.)
dax.roc <- na.omit(ROC(Cl(GSPC))) * 100

plot(head(dax.roc, 20))

plot(dax.roc)

# standard
#
# Scale by the largest absolute value so the series lies within [-1, 1].
# This is the same divisor the two-branch comparison of abs(max) and
# abs(min) selects: whichever extreme has the larger magnitude.
dax.roc.scale <- max(abs(dax.roc))

dax.roc.standard <- as.numeric(dax.roc / dax.roc.scale)

plot(dax.roc.standard, type = "l")

# Turn the standardised series into sound at three sampling frequencies.
# For each frequency the series is written to "xyz.wav", read back and played.
# NOTE(review): savewav()/readWave()/play() are presumably seewave/tuneR
# functions attached elsewhere in the file - confirm.
w <- dax.roc.standard

for (f in c(41000, 10000, 5000)) {
  savewav(w, f = f, filename = "xyz.wav")
  aw <- readWave("xyz.wav")
  play(aw)
}

# Double the resolution of the series by inserting, between every pair of
# neighbouring values, their mean:
#   x1, (x1 + x2) / 2, x2, (x2 + x3) / 2, ..., xn
#
# This is computed on whole vectors. The previous version grew two objects
# with rbind() inside loops (quadratic copying), indexed one element past the
# end of the vector, and stored the result in a variable named `lines`, which
# shadows graphics::lines().
dax.roc <- as.numeric(dax.roc)

# Mean of each value and its successor; the last value has no successor, so
# its entry is NA and is dropped again below.
dax.roc2 <- (dax.roc + c(dax.roc[-1], NA)) / 2

# rbind() stacks the two vectors as rows; reading the 2 x n matrix column by
# column yields the interleaved order x1, m1, x2, m2, ...
dax.roc.interleaved <- as.vector(rbind(dax.roc, dax.roc2))

# Keep the result as a one-column matrix, as before, and drop the trailing NA.
dax.roc <- na.omit(matrix(dax.roc.interleaved, ncol = 1))

tail(dax.roc)

# Input series for the sound experiment: rate of change of the DAX close in
# percent, leading NA removed.
# Alternative input, kept for reference (it used to be computed and then
# immediately overwritten): rate of change of a 500-day moving average of
# the S&P 500 close.
# dax.roc <- na.omit(ROC(SMA(Cl(GSPC), n = 500))) * 100
dax.roc <- na.omit(ROC(Cl(GDAXI))) * 100

# Scale by the largest absolute value so the series lies within [-1, 1] and
# keeps its sign. The previous code divided by max() and then overwrote the
# result by dividing by min(); dividing by the (negative) minimum flips the
# sign of every value and can leave values outside [-1, 1].
dax.roc.standard <- as.numeric(dax.roc / max(abs(dax.roc)))

hist(dax.roc.standard)

# Alternative signal, kept for reference (it used to be computed and then
# immediately overwritten): a 100-point moving average of the series.
# w <- na.omit(SMA(dax.roc.standard, n = 100))
w <- dax.roc.standard

# Lengthen the signal: five successive doublings are 2^5 = 32 back-to-back
# copies of the series.
w <- rep(w, times = 2^5)

# Write the signal to "xyz.wav" at this sampling frequency, read the file
# back and play it.
f <- 32000

savewav(w, f = f, filename = "xyz.wav")

aw <- readWave("xyz.wav")

play(aw)

# Martin Stoppacher #

# office@martinstoppacher.com #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

#################################################################################

ooRexx with BSF4ooRexx – “java.net.URL” Classes (2_getinfo.rxj)

08/05/2014Code, Content Syndication, ooRexx, ProgrammingMartin Stoppacher

/* ***************************************************************************** */
/* This is just a short example of using the "java.net.URL" class through        */
/* BSF4ooRexx: it creates a URL object for a fixed feed address and prints       */
/* the individual parts of that URL.                                             */
/* created by Martin Stoppacher    26.12.2009                                    */
/* ***************************************************************************** */

/* The messages are quoted string literals. Unquoted, every word is taken as a   */
/* symbol and the text is printed in upper case.                                 */
say "hello this reads a syndfeed"
say "please type in the url"
/* NOTE(review): nothing is read from the user here; the URL below is fixed.     */
url= "http://rss.orf.at/fm4.xml"

f=.bsf~new("java.net.URL", url) /* creating a java url object with the above url */

say f~getAuthority()                      /* gets the authority part of this URL */
say f~getDefaultPort()           /* gets the default port number of the protocol */
                                                     /* associated with this URL */
say f~getPort()                              /* gets the port number of this URL */
say f~getFile()                                /* gets the file name of this URL */
say f~getHost()                 /* gets the host name of this URL, if applicable */
say f~getProtocol()                        /* gets the protocol name of this URL */
say f~getQuery()                              /* gets the query part of this URL */
say f~hashCode()          /* creates an integer suitable for hash table indexing */

::requires BSF.CLS                            /* get the Java support for ooRexx */

/* ***************************************************************************** */

/* This is just a short example of using the "java.net.URL" class through */

/* BSF4ooRexx: it creates a URL object for a fixed feed address and prints */

/* the individual parts of that URL. */

/* created by Martin Stoppacher 26.12.2009 */

/* ***************************************************************************** */

/* The messages are quoted string literals. Unquoted, every word is taken as a */

/* symbol and the text is printed in upper case. */

say "hello this reads a syndfeed"

say "please type in the url"

/* NOTE(review): nothing is read from the user here; the URL below is fixed. */

url= "http://rss.orf.at/fm4.xml"

f=.bsf~new("java.net.URL", url) /* creating a java url object with the above url */

say f~getAuthority() /* gets the authority part of this URL */

say f~getDefaultPort() /* gets the default port number of the protocol */

/* associated with this URL */

say f~getPort() /* gets the port number of this URL */

say f~getFile() /* gets the file name of this URL */

say f~getHost() /* gets the host name of this URL, if applicable */

say f~getProtocol() /* gets the protocol name of this URL */

say f~getQuery() /* gets the query part of this URL */

say f~hashCode() /* creates an integer suitable for hash table indexing */

::requires BSF.CLS /* get the Java support for ooRexx */

Syndication Feed Reader (1_Read.rxj) – ooRexx with BSF4ooRexx

08/05/2014Code, Content Syndication, ooRexx, ProgrammingMartin Stoppacher

/* ***************************************************************************** */
/* Syndication feed reader example built on the Project Rome API and             */
/* BSF4ooRexx.                                                                   */
/* Rome version current in 2009: rome1.0.jar     https://rome.dev.java.net/      */
/* Besides Rome, the JDOM API (jdom.jar) is required: https://jdom.org/          */
/*                                                                               */
/* The feed is retrieved by a prepared Java class, reached through BSF4ooRexx,   */
/* which uses the "com.sun.syndication.io.SyndFeedInput" methods.                */
/* created by Martin Stoppacher       date:  26.12.2009                          */
/* ***************************************************************************** */

feedReaderClass = "FeedReader1"           /* name of the Java class to be loaded */
feedReader = .bsf~new(feedReaderClass)        /* instantiate that Java class     */
say feedReader~getfeed               /* print what the getfeed method returns    */

::requires BSF.CLS                                       /* get the Java support */

/* ***************************************************************************** */

/* Syndication feed reader example built on the Project Rome API and */

/* BSF4ooRexx. */

/* Rome version current in 2009: rome1.0.jar https://rome.dev.java.net/ */

/* Besides Rome, the JDOM API (jdom.jar) is required: https://jdom.org/ */

/* */

/* The feed is retrieved by a prepared Java class, reached through BSF4ooRexx, */

/* which uses the "com.sun.syndication.io.SyndFeedInput" methods. */

/* created by Martin Stoppacher date: 26.12.2009 */

/* ***************************************************************************** */

feedReaderClass = "FeedReader1" /* name of the Java class to be loaded */

feedReader = .bsf~new(feedReaderClass) /* instantiate that Java class */

say feedReader~getfeed /* print what the getfeed method returns */

::requires BSF.CLS /* get the Java support */