Category Archives: Programming

Renewable electricity output – Statistics

12/07/2019 · Data Analysis, Programming, R · Martin Stoppacher

# Renewable electricity output (% of total electricity output) - EG.ELC.RNEW.ZS

1	# Renewable electricity output (% of total electricity output) - EG.ELC.RNEW.ZS

library("XML")
library("WDI")
library("gridExtra")
library("scatterplot3d")

options(scipen=100) # turning off scientific notation

# title and subtitle used by the plots in this script
titel<-"Renewable electricity output"
subtitel<-"(% of total electricity output)"

# request indicator EG.ELC.RNEW.ZS for country="all", years 1900 to 2019,
# through WDI() and show the structure of the returned object
renewable.electricity.output <- WDI(country="all", indicator="EG.ELC.RNEW.ZS", start=1900, end=2019)
str(renewable.electricity.output)

# 'data.frame':	15840 obs. of  4 variables:
#   $ iso2c         : chr  "1A" "1A" "1A" "1A" ...
# $ country       : chr  "Arab World" "Arab World" "Arab World" "Arab World" ...
# $ EG.ELC.RNEW.ZS: num  NA NA NA NA 2.92 ...
# $ year          : num  2019 2018 2017 2016 2015 ...

head(renewable.electricity.output)

# iso2c    country EG.ELC.RNEW.ZS year
# 1    1A Arab World             NA 2019
# 2    1A Arab World             NA 2018
# 3    1A Arab World             NA 2017
# 4    1A Arab World             NA 2016
# 5    1A Arab World       2.920702 2015
# 6    1A Arab World       3.278073 2014

library("XML")

library("WDI")

library("gridExtra")

library("scatterplot3d")

options(scipen=100) # turning off scientific notation

# title and subtitle used by the plots in this script
titel<-"Renewable electricity output"

subtitel<-"(% of total electricity output)"

# request indicator EG.ELC.RNEW.ZS for country="all", years 1900 to 2019,
# through WDI() and show the structure of the returned object
renewable.electricity.output <- WDI(country="all", indicator="EG.ELC.RNEW.ZS", start=1900, end=2019)

str(renewable.electricity.output)

# 'data.frame': 15840 obs. of 4 variables:

# $ iso2c : chr "1A" "1A" "1A" "1A" ...

# $ country : chr "Arab World" "Arab World" "Arab World" "Arab World" ...

# $ EG.ELC.RNEW.ZS: num NA NA NA NA 2.92 ...

# $ year : num 2019 2018 2017 2016 2015 ...

head(renewable.electricity.output)

# iso2c country EG.ELC.RNEW.ZS year

# 1 1A Arab World NA 2019

# 2 1A Arab World NA 2018

# 3 1A Arab World NA 2017

# 4 1A Arab World NA 2016

# 5 1A Arab World 2.920702 2015

# 6 1A Arab World 3.278073 2014

# selection of the year 2015

# year to inspect
year <- 2015
# row indices of every observation whose year column equals that year
target.year <- which(renewable.electricity.output$year==year)
# show the first rows of the selection
head(renewable.electricity.output[target.year,])

# iso2c                                     country EG.ELC.RNEW.ZS year
# 5      1A                                  Arab World       2.920702 2015
# 65     S3                      Caribbean small states       8.914780 2015
# 125    B8              Central Europe and the Baltics      20.252851 2015
# 185    V2                  Early-demographic dividend      18.104862 2015
# 245    Z4                         East Asia & Pacific      20.412466 2015
# 305    4E East Asia & Pacific (excluding high income)      23.394930 2015

# selection of the year 2015

# year to inspect
year <- 2015

# row indices of every observation whose year column equals that year
target.year <- which(renewable.electricity.output$year==year)

# show the first rows of the selection
head(renewable.electricity.output[target.year,])

# iso2c country EG.ELC.RNEW.ZS year

# 5 1A Arab World 2.920702 2015

# 65 S3 Caribbean small states 8.914780 2015

# 125 B8 Central Europe and the Baltics 20.252851 2015

# 185 V2 Early-demographic dividend 18.104862 2015

# 245 Z4 East Asia & Pacific 20.412466 2015

# 305 4E East Asia & Pacific (excluding high income) 23.394930 2015

# income categories

– Low income
– Lower middle income
– Upper middle income
– High income

# "Low income" opens the chart; the other income groups are drawn into it
# afterwards with lines().
target.income <- which(renewable.electricity.output$country=="Low income")
renewable.electricity.output[target.income,]
# The x/y columns are picked by name (year, indicator value) instead of by
# position 4:3, so the plot does not depend on the column order that WDI()
# returns.
plot(renewable.electricity.output[target.income,c("year","EG.ELC.RNEW.ZS")],type="l",
     main=titel,sub=subtitel,ylab="%",xlab="",xlim=c(1980,2020),ylim=c(10,90),
     axes=TRUE)
text(1985, 65, "Low income",cex=0.7)

# "Low income" opens the chart; the other income groups are drawn into it
# afterwards with lines().
target.income <- which(renewable.electricity.output$country=="Low income")
renewable.electricity.output[target.income,]
# The x/y columns are picked by name (year, indicator value) instead of by
# position 4:3, so the plot does not depend on the column order that WDI()
# returns.
plot(renewable.electricity.output[target.income,c("year","EG.ELC.RNEW.ZS")],type="l",
     main=titel,sub=subtitel,ylab="%",xlab="",xlim=c(1980,2020),ylim=c(10,90),
     axes=TRUE)
text(1985, 65, "Low income",cex=0.7)

# Add the remaining income groups to the chart that is already open.
# One entry per group: aggregate name, colour of line and label, and the
# y position of the label (labels are always placed at x = 1985).
income.groups <- list(
  list(name="Lower middle income", col="red",   label.y=20),
  list(name="Upper middle income", col="green", label.y=25),
  list(name="High income",         col="blue",  label.y=15)
)

for (group in income.groups) {
  target.income <- which(renewable.electricity.output$country==group$name)
  # explicit print(): values are not auto-printed inside a loop
  print(renewable.electricity.output[target.income,])
  # Columns are selected by name rather than by position 4:3. main/xlab/ylab
  # are not passed: lines() cannot use them and only warns that they are not
  # graphical parameters.
  lines(renewable.electricity.output[target.income,c("year","EG.ELC.RNEW.ZS")],
        type="l",col=group$col)
  text(1985, group$label.y, group$name, col=group$col,cex=0.7)
}

# Add the remaining income groups to the chart that is already open.
# One entry per group: aggregate name, colour of line and label, and the
# y position of the label (labels are always placed at x = 1985).
income.groups <- list(
  list(name="Lower middle income", col="red",   label.y=20),
  list(name="Upper middle income", col="green", label.y=25),
  list(name="High income",         col="blue",  label.y=15)
)

for (group in income.groups) {
  target.income <- which(renewable.electricity.output$country==group$name)
  # explicit print(): values are not auto-printed inside a loop
  print(renewable.electricity.output[target.income,])
  # Columns are selected by name rather than by position 4:3. main/xlab/ylab
  # are not passed: lines() cannot use them and only warns that they are not
  # graphical parameters.
  lines(renewable.electricity.output[target.income,c("year","EG.ELC.RNEW.ZS")],
        type="l",col=group$col)
  text(1985, group$label.y, group$name, col=group$col,cex=0.7)
}

# -------------

# region

# World
# Euro area
# European Union
# Europe & Central Asia
# Arab World
# North America
# Latin America & Caribbean
# Middle East & North Africa
# Sub-Saharan Africa 
# East Asia & Pacific
# South Asia

# Regional aggregates: "World" opens the chart, every other region is added
# as a line with its own colour, line type and label height.
target.area <- which(renewable.electricity.output$country=="World")
renewable.electricity.output[target.area,]
# Columns are selected by name (year, indicator value) rather than by
# position 4:3, so the chart does not depend on the column order of WDI().
plot(renewable.electricity.output[target.area,c("year","EG.ELC.RNEW.ZS")],type="l",
     main=titel,sub=subtitel,ylab="%",xlab="",xlim=c(1980,2020),ylim=c(1,90),
     axes=TRUE)
text(1985, 20, "World",cex=0.7)

# One entry per additional region: aggregate name, colour, line type and the
# y position of the label (labels are always placed at x = 1985).
regions <- list(
  list(name="Euro area",                 col="red",     lty=1, label.y=12),
  list(name="Arab World",                col="green",   lty=1, label.y=5),
  list(name="North America",             col="blue",    lty=2, label.y=17),
  list(name="Latin America & Caribbean", col="yellow",  lty=1, label.y=65),
  # disabled in the original script:
  # list(name="Middle East & North Africa", col="orange", lty=1, label.y=5),
  list(name="Sub-Saharan Africa",        col="cyan",    lty=3, label.y=23),
  list(name="East Asia & Pacific",       col="magenta", lty=4, label.y=14.5),
  list(name="South Asia",                col="pink",    lty=5, label.y=28)
)

for (region in regions) {
  target.area <- which(renewable.electricity.output$country==region$name)
  # explicit print(): values are not auto-printed inside a loop
  print(renewable.electricity.output[target.area,])
  # main/xlab/ylab are not passed: lines() cannot use them and only warns
  # that they are not graphical parameters.
  lines(renewable.electricity.output[target.area,c("year","EG.ELC.RNEW.ZS")],
        type="l",col=region$col,lty=region$lty)
  text(1985, region$label.y, region$name, col=region$col,cex=0.7)
}

# -------------

# region

# World

# Euro area

# European Union

# Europe & Central Asia

# Arab World

# North America

# Latin America & Caribbean

# Middle East & North Africa

# Sub-Saharan Africa

# East Asia & Pacific

# South Asia

# Regional aggregates: "World" opens the chart, every other region is added
# as a line with its own colour, line type and label height.
target.area <- which(renewable.electricity.output$country=="World")
renewable.electricity.output[target.area,]
# Columns are selected by name (year, indicator value) rather than by
# position 4:3, so the chart does not depend on the column order of WDI().
plot(renewable.electricity.output[target.area,c("year","EG.ELC.RNEW.ZS")],type="l",
     main=titel,sub=subtitel,ylab="%",xlab="",xlim=c(1980,2020),ylim=c(1,90),
     axes=TRUE)
text(1985, 20, "World",cex=0.7)

# One entry per additional region: aggregate name, colour, line type and the
# y position of the label (labels are always placed at x = 1985).
regions <- list(
  list(name="Euro area",                 col="red",     lty=1, label.y=12),
  list(name="Arab World",                col="green",   lty=1, label.y=5),
  list(name="North America",             col="blue",    lty=2, label.y=17),
  list(name="Latin America & Caribbean", col="yellow",  lty=1, label.y=65),
  # disabled in the original script:
  # list(name="Middle East & North Africa", col="orange", lty=1, label.y=5),
  list(name="Sub-Saharan Africa",        col="cyan",    lty=3, label.y=23),
  list(name="East Asia & Pacific",       col="magenta", lty=4, label.y=14.5),
  list(name="South Asia",                col="pink",    lty=5, label.y=28)
)

for (region in regions) {
  target.area <- which(renewable.electricity.output$country==region$name)
  # explicit print(): values are not auto-printed inside a loop
  print(renewable.electricity.output[target.area,])
  # main/xlab/ylab are not passed: lines() cannot use them and only warns
  # that they are not graphical parameters.
  lines(renewable.electricity.output[target.area,c("year","EG.ELC.RNEW.ZS")],
        type="l",col=region$col,lty=region$lty)
  text(1985, region$label.y, region$name, col=region$col,cex=0.7)
}

Using trigonometric functions in R

06/18/2018 · Code, Data Analysis, Programming, R · Martin Stoppacher

R expects angles in radians as input for its trigonometric functions.

# calculating rad
# 90 degrees expressed in radians, written two ways
pi/2
90*pi/180 # transforming degrees into radians
# all whole-degree angles from 0 to 360
degr<-c(0:360)
degr
# the same angles in radians (the original multiplied the undefined `grad`)
rad<-degr*(pi/180)
rad

# calculating rad

# 90 degrees expressed in radians, written two ways
pi/2
90*pi/180 # transforming degrees into radians
# all whole-degree angles from 0 to 360
degr<-c(0:360)
degr
# the same angles in radians (the original multiplied the undefined `grad`)
rad<-degr*(pi/180)
rad

Now we can plot the function.

# print the sine of every angle in rad, then plot the values against
# their index
sin(rad)
plot(sin(rad))

sin(rad)
plot(sin(rad))

And by playing with the functions we get a funny graphic output.

# asin(sin(x)): drawn once as points, then redrawn as a line that serves as
# the canvas for all overlays below
plot(asin(sin(rad)))
plot(asin(sin(rad)), type = "l")

# sine and its mirror image
points(sin(rad))
points(-sin(rad), col = "green")

# mirror image of asin(sin(x))
lines(-asin(sin(rad)))

# reciprocal of the sine and its mirror image
lines(1 / sin(rad), col = "red")
lines(-1 / sin(rad), col = "blue")

# mark index 91 and index 271 on the zero line
# (presumably 90 and 270 degrees, given degr <- 0:360 - confirm)
points(x = c(91, 271), y = c(0, 0))

# asin(sin(x)): drawn once as points, then redrawn as a line that serves as
# the canvas for all overlays below
plot(asin(sin(rad)))
plot(asin(sin(rad)), type = "l")

# sine and its mirror image
points(sin(rad))
points(-sin(rad), col = "green")

# mirror image of asin(sin(x))
lines(-asin(sin(rad)))

# reciprocal of the sine and its mirror image
lines(1 / sin(rad), col = "red")
lines(-1 / sin(rad), col = "blue")

# mark index 91 and index 271 on the zero line
# (presumably 90 and 270 degrees, given degr <- 0:360 - confirm)
points(x = c(91, 271), y = c(0, 0))

And if we include the tangent, the graphic looks like this:

# print the raw tangent values
tan(rad)

# tangent with the entries at index 91 and 271 replaced by 0
# (presumably the poles at 90 and 270 degrees, given degr <- 0:360 - confirm)
tangens <- tan(rad)
tangens[c(91, 271)] <- 0

# tangent as the base chart, sine (red) and cosine (green) on top
plot(tangens, type = "l")
lines(sin(rad), type = "l", col = "red")
lines(cos(rad), type = "l", col = "green")

# print the raw tangent values
tan(rad)

# tangent with the entries at index 91 and 271 replaced by 0
# (presumably the poles at 90 and 270 degrees, given degr <- 0:360 - confirm)
tangens <- tan(rad)
tangens[c(91, 271)] <- 0

# tangent as the base chart, sine (red) and cosine (green) on top
plot(tangens, type = "l")
lines(sin(rad), type = "l", col = "red")
lines(cos(rad), type = "l", col = "green")

# Overlay tangent (red) and its reciprocal (green) on the open chart:
# first unscaled (divisor 1), then divided by 10 for visual reasons.
for (divisor in c(1, 10)) {
  lines(tangens / divisor, type = "l", col = "red")
  lines(1 / tangens / divisor, type = "l", col = "green")
}

# Overlay tangent (red) and its reciprocal (green) on the open chart:
# first unscaled (divisor 1), then divided by 10 for visual reasons.
for (divisor in c(1, 10)) {
  lines(tangens / divisor, type = "l", col = "red")
  lines(1 / tangens / divisor, type = "l", col = "green")
}

# tangent as the base chart with its reciprocal 1/tangens drawn on top
plot(tangens,type="l")
lines(1/tangens,type="l")
# the same chart again, this time with the reciprocal in green
plot(tangens,type="l")
lines(1/tangens,type="l",col="green")

# tangent as the base chart with its reciprocal 1/tangens drawn on top
plot(tangens,type="l")

lines(1/tangens,type="l")

# the same chart again, this time with the reciprocal in green
plot(tangens,type="l")

lines(1/tangens,type="l",col="green")

Audio file conversion with afconvert (mac)

10/02/2017 · Code, Engineering, Programming, Soundengineering · Martin Stoppacher

I was looking for a simple and elegant way to convert a large number of audio files from one format (.caf) to another (.aif). The solution I found is a very elegant one and also comes included with your operating system – if you are using a Mac.

afconvert -f AIFF -d BEI24@48000 "Wow Bass 03.caf" "Wow Bass 03.aif"

1	afconvert -f AIFF -d BEI24@48000 "Wow Bass 03.caf" "Wow Bass 03.aif"

And now here is the most amazing part. It is super easy to execute the conversion of multiple files by just one command line.

# convert every .caf file in the current directory; the -e test skips the
# literal pattern "*.caf" that the shell leaves in place when nothing matches
for i in *.caf; do [ -e "$i" ] || continue; afconvert -f AIFF -d BEI24@48000 "$i" "${i%.caf}.aif"; done

1	for i in *.caf; do afconvert -f AIFF -d BEI24@48000 "$i" "${i%.caf}.aif"; done

or to run through subdirectories:

# Each directory is entered inside a subshell ( ... ), so the caller's working
# directory never changes and a failed cd cannot make later commands run in
# the wrong place. "$d" is quoted so directory names with spaces work.

# 1) dry run: print every subdirectory that will be visited
for d in */ ; do ( cd "$d" && pwd ); done
# 2) dry run: print every .caf file that would be converted
for d in */ ; do ( cd "$d" && for i in *.caf; do [ -e "$i" ] || continue; echo "$PWD/$i"; done ); done
# 3) convert every .caf file in every subdirectory
for d in */ ; do ( cd "$d" && for i in *.caf; do [ -e "$i" ] || continue; afconvert -f AIFF -d BEI24@48000 "$i" "${i%.caf}.aif"; done ); done

# Each directory is entered inside a subshell ( ... ), so the caller's working
# directory never changes and a failed cd cannot make later commands run in
# the wrong place. "$d" is quoted so directory names with spaces work.

# 1) dry run: print every subdirectory that will be visited
for d in */ ; do ( cd "$d" && pwd ); done
# 2) dry run: print every .caf file that would be converted
for d in */ ; do ( cd "$d" && for i in *.caf; do [ -e "$i" ] || continue; echo "$PWD/$i"; done ); done
# 3) convert every .caf file in every subdirectory
for d in */ ; do ( cd "$d" && for i in *.caf; do [ -e "$i" ] || continue; afconvert -f AIFF -d BEI24@48000 "$i" "${i%.caf}.aif"; done ); done

or with recursion by using find:

# list all .caf files below the current directory
# (the start path "." is given explicitly; POSIX find requires one)
find . -name "*.caf"
# copy every .caf file to a target directory ("/.../..." is a placeholder);
# IFS= keeps leading and trailing blanks in file names intact
find . -type f -iname '*.caf' -print | while IFS= read -r name; do cp "$name" "/.../..."; done
# convert every .caf file; $name still contains its relative sub-path, so the
# matching directory below the target folder is created before converting
find . -type f -iname '*.caf' -print | while IFS= read -r name; do out="/Volumes/Daten/Logic/test/${name%.*}.aif"; mkdir -p "$(dirname "$out")"; afconvert -f AIFF -d BEI24@48000 "$name" "$out"; done

# list all .caf files below the current directory
# (the start path "." is given explicitly; POSIX find requires one)
find . -name "*.caf"
# copy every .caf file to a target directory ("/.../..." is a placeholder);
# IFS= keeps leading and trailing blanks in file names intact
find . -type f -iname '*.caf' -print | while IFS= read -r name; do cp "$name" "/.../..."; done
# convert every .caf file; $name still contains its relative sub-path, so the
# matching directory below the target folder is created before converting
find . -type f -iname '*.caf' -print | while IFS= read -r name; do out="/Volumes/Daten/Logic/test/${name%.*}.aif"; mkdir -p "$(dirname "$out")"; afconvert -f AIFF -d BEI24@48000 "$name" "$out"; done

Key	linear PCM format
LE	Little Endian
BE	Big Endian
F	Floating point
I	Integer
UI	Unsigned integer
8/16/24/32/64	Number of bits

Number of bits	Information Size
8	256
16	65536
24	16777216
32	4294967296
64	18446744073709551616

afconvert -hf

1	afconvert -hf

Audio file and data formats:	data_formats:
‘3gpp’ = 3GP Audio (.3gp)	‘Qclp’ ‘aac ‘ ‘aace’ ‘aach’ ‘aacl’ ‘aacp’ ‘samr’
‘3gp2’ = 3GPP-2 Audio (.3g2)	‘Qclp’ ‘aac ‘ ‘aace’ ‘aach’ ‘aacl’ ‘aacp’ ‘samr’
‘adts’ = AAC ADTS (.aac, .adts)	‘aac ‘ ‘aach’ ‘aacp’
‘ac-3’ = AC3 (.ac3)	‘ac-3’
‘AIFC’ = AIFC (.aifc, .aiff, .aif)	I8 BEI16 BEI24 BEI32 BEF32 BEF64 UI8 ‘ulaw’ ‘alaw’ ‘MAC3’ ‘MAC6’ ‘ima4’ ‘QDMC’ ‘QDM2’ ‘Qclp’ ‘agsm’
‘AIFF’ = AIFF (.aiff, .aif)	I8 BEI16 BEI24 BEI32
‘amrf’ = AMR (.amr)	‘samr’
‘m4af’ = Apple MPEG-4 Audio (.m4a, .m4r)	‘aac ‘ ‘aace’ ‘aach’ ‘aacl’ ‘aacp’ ‘alac’
‘caff’ = CAF (.caf)	‘.mp1’ ‘.mp2’ ‘.mp3’ ‘QDM2’ ‘QDMC’ ‘Qclp’ ‘Qclq’ ‘aac ‘ ‘aace’ ‘aach’ ‘aacl’ ‘aacp’ ‘alac’ ‘alaw’ ‘dvi8’ ‘ilbc’ ‘ima4’ I8 BEI16 BEI24 BEI32 BEF32 BEF64 LEI16 LEI24 LEI32 LEF32 LEF64 ‘ms\x00\x02’ ‘ms\x00\x11’ ‘ms\x001’ ‘paac’ ‘samr’ ‘ulaw’
‘MPG1’ = MPEG Layer 1 (.mp1, .mpeg, .mpa)	‘.mp1’
‘MPG2’ = MPEG Layer 2 (.mp2, .mpeg, .mpa)	‘.mp2’
‘MPG3’ = MPEG Layer 3 (.mp3, .mpeg, .mpa)	‘.mp3’
‘mp4f’ = MPEG-4 Audio (.mp4)	‘aac ‘ ‘aace’ ‘aach’ ‘aacl’ ‘aacp’
‘NeXT’ = NeXT/Sun (.snd, .au)	I8 BEI16 BEI24 BEI32 BEF32 BEF64 ‘ulaw’
‘Sd2f’ = Sound Designer II (.sd2)	I8 BEI16 BEI24 BEI32
‘WAVE’ = WAVE (.wav)	UI8 LEI16 LEI24 LEI32 LEF32 LEF64 ‘ulaw’ ‘alaw’

Supported Audio File and Data Formats in OS X

Plane crash data

06/04/2016 · Allgemein, Data Analysis, R, Statistics · Martin Stoppacher

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# A simple script for plane crash data from: 
# http://www.planecrashinfo.com/database.htm
# The aviation accident database includes: 
# All civil and commercial aviation accidents of scheduled and non-scheduled 
# passenger airliners worldwide, which resulted in a fatality 
# (including all U.S. Part 121 and Part 135 fatal accidents)
# All cargo, positioning, ferry and test flight fatal accidents.
# All military transport accidents with 10 or more fatalities.
# All commercial and military helicopter accidents with greater than 
# 10 fatalities.
# All civil and military airship accidents involving fatalities.
# Aviation accidents involving the death of famous people.
# Aviation accidents or incidents of noteworthy interest.

# Database Format
# Date:	 Date of accident,  in the format - January 01, 2001
# Time:	 Local time, in 24 hr. format unless otherwise specified
# Airline/Op:	 Airline or operator of the aircraft
# Flight #:	 Flight number assigned by the aircraft operator
# Route:	 Complete or partial route flown prior to the accident
# AC Type:	 Aircraft type
# Reg:	 ICAO registration of the aircraft
# cn / ln:	 Construction or serial number / Line or fuselage number
# Aboard:	 Total aboard (passengers / crew)
# Fatalities:	 Total fatalities aboard (passengers / crew)
# Ground:	 Total killed on the ground
# Summary:	 Brief description of the accident and cause if known


# to be done: histograms, t test?, types of crashes, 

# country filter?, region filter, plane filter
# text analysis, 

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# packages

library("XML")
library("quantmod")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# 

# NOTE(review): removes every object (including hidden ones) from the current
# environment before the script starts - anything defined earlier in the
# session is lost.
rm(list = ls(all = TRUE))
getwd()
#system("ls")
# NOTE(review): hard-coded, user-specific working directory; the relative
# file names passed to save() and write.csv() further down resolve against it.
setwd("~/ownCloud/STA_Statistics/PlaneCrashData")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# get the available years

url <- "http://www.planecrashinfo.com/database.htm"
tables <- readHTMLTable(url,header=FALSE)

# The second table on the page holds the year links; its first column is
# skipped, as in the original loop (which started at column 2).
# The cell *values* are read directly. The original took levels(cell)[i],
# which only works when the columns are factors and the sorted level order
# happens to equal the row order; with character columns it yields NULL.
year.cells <- as.matrix(tables[[2]][, -1, drop = FALSE])
# t() + as.vector() walks the table row by row, left to right - the same
# order in which the original nested loop collected the values
years <- as.vector(t(year.cells))
years
# cells that are not a number become NA (with a coercion warning) and are
# dropped
years<-na.omit(as.numeric(years))
years

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# get all data tables

# One list element per year; each element is whatever readHTMLTable() returns
# for that year's page.
table.crash.all <- vector("list", length(years))

# seq_along() instead of 1:length(): nothing is downloaded when no year was
# found (1:0 would iterate over 1 and 0).
for(i in seq_along(years)){
  url2 <- paste0("http://www.planecrashinfo.com/",
                 years[i],"/",years[i],".htm")
  table.crash.all[[i]] <- readHTMLTable(url2)
  # keep the per-year global objects table.crash.<year> that the original
  # script created
  assign(paste0("table.crash.",years[i]),table.crash.all[[i]])
  print(paste("Year",years[i],"downloaded"))
}
#table.crash.all
length(table.crash.all)

# complete list in one file ...
save(table.crash.all,file="planecrashdata_list.RData")
# ... plus one .RData and one .csv file per year. The object is deliberately
# still called `data`, because that is the name stored inside the .RData file.
for(i in seq_along(years)){
  data<-table.crash.all[[i]]
  save(data,file=paste0("planecrashdata_",years[i],".RData"))
  write.csv(data,file=paste0("planecrashdata_",years[i],".csv"))
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Plane Crashes per Year

# Number of table rows (= crashes) per downloaded year.
crash.counts <- vapply(
  seq_along(years),
  function(i) length(as.data.frame(table.crash.all[[i]])[,1]),
  integer(1)
)

# Column 1 (V1) = year, column 2 (V2) = number of crashes in that year.
years.crashes <- data.frame(V1 = as.numeric(years), V2 = crash.counts)
years.crashes
rownames(years.crashes)<-years.crashes[,1]

# On-screen version: connected points with the points overdrawn in red.
par(mfrow=c(1,1))

plot(years.crashes, type="b", main="Plane Crashes per Year",
     xlab="Year", ylab="number of crashes")
points(years.crashes, col="red")

par(mfrow=c(1,1))

# All JPEG files below are written into this directory.
setwd("/Users/impac/ownCloud/STA_Statistics/PlaneCrashData/plots")

# Writes "Plane Crashes per Year-<suffix>.jpeg" (1080 x 1080) with the given
# plot type and returns the value of dev.off().
save.crash.plot <- function(suffix, line.type) {
  jpeg(file=paste0("Plane Crashes per Year-", suffix, ".jpeg"), width = 1080,
       height = 1080, pointsize = 12, quality = 75)
  plot(years.crashes, type=line.type, main="Plane Crashes per Year",
       xlab="Year", ylab="number of crashes")
  points(years.crashes, col="red")
  dev.off()
}

save.crash.plot("points", "b")
save.crash.plot("bars", "h")
save.crash.plot("steps", "s")

# Four panels, one per block of rows: 1-30, 31-60, 61-80 and 81 to the last
# row. Each panel shows the yearly counts as vertical bars with red points
# and a blue horizontal line at the mean of the block.
par(mfrow=c(2,2))

last.row <- length(years.crashes[,1])
row.blocks <- list(1:30, 31:60, 61:80, 81:last.row)

for (rows in row.blocks) {
  first <- rows[1]
  last <- rows[length(rows)]
  plot(years.crashes[rows,], type="h",
       main=paste("Plane Crashes per Year",
                  years.crashes[first,1],"-",years.crashes[last,1]),
       xlab="Year", ylab="number of crashes")
  points(years.crashes[rows,], col="red")
  abline(h=mean(years.crashes[rows,2]), col="blue")
}


#jpeg(file="Plane Crashes per Year.jpeg", width = 1080, height = 1080, pointsize = 12, quality = 75)
#par(mfrow=c(2,2))

#plot(years.crashes[1:30,],type="h",main=paste("Plane Crashes per Year",years.crashes[1,1],"-",years.crashes[30,1]),xlab="Year",ylab="number of crashes")
#points(years.crashes[1:30,],col="red")
#abline(h=mean(years.crashes[1:30,2]),col="blue")

#plot(years.crashes[31:60,],type="h",main=paste("Plane Crashes per Year",years.crashes[31,1],"-",years.crashes[60,1]),xlab="Year",ylab="number of crashes")
#points(years.crashes[31:60,],col="red")
#abline(h=mean(years.crashes[31:60,2]),col="blue")

#plot(years.crashes[61:80,],type="h",main=paste("Plane Crashes per Year",years.crashes[61,1],"-",years.crashes[80,1]),xlab="Year",ylab="number of crashes")
#points(years.crashes[61:80,],col="red")
#abline(h=mean(years.crashes[61:80,2]),col="blue")

#plot(years.crashes[81:length(years.crashes[,1]),],type="h",main=paste("Plane Crashes per Year",years.crashes[81,1],"-",years.crashes[length(years.crashes[,1]),1]),xlab="Year",ylab="number of crashes")
#points(years.crashes[81:length(years.crashes[,1]),],col="red")
#abline(h=mean(years.crashes[81:length(years.crashes[,1]),2]),col="blue")

#dev.off()

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Histograms of the number of crashes

par(mfrow=c(1,1))

# yearly crash counts (second column of years.crashes)
crash.counts <- years.crashes[,2]
count.mean <- mean(crash.counts)
count.sd <- sd(crash.counts)

# histogram of absolute frequencies, one class per year
hist(crash.counts, nclass=length(crash.counts),
     freq=TRUE, main='histogram of plane crashes',
     xlab="number of chrashes per year", col = "lightblue", border = "blue")

# the same histogram as a density, with a normal curve using the sample mean
# and standard deviation drawn on top
hist(crash.counts, nclass=length(crash.counts), freq=FALSE,
     main='histogram of plane crashes',xlab="number of chrashes per year",
     col = "lightblue", border = "blue")

curve(dnorm(x, mean=count.mean, sd=count.sd),
      add=TRUE, col="red")

par(mfrow=c(1,2))

# left: normal Q-Q plot; right: kernel density on a log y-axis with the same
# normal curve for comparison
qqnorm(crash.counts)
qqline(crash.counts, col = 2)
plot(density(crash.counts),log='y', main='')
curve(dnorm(x, mean=count.mean, sd=count.sd),
      log="y", add=TRUE, col="red")

# different periods

par(mfrow=c(1,2))

# yearly crash counts of the two periods compared below
last.row <- length(years.crashes[,1])
counts.1980 <- years.crashes[61:80,2]        # rows labelled 1980-1999
counts.2000 <- years.crashes[81:last.row,2]  # rows labelled 2000-2020

# density histograms with a normal curve (sample mean and sd) on top;
# the freq=TRUE variants of both histograms were disabled in the original

# 1980-1999
hist(counts.1980, nclass=20, freq=FALSE, main='1980-1999',
     xlab="number of chrashes per year", col = "lightblue", border = "blue")
curve(dnorm(x, mean=mean(counts.1980), sd=sd(counts.1980)),
      add=TRUE, col="red")

# 2000-2020
hist(counts.2000, nclass=length(counts.2000), freq=FALSE, main='2000-2020',
     xlab="number of chrashes per year", col = "lightblue", border = "blue")
curve(dnorm(x, mean=mean(counts.2000), sd=sd(counts.2000)),
      add=TRUE, col="red")

par(mfrow=c(2,2))

# per period: normal Q-Q plot, then kernel density on a log y-axis with the
# matching normal curve
for (period.counts in list(counts.1980, counts.2000)) {
  qqnorm(period.counts)
  qqline(period.counts, col = 2)
  plot(density(period.counts),log='y', main='')
  curve(dnorm(x, mean=mean(period.counts), sd=sd(period.counts)),
        log="y", add=TRUE, col="red")
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# boxplots

par(mfrow=c(1,1))

# all years in one box, plain and notched
# (titles corrected from "plain crashes" to "plane crashes")
boxplot(years.crashes[,2], main="number of plane crashes per year 1920 - 2020", xlab="", ylab="plane crashes per year")

boxplot(years.crashes[,2], notch=TRUE, col=(c("gold","darkgreen")), main="number of plane crashes per year 1920 - 2020", ylab="plane crashes per year")

# Third column: period label per row. A single string is recycled over the
# selected rows, so the last block works for any number of rows (the original
# hard-coded rep(..., 21), and a stray "278750" after the second assignment
# made the script unparsable).
last.row <- length(years.crashes[,1])
years.crashes[1:30,3]<-"1920-1949"
years.crashes[31:60,3]<-"1950-1979"
years.crashes[61:80,3]<-"1980-1999"
years.crashes[81:last.row,3]<-"2000-2020"

colnames(years.crashes)<-c("year","ncrashes","cat")

# one notched box per period
boxplot(ncrashes~cat, data=years.crashes, notch=TRUE, col=(c("green","blue","blue","green")), main="number of plane crashes per year - groups", xlab="")

# start from here
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# t test

# independent 2-group t-test    t.test(y~x) # where y is numeric and x is a binary factor
# independent 2-group t-test    t.test(y1,y2) # where y1 and y2 are numeric
# paired t-test                 t.test(y1,y2,paired=TRUE) # where y1 & y2 are numeric
# one sample t-test             t.test(y,mu=3) # Ho: mu=3

# two-sample t-test (var.equal is left at its default, i.e. Welch):
# crashes per year in rows 61:80 against rows 81 to the last row
# (labelled 1980-1999 and 2000-2020 in the boxplot section)
t.test(years.crashes[61:80,2],years.crashes[81:length(years.crashes[,1]),2])

# Welch Two Sample t-test
#
# data:  years.crashes[61:80, 2] and years.crashes[81:length(years.crashes[, 1]), 2]
# t = 5.4142, df = 31.453, p-value = 6.268e-06
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#   14.70178 32.45536
# sample estimates:
#   mean of x mean of y 
# 59.15000  35.57143 

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# rate of difference

par(mfrow=c(3,1))

# The same three charts - ROC() series, its cumulative sum and a histogram -
# are drawn for all rows, for rows 61:80 and for rows 81 to the last row.
last.row <- length(years.crashes[,1])

for (rows in list(seq_len(last.row), 61:80, 81:last.row)) {
  # na.omit() drops the leading NA that has no previous value to compare with
  rate<-na.omit(ROC(years.crashes[rows,2]))
  plot(rate,type="l")
  plot(cumsum(rate),type="l")
  hist(rate, nclass=20)
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# One big Table

# Convert every year's download into a data frame with uniform column names
# and stack them once at the end. This replaces rbind() inside a
# 2:length(years) loop, which re-copied the growing table on every iteration
# and iterated over c(2, 1) when only a single year was available.
yearly.tables <- lapply(table.crash.all[seq_along(years)], function(year.tables) {
  year.data <- data.frame(year.tables,stringsAsFactors = TRUE)
  colnames(year.data)<-c("date","location","type","fatalities")
  year.data
})
table.crash.one.table <- do.call(rbind, yearly.tables)
table.crash.one.table

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# create a time series object

# Replace the English month abbreviations "Jan" ... "Dec" in the date column
# by their two-digit month numbers "01" ... "12" (month.abb is the base R
# constant holding exactly these abbreviations in calendar order).
date <- table.crash.one.table[,1]
for (month.no in seq_along(month.abb)) {
  date <- gsub(month.abb[month.no], sprintf("%02d", month.no), date)
}

# Column 4 ("fatalities") is split at "/" and "-"; "/" is first turned into
# "-" so that one strsplit() handles both. The intermediate string is printed,
# as in the original.
fatalities.text <- gsub("/","-",table.crash.one.table[,4])
fatalities.text

fatalities <- strsplit(fatalities.text,'-')

# First piece of every entry. vapply() replaces the original loop that grew
# the vector with c() on every iteration and used 1:length(), which fails on
# an empty table. Missing pieces come out as NA.
number <- vapply(fatalities, function(pieces) pieces[1], character(1))
#as.numeric(number)
table.crash.one.table[,5]<-as.numeric(number)

# Second piece of every entry, cut off at the first "(".
number2 <- vapply(fatalities, function(pieces) {
  strsplit(gsub("(","-",pieces[2], fixed = TRUE),"-")[[1]][1]
}, character(1))
number2
table.crash.one.table[,6]<-as.numeric(number2)

# NOTE(review): further down the same two columns are named "totalfatalities"
# and "totalaboard" in the xts object - confirm which naming is intended.
colnames(table.crash.one.table)<-c("date","location","type",
                                   "fatalities","fatalities-passengers","fatalities-crew")

#table.crash.one.table.xts <- xts(table.crash.one.table[,2:6], order.by=as.Date(date,"%d %m %Y"))
#colnames(table.crash.one.table.xts)<-c("location","type","fatalities",
#"passengerfatalities","crewfatalities")

# Build a time series from the two parsed numeric columns (5 and 6), indexed
# by the crash date; `date` holds "day month-number year" strings at this point.
table.crash.one.table.xts <- xts(table.crash.one.table[,5:6], order.by=as.Date(date,"%d %m %Y"))
colnames(table.crash.one.table.xts)<-c("totalfatalities","totalaboard")

# Print both series to the console for inspection.
table.crash.one.table.xts$totalfatalities
table.crash.one.table.xts$totalaboard

# Full-period plots of each series on its own.
plot(table.crash.one.table.xts$totalfatalities, main="Plane Crash Total Fatalities")

plot(table.crash.one.table.xts$totalaboard, main="Plane Crash Total Aboard")

# Total aboard with total fatalities overlaid in red.
plot(table.crash.one.table.xts$totalaboard, main="Plane Crash Total Aboard vs Total Fatalities")
lines(table.crash.one.table.xts$totalfatalities,col="red")

# The remaining plots restrict the series to the date range "2000/2020" and
# try different plot types. NOTE(review): the first three draw only the
# "aboard" series although the title says "vs Total Fatalities"; only the
# last one adds the fatalities as red points.
plot(table.crash.one.table.xts$totalaboard["2000/2020"], 
     main="Plane Crash Total Aboard vs Total Fatalities",type="c")

plot(table.crash.one.table.xts$totalaboard["2000/2020"], 
     main="Plane Crash Total Aboard vs Total Fatalities",type="o")

plot(table.crash.one.table.xts$totalaboard["2000/2020"], 
     main="Plane Crash Total Aboard vs Total Fatalities",type="h")

plot(table.crash.one.table.xts$totalaboard["2000/2020"], 
     main="Plane Crash Total Aboard vs Total Fatalities",type="b")
points(table.crash.one.table.xts$totalfatalities["2000/2020"],col="red")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Total killed on the ground



# Extract the number inside the parentheses of each fatalities entry:
# "(" and ")" are both turned into "-", the string is split on "-", and the
# second piece is kept (NA when there are no parentheses).
number3 <- vapply(fatalities, function(parts){
  ground.field <- gsub("(","-",parts[2], fixed = TRUE)
  ground.field <- gsub(")","-",ground.field, fixed = TRUE)
  strsplit(ground.field,"-")[[1]][2]
}, character(1))
number3
table.crash.one.table[,7]<-as.numeric(number3)

# Columns 5-7 use the same names as the xts object built from them below.
colnames(table.crash.one.table)<-c("date","location","type",
                          "fatalities","totalfatalities","totalaboard","totalkilledontheground")

# Rebuild the time series with the third numeric column included.
table.crash.one.table.xts <- xts(table.crash.one.table[,5:7], order.by=as.Date(date,"%d %m %Y"))
colnames(table.crash.one.table.xts)<-c("totalfatalities","totalaboard","totalkilledontheground")


# Same series drawn with three different plot types.
plot(table.crash.one.table.xts$totalkilledontheground, main="Plane Crash Total killed on the ground")
plot(table.crash.one.table.xts$totalkilledontheground, main="Plane Crash Total killed on the ground",type="b")
plot(table.crash.one.table.xts$totalkilledontheground, main="Plane Crash Total killed on the ground",type="c")

# Martin Stoppacher                                                             #
# office@martinstoppacher.com                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#################################################################################

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# A simple script for plane crash data from:

# http://www.planecrashinfo.com/database.htm

# The aviation accident database includes:

# All civil and commercial aviation accidents of scheduled and non-scheduled

# passenger airliners worldwide, which resulted in a fatality

# (including all U.S. Part 121 and Part 135 fatal accidents)

# All cargo, positioning, ferry and test flight fatal accidents.

# All military transport accidents with 10 or more fatalities.

# All commercial and military helicopter accidents with greater than

# 10 fatalities.

# All civil and military airship accidents involving fatalities.

# Aviation accidents involving the death of famous people.

# Aviation accidents or incidents of noteworthy interest.

# Database Format

# Date: Date of accident, in the format - January 01, 2001

# Time: Local time, in 24 hr. format unless otherwise specified

# Airline/Op: Airline or operator of the aircraft

# Flight #: Flight number assigned by the aircraft operator

# Route: Complete or partial route flown prior to the accident

# AC Type: Aircraft type

# Reg: ICAO registration of the aircraft

# cn / ln: Construction or serial number / Line or fuselage number

# Aboard: Total aboard (passengers / crew)

# Fatalities: Total fatalities aboard (passengers / crew)

# Ground: Total killed on the ground

# Summary: Brief description of the accident and cause if known

# to be done: histograms, t test?, types of crashes,

# country filter?, region filter, plane filter

# text analysis,

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# packages

library("XML")

library("quantmod")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

rm(list = ls(all = TRUE))

getwd()

#system("ls")

setwd("~/ownCloud/STA_Statistics/PlaneCrashData")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# get the available years

url <- "http://www.planecrashinfo.com/database.htm"

tables <- readHTMLTable(url,header=FALSE)

# Collect every cell of the year-index table (all columns except the first),
# row by row. Cells that are not numbers are dropped by na.omit() below.
years <- NULL

for(i in seq_len(nrow(tables[[2]]))){

  for(e in seq_len(ncol(tables[[2]]))[-1]){

    # as.character() reads the cell itself and works whether the column was
    # parsed as a factor or as character; levels(...)[i] only matched the
    # cell when the factor levels happened to be in row order.
    years <- c(years, as.character(tables[[2]][i,e]))

  }

}

years

years <- na.omit(as.numeric(years))

years

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# get all data tables

table.crash.all <- list()

for(i in 1:length(years)){

url2 <- paste("http://www.planecrashinfo.com/",

years[i],"/",years[i],".htm",sep="")

assign(paste("table.crash.",years[i],sep=""),readHTMLTable(url2))

table.crash.all[[i]] <- get(paste("table.crash.",years[i],sep=""))

print(paste("Year",years[i],"downloaded"))

}

#table.crash.all

length(table.crash.all)

save(table.crash.all,file="planecrashdata_list.RData")

for(i in 1:length(years)){

data<-table.crash.all[[i]]

save(data,file=paste("planecrashdata_",years[i],".RData",sep=""))

}

for(i in 1:length(years)){

data<-table.crash.all[[i]]

write.csv(data,file=paste("planecrashdata_",years[i],".csv",sep=""))

}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Plane Crashes per Year

years.crashes <- data.frame()

for(i in 1:length(years)){

years.crashes[i,1] <- years[i]

years.crashes[i,2] <- length(as.data.frame(table.crash.all[[i]])[,1])

}

years.crashes

rownames(years.crashes)<-years.crashes[,1]

par(mfrow=c(1,1))

plot(years.crashes,type="b",main="Plane Crashes per Year",

xlab="Year",ylab="number of crashes")

points(years.crashes,col="red")

par(mfrow=c(1,1))

setwd("/Users/impac/ownCloud/STA_Statistics/PlaneCrashData/plots")

jpeg(file="Plane Crashes per Year-points.jpeg", width = 1080,

height = 1080, pointsize = 12, quality = 75)

plot(years.crashes,type="b",main="Plane Crashes per Year",

xlab="Year",ylab="number of crashes")

points(years.crashes,col="red")

dev.off()

jpeg(file="Plane Crashes per Year-bars.jpeg", width = 1080,

height = 1080, pointsize = 12, quality = 75)

plot(years.crashes,type="h",main="Plane Crashes per Year",

xlab="Year",ylab="number of crashes")

points(years.crashes,col="red")

dev.off()

jpeg(file="Plane Crashes per Year-steps.jpeg", width = 1080,

height = 1080, pointsize = 12, quality = 75)

plot(years.crashes,type="s",main="Plane Crashes per Year",

xlab="Year",ylab="number of crashes")

points(years.crashes,col="red")

dev.off()

par(mfrow=c(2,2))

plot(years.crashes[1:30,],type="h",main=paste("Plane Crashes per Year",

years.crashes[1,1],"-",years.crashes[30,1]),xlab="Year",

ylab="number of crashes")

points(years.crashes[1:30,],col="red")

abline(h=mean(years.crashes[1:30,2]),col="blue")

plot(years.crashes[31:60,],type="h",main=paste("Plane Crashes per Year",years.crashes[31,1],"-",years.crashes[60,1]),xlab="Year",ylab="number of crashes")

points(years.crashes[31:60,],col="red")

abline(h=mean(years.crashes[31:60,2]),col="blue")

plot(years.crashes[61:80,],type="h",main=paste("Plane Crashes per Year",years.crashes[61,1],"-",years.crashes[80,1]),xlab="Year",ylab="number of crashes")

points(years.crashes[61:80,],col="red")

abline(h=mean(years.crashes[61:80,2]),col="blue")

plot(years.crashes[81:length(years.crashes[,1]),],type="h",main=paste("Plane Crashes per Year",years.crashes[81,1],"-",years.crashes[length(years.crashes[,1]),1]),xlab="Year",ylab="number of crashes")

points(years.crashes[81:length(years.crashes[,1]),],col="red")

abline(h=mean(years.crashes[81:length(years.crashes[,1]),2]),col="blue")

#jpeg(file="Plane Crashes per Year.jpeg", width = 1080, height = 1080, pointsize = 12, quality = 75)

#par(mfrow=c(2,2))

#plot(years.crashes[1:30,],type="h",main=paste("Plane Crashes per Year",years.crashes[1,1],"-",years.crashes[30,1]),xlab="Year",ylab="number of crashes")

#points(years.crashes[1:30,],col="red")

#abline(h=mean(years.crashes[1:30,2]),col="blue")

#plot(years.crashes[31:60,],type="h",main=paste("Plane Crashes per Year",years.crashes[31,1],"-",years.crashes[60,1]),xlab="Year",ylab="number of crashes")

#points(years.crashes[31:60,],col="red")

#abline(h=mean(years.crashes[31:60,2]),col="blue")

#plot(years.crashes[61:80,],type="h",main=paste("Plane Crashes per Year",years.crashes[61,1],"-",years.crashes[80,1]),xlab="Year",ylab="number of crashes")

#points(years.crashes[61:80,],col="red")

#abline(h=mean(years.crashes[61:80,2]),col="blue")

#plot(years.crashes[81:length(years.crashes[,1]),],type="h",main=paste("Plane Crashes per Year",years.crashes[81,1],"-",years.crashes[length(years.crashes[,1]),1]),xlab="Year",ylab="number of crashes")

#points(years.crashes[81:length(years.crashes[,1]),],col="red")

#abline(h=mean(years.crashes[81:length(years.crashes[,1]),2]),col="blue")

#dev.off()

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Histograms of the number of crashes

par(mfrow=c(1,1))

hist(years.crashes[,2], nclass=length(years.crashes[,2]),

freq=TRUE, main='histogram of plane crashes',

xlab="number of chrashes per year", col = "lightblue", border = "blue")

hist(years.crashes[,2], nclass=length(years.crashes[,2]), freq=FALSE,

main='histogram of plane crashes',xlab="number of chrashes per year",

col = "lightblue", border = "blue")

curve(dnorm(x, mean=mean(years.crashes[,2]),sd=sd(years.crashes[,2])),

add=TRUE, col="red")

par(mfrow=c(1,2))

qqnorm(years.crashes[,2])

qqline(years.crashes[,2], col = 2)

plot(density(years.crashes[,2]),log='y', main='')

curve(dnorm(x, mean=mean(years.crashes[,2]),

sd=sd(years.crashes[,2])), log="y", add=TRUE, col="red")

# different periods

par(mfrow=c(1,2))

# 1980-1999

#hist(years.crashes[61:80,2], nclass=20, freq=TRUE,

# main='',xlab="number of chrashes per year", col = "lightblue",

# border = "blue")

hist(years.crashes[61:80,2], nclass=20, freq=FALSE, main='1980-1999',xlab="number of chrashes per year", col = "lightblue", border = "blue")

curve(dnorm(x, mean=mean(years.crashes[61:80,2]),sd=sd(years.crashes[61:80,2])), add=TRUE, col="red")

# 2000-2020

#hist(years.crashes[81:length(years.crashes[,1]),2], nclass=length(years.crashes[81:length(years.crashes[,1]),2]), freq=TRUE, main='',xlab="number of chrashes per year", col = "lightblue", border = "blue")

hist(years.crashes[81:length(years.crashes[,1]),2], nclass=length(years.crashes[81:length(years.crashes[,1]),2]), freq=FALSE, main='2000-2020',xlab="number of chrashes per year", col = "lightblue", border = "blue")

curve(dnorm(x,mean=mean(years.crashes[81:length(years.crashes[,1]),2]),sd=sd(years.crashes[81:length(years.crashes[,1]),2])), add=TRUE, col="red")

par(mfrow=c(2,2))

qqnorm(years.crashes[61:80,2])

qqline(years.crashes[61:80,2], col = 2)

plot(density(years.crashes[61:80,2]),log='y', main='')

curve(dnorm(x, mean=mean(years.crashes[61:80,2]),sd=sd(years.crashes[61:80,2])), log="y", add=TRUE, col="red")

qqnorm(years.crashes[81:length(years.crashes[,1]),2])

qqline(years.crashes[81:length(years.crashes[,1]),2], col = 2)

plot(density(years.crashes[81:length(years.crashes[,1]),2]),log='y', main='')

curve(dnorm(x, mean=mean(years.crashes[81:length(years.crashes[,1]),2]),sd=sd(years.crashes[81:length(years.crashes[,1]),2])), log="y", add=TRUE, col="red")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# boxplots

# Boxplots of the yearly crash counts, overall and grouped by period.
par(mfrow=c(1,1))

boxplot(years.crashes[,2], main="number of plane crashes per year 1920 - 2020", xlab="", ylab="plane crashes per year")

boxplot(years.crashes[,2], notch=TRUE, col=(c("gold","darkgreen")), main="number of plane crashes per year 1920 - 2020", ylab="plane crashes per year")

# Label each row with its period in a new third column. A single string is
# recycled over the selected rows, so the last group no longer depends on a
# hard-coded row count.
years.crashes[1:30,3] <- "1920-1949"

years.crashes[31:60,3] <- "1950-1979"

years.crashes[61:80,3] <- "1980-1999"

years.crashes[81:nrow(years.crashes),3] <- "2000-2020"

colnames(years.crashes)<-c("year","ncrashes","cat")

boxplot(ncrashes~cat, data=years.crashes, notch=TRUE, col=(c("green","blue","blue","green")), main="number of plane crashes per year - groups", xlab="")

# start from here

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# t test

# independent 2-group t-test t.test(y~x) # where y is numeric and x is a binary factor

# independent 2-group t-test t.test(y1,y2) # where y1 and y2 are numeric

# paired t-test t.test(y1,y2,paired=TRUE) # where y1 & y2 are numeric

# one sample t-test t.test(y,mu=3) # Ho: mu=3

t.test(years.crashes[61:80,2],years.crashes[81:length(years.crashes[,1]),2])

# Welch Two Sample t-test

# data: years.crashes[61:80, 2] and years.crashes[81:length(years.crashes[, 1]), 2]

# t = 5.4142, df = 31.453, p-value = 6.268e-06

# alternative hypothesis: true difference in means is not equal to 0

# 95 percent confidence interval:

# 14.70178 32.45536

# sample estimates:

# mean of x mean of y

# 59.15000 35.57143

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# rate of difference

par(mfrow=c(3,1))

rate<-na.omit(ROC(years.crashes[,2]))

plot(rate,type="l")

plot(cumsum(rate),type="l")

hist(rate, nclass=20)

rate<-na.omit(ROC(years.crashes[61:80,2]))

plot(rate,type="l")

plot(cumsum(rate),type="l")

hist(rate, nclass=20)

rate<-na.omit(ROC(years.crashes[81:length(years.crashes[,1]),2]))

plot(rate,type="l")

plot(cumsum(rate),type="l")

hist(rate, nclass=20)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# One big Table

table.crash.one.table <- data.frame(table.crash.all[[1]],stringsAsFactors = TRUE)

colnames(table.crash.one.table)<-c("date","location","type","fatalities")

for(i in 2:length(years)){

data <- data.frame(table.crash.all[[i]],stringsAsFactors = TRUE)

colnames(data)<-c("date","location","type","fatalities")

table.crash.one.table <- rbind(table.crash.one.table,data)

}

table.crash.one.table

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# create a time series object

date <- gsub("Jan","01",table.crash.one.table[,1])

date <- gsub("Feb","02",date)

date <- gsub("Mar","03",date)

date <- gsub("Apr","04",date)

date <- gsub("May","05",date)

date <- gsub("Jun","06",date)

date <- gsub("Jul","07",date)

date <- gsub("Aug","08",date)

date <- gsub("Sep","09",date)

date <- gsub("Oct","10",date)

date <- gsub("Nov","11",date)

date <- gsub("Dec","12",date)

gsub("/","-",table.crash.one.table[,4])

fatalities <- strsplit(gsub("/","-",table.crash.one.table[,4]),'-')

number<-NULL

for(i in 1:length(fatalities)){number<-c(number,fatalities[[i]][1])}

#as.numeric(number)

table.crash.one.table[,5]<-as.numeric(number)

#strsplit(gsub("(","-",fatalities[[i]][2], fixed = TRUE),"-")[1]

number2<-NULL

for(i in 1:length(fatalities)){number2<-c(number2,

strsplit(gsub("(","-",fatalities[[i]][2], fixed = TRUE),"-")[[1]][1])}

number2

table.crash.one.table[,6]<-as.numeric(number2)

colnames(table.crash.one.table)<-c("date","location","type",

"fatalities","fatalities-passengers","fatalities-crew")

#table.crash.one.table.xts <- xts(table.crash.one.table[,2:6], order.by=as.Date(date,"%d %m %Y"))

#colnames(table.crash.one.table.xts)<-c("location","type","fatalities",

#"passengerfatalities","crewfatalities")

table.crash.one.table.xts <- xts(table.crash.one.table[,5:6], order.by=as.Date(date,"%d %m %Y"))

colnames(table.crash.one.table.xts)<-c("totalfatalities","totalaboard")

table.crash.one.table.xts$totalfatalities

table.crash.one.table.xts$totalaboard

plot(table.crash.one.table.xts$totalfatalities, main="Plane Crash Total Fatalities")

plot(table.crash.one.table.xts$totalaboard, main="Plane Crash Total Aboard")

plot(table.crash.one.table.xts$totalaboard, main="Plane Crash Total Aboard vs Total Fatalities")

lines(table.crash.one.table.xts$totalfatalities,col="red")

plot(table.crash.one.table.xts$totalaboard["2000/2020"],

main="Plane Crash Total Aboard vs Total Fatalities",type="c")

plot(table.crash.one.table.xts$totalaboard["2000/2020"],

main="Plane Crash Total Aboard vs Total Fatalities",type="o")

plot(table.crash.one.table.xts$totalaboard["2000/2020"],

main="Plane Crash Total Aboard vs Total Fatalities",type="h")

plot(table.crash.one.table.xts$totalaboard["2000/2020"],

main="Plane Crash Total Aboard vs Total Fatalities",type="b")

points(table.crash.one.table.xts$totalfatalities["2000/2020"],col="red")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Total killed on the ground

number3<-NULL

for(i in 1:length(fatalities)){

sub <- gsub("(","-",fatalities[[i]][2], fixed = TRUE)

sub <- gsub(")","-",sub, fixed = TRUE)

number3<-c(number3,strsplit(sub,"-")[[1]][2])

}

number3

table.crash.one.table[,7]<-as.numeric(number3)

colnames(table.crash.one.table)<-c("date","location","type",

"fatalities","fatalities-passengers","fatalities-crew","totalkilledontheground")

table.crash.one.table.xts <- xts(table.crash.one.table[,5:7], order.by=as.Date(date,"%d %m %Y"))

colnames(table.crash.one.table.xts)<-c("totalfatalities","totalaboard","totalkilledontheground")

plot(table.crash.one.table.xts$totalkilledontheground, main="Plane Crash Total killed on the ground")

plot(table.crash.one.table.xts$totalkilledontheground, main="Plane Crash Total killed on the ground",type="b")

plot(table.crash.one.table.xts$totalkilledontheground, main="Plane Crash Total killed on the ground",type="c")

# Martin Stoppacher #

# office@martinstoppacher.com #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

#################################################################################

bifurcation tree with R

06/23/2015Allgemein, Programming, R, StatisticsMartin Stoppacher

Lately I was exploring the logistic map because I was fascinated, once again, by how chaotic behavior can arise from very simple circumstances (i.e. a rather simple equation).
Of course, I learned about this in school but never got a chance to write any code for it.
So here it is. 🙂

And here is some of the code i wrote:

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# bifurcation tree, Logistic map
# xn1=rxn(1-xn)

rm(list = ls(all = TRUE))

r <- 2.2           # rate
x <- 0.5           # population

(r*x*(1-x))

r <- 2.2
(x <- seq(100,1)/100)
y = r*x*(1-x)
plot(y)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# bifurcation tree, Logistic map

# xn1=rxn(1-xn)

rm(list = ls(all = TRUE))

r <- 2.2 # rate

x <- 0.5 # population

(r*x*(1-x))

r <- 2.2

(x <- seq(100,1)/100)

y = r*x*(1-x)

plot(y)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# if the starting population is 0.1

# Iterate the logistic map 19 times from x = 0.1 with rate 2.6, printing the
# step number and value at every step, then show and plot the trajectory.
r <- 2.6
x <- 0.1
i <- 1L
while(i < 20L){
  i <- i + 1L
  x[i] <- r*x[i-1]*(1-x[i-1])
  print(i)
  print(x[i])
  #Sys.sleep(0.5)
}
x
plot(x, type="b")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# if the starting population is 0.1

r <- 2.6

x <- 0.1

for(i in 2:20){

(x[i] <- r*x[i-1]*(1-x[i-1]))

print(i)

print(x[i])

#Sys.sleep(0.5)

}

plot(x, type="b")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# if the starting population is 0.8

r <- 2.6
x <- 0.8
for(i in 2:20){
  (x[i] <- r*x[i-1]*(1-x[i-1]))
  print(i)
  print(x[i])
  #Sys.sleep(0.5)
}
x
plot(x, type="b")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# if the starting population is 0.8

r <- 2.6

x <- 0.8

for(i in 2:20){

(x[i] <- r*x[i-1]*(1-x[i-1]))

print(i)

print(x[i])

#Sys.sleep(0.5)

}

plot(x, type="b")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# if we make variations to the initial population all of them stabilize

plot(1:20,(1:20)/20,ylab="",xlab="",cex=0)

for(e in ((1:9)/10)){
r <- 2.6
x <- e
for(i in 2:20){
  (x[i] <- r*x[i-1]*(1-x[i-1]))
  print(i)
  print(x[i])
  #Sys.sleep(0.5)
}
x
lines(x, type="b")
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# if we make variations to the initial population all of them stabilize

plot(1:20,(1:20)/20,ylab="",xlab="",cex=0)

for(e in ((1:9)/10)){

r <- 2.6

x <- e

for(i in 2:20){

(x[i] <- r*x[i-1]*(1-x[i-1]))

print(i)

print(x[i])

#Sys.sleep(0.5)

}

lines(x, type="b")

}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# what does this look like if there are variations to the growth rate?

plot(0,0, ylab="", xlab="", cex=0, xlim=c(1,20), ylim=c(0,1))

for(e in (1:30)/10){
  r <- e
  x <- 0.5
  for(i in 2:20){
    (x[i] <- r*x[i-1]*(1-x[i-1]))
    print(i)
    print(x[i])
    #Sys.sleep(0.5)
  }
  x
  lines(x, type="b")
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# what does this look like if there are variations to the growth rate?

plot(0,0, ylab="", xlab="", cex=0, xlim=c(1,20), ylim=c(0,1))

for(e in (1:30)/10){

r <- e

x <- 0.5

for(i in 2:20){

(x[i] <- r*x[i-1]*(1-x[i-1]))

print(i)

print(x[i])

#Sys.sleep(0.5)

}

lines(x, type="b")

}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# plotting more values but only the last

plot(0,0, ylab="", xlab="", cex=0, xlim=c(2.5,4), ylim=c(0,1))

n<-300
m<-30
for(e in ((290:400)/100)){
  print(r)
  r <- e
  x <- 0.1
  for(i in 2:n){
    (x[i] <- r*x[i-1]*(1-x[i-1]))
    print(r)
    print(x[i])
  }
  x
  points(rep(r,(m+1)),x[(n-m):n], type="p", cex=0.2, lwd=0.2)
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# plotting more values but only the last

plot(0,0, ylab="", xlab="", cex=0, xlim=c(2.5,4), ylim=c(0,1))

n<-300

m<-30

for(e in ((290:400)/100)){

print(r)

r <- e

x <- 0.1

for(i in 2:n){

(x[i] <- r*x[i-1]*(1-x[i-1]))

print(r)

print(x[i])

}

points(rep(r,(m+1)),x[(n-m):n], type="p", cex=0.2, lwd=0.2)

}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# adding color

# Bifurcation diagram for r = 0.01 ... 4, coloured by growth-rate band.
plot(0,0, ylab="", xlab="", cex=0, xlim=c(0,4), ylim=c(0,1))

n<-100   # iterations per growth rate
m<-30    # the last m+1 iterates of each run are plotted

# One colour per band. The bands are disjoint, so every point is drawn
# exactly once (the old overlapping conditions drew some points up to three
# times; the colour chosen here is the one that ended up on top):
#   r < 1 red | 1 <= r < 2 black | 2 <= r < 3 orange |
#   3 <= r < 3.5 blue | r >= 3.5 green
band.breaks <- c(1, 2, 3, 3.5)
band.colors <- c("red","black","orange","blue","green")

for(e in ((1:400)/100)){
  r <- e
  print(r)   # printed after the assignment, so it shows the current rate
  x <- 0.6
  for(i in 2:n){
    x[i] <- r*x[i-1]*(1-x[i-1])
    print(r)
    print(x[i])
  }
  # findInterval() counts how many breaks are <= r, i.e. the band index - 1.
  points(rep(r,(m+1)),x[(n-m):n], type="p", cex=0.2, lwd=0.2,
         col=band.colors[findInterval(r, band.breaks) + 1])
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# adding color

plot(0,0, ylab="", xlab="", cex=0, xlim=c(0,4), ylim=c(0,1))

n<-100

m<-30

for(e in ((1:400)/100)){

print(r)

r <- e

x <- 0.6

for(i in 2:n){

(x[i] <- r*x[i-1]*(1-x[i-1]))

print(r)

print(x[i])

}

if(r<1)points(rep(r,(m+1)),x[(n-m):n], type="p", cex=0.2, lwd=0.2, col="red")

if(r>=1&r<2)points(rep(r,(m+1)),x[(n-m):n], type="p", cex=0.2, lwd=0.2, col="black")

if(r>=2&r<5)points(rep(r,(m+1)),x[(n-m):n], type="p", cex=0.2, lwd=0.2, col="orange")

if(r>=3&r<=3.5)points(rep(r,(m+1)),x[(n-m):n], type="p", cex=0.2, lwd=0.2, col="blue")

if(r>=3.5&r<=4)points(rep(r,(m+1)),x[(n-m):n], type="p", cex=0.2, lwd=0.2, col="green")

}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# bifurcation tree as a function

# Iterate the logistic map x[k+1] = r * x[k] * (1 - x[k]).
#
# r: growth rate.
# x: starting value (only the first element is used).
# n: number of iterations.
#
# Returns (invisibly, as before) the n iterates that follow the starting
# value; the starting value itself is not included. For n < 1 an empty
# numeric vector is returned.
log.tree <- function(r=2, x=0.4, n=200){
  if(n < 1) return(invisible(numeric(0)))
  iterates <- numeric(n)   # preallocated instead of grown inside the loop
  current <- x[1]
  for(i in seq_len(n)){
    current <- r*current*(1-current)
    iterates[i] <- current
  }
  invisible(iterates)
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# bifurcation tree as a function

# Iterate the logistic map x[k+1] = r * x[k] * (1 - x[k]).
#
# r: growth rate.
# x: starting value (only the first element is used).
# n: number of iterations.
#
# Returns (invisibly, as before) the n iterates that follow the starting
# value; the starting value itself is not included. For n < 1 an empty
# numeric vector is returned.
log.tree <- function(r=2, x=0.4, n=200){
  if(n < 1) return(invisible(numeric(0)))
  iterates <- numeric(n)   # preallocated instead of grown inside the loop
  current <- x[1]
  for(i in seq_len(n)){
    current <- r*current*(1-current)
    iterates[i] <- current
  }
  invisible(iterates)
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# using the function with sapply

point.count <- 50    # how many of the last iterates to plot per rate
r.seq <- seq(1,4,by=0.001) # growth rates to scan
plot(0,0, ylab="", xlab="", cex=0, xlim=c(0,4), ylim=c(0,1))
# One column per rate; keep only the last point.count rows of each column.
log.tree.points <- tail(sapply(r.seq, log.tree, x=0.1, n=3000),point.count)
# Draw all columns in one call: each rate is repeated once per kept iterate,
# matching the column-by-column order of the flattened matrix.
points(rep(r.seq, each=point.count), as.vector(log.tree.points), cex=0.2, lwd=0.2)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# 

system.time(log.tree.points <- tail(sapply(r.seq, log.tree, x=0.1, n=300),point.count))
system.time(log.tree.points <- tail(sapply(r.seq, log.tree, x=0.1, n=3000),point.count))
system.time(log.tree.points <- tail(sapply(r.seq, log.tree, x=0.1, n=30000),point.count))

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# using the function with sapply

point.count <- 50 # sets how many of the last observations to use

r.seq <- seq(1,4,by=0.001) # a sequence for different rates

plot(0,0, ylab="", xlab="", cex=0, xlim=c(0,4), ylim=c(0,1))

log.tree.points <- tail(sapply(r.seq, log.tree, x=0.1, n=3000),point.count)

for(i in 1:length(r.seq)) points(rep(r.seq[i],point.count),log.tree.points[,i], cex=0.2, lwd=0.2)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

system.time(log.tree.points <- tail(sapply(r.seq, log.tree, x=0.1, n=300),point.count))

system.time(log.tree.points <- tail(sapply(r.seq, log.tree, x=0.1, n=3000),point.count))

system.time(log.tree.points <- tail(sapply(r.seq, log.tree, x=0.1, n=30000),point.count))

# Martin Stoppacher                                                             #
# office@martinstoppacher.com                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#################################################################################

# Martin Stoppacher #

# office@martinstoppacher.com #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

#################################################################################

Creating sounds out of financial data.

04/09/2015Allgemein, Code, Data Analysis, Engineering, Programming, R, Soundengineering, StatisticsMartin Stoppacher

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#

# Install seewave and tuneR only when they are missing, then attach them.
# library() stops with an error if a package cannot be loaded, whereas
# require() would only return FALSE and let the script fail later.
for(pkg in c("seewave","tuneR")){
  if(!requireNamespace(pkg, quietly = TRUE)) install.packages(pkg)
  library(pkg, character.only = TRUE)
}

#rm(list = ls(all = TRUE)) # clear current workspace #
setwd("/Users/martinstoppacher/R Analysis/3_Index Sounds/")

library("quantmod")

getSymbols("^GSPC",from=1900)

head(GSPC)

tail(GSPC)

# Closing-price charts saved as JPEG files: the full series first, then one
# chart per ten-year window. Titles use a literal "&" (the HTML entity
# "&amp;" had leaked into the strings).
jpeg(filename = "SP500.jpg1975-2015.jpg", width=880,height=880,res=100)
plot(Cl(GSPC),main="S&P 500 Index (closing prices)")
dev.off()

summary(Cl(GSPC))

jpeg(filename = "SP500.jpg1975-1985.jpg", width=880,height=880,res=100)
plot(Cl(GSPC)["1975/1985"],main="S&P 500 Index 1975-1985 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1985-1995.jpg", width=880,height=880,res=100)
plot(Cl(GSPC)["1985/1995"],main="S&P 500 Index 1985-1995 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1995-2005.jpg", width=880,height=880,res=100)
plot(Cl(GSPC)["1995/2005"],main="S&P 500 Index 1995-2005 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg2005-2015.jpg", width=880,height=880,res=100)
plot(Cl(GSPC)["2005/2015"],main="S&P 500 Index 2005-2015 (closing prices)")
dev.off()

# Four consecutive ten-year windows of closing prices overlaid on a common
# "trading day within the window" axis. The green series now uses 2005/2014,
# matching the legend and the other three ten-year windows.
jpeg(filename = "SP500.jpg 1975-2005 4decades-new.jpg", width=880,height=880,res=100)
plot(as.numeric(Cl(GSPC)["1975/1984"]),main="S&P 500 Index 1975-1985 - 4 decades (closing prices)",ylim=c(0,2100),type="l",ylab="index values",xlab="days (10 year)")
lines(as.numeric(Cl(GSPC)["1985/1994"]),col="red")
lines(as.numeric(Cl(GSPC)["1995/2004"]),col="blue")
lines(as.numeric(Cl(GSPC)["2005/2014"]),col="green")
legend("topleft", legend = c("1975-1984","1985-1994","1995-2004","2005-2014") , lty = 1, col = c("black","red","blue","green"))
dev.off()

# Same four windows, each expressed as relative change from its own first
# closing price (value / first value - 1). Title uses a literal "&".
jpeg(filename = "SP500.jpg 1975-2005 4decades percent-new2.jpg", width=880,height=880,res=100)
plot((as.numeric(Cl(GSPC)["1975/1984"])/as.numeric(Cl(GSPC)["1975/1984"][1])-1),main="S&P 500 Index 1975-1985 - 4 decades (percent changes)",ylim=c(-0.4,2.3),type="l",ylab="index values",xlab="days (10 year)")
lines((as.numeric(Cl(GSPC)["1985/1994"])/as.numeric(Cl(GSPC)["1985/1994"][1])-1),col="red")
lines((as.numeric(Cl(GSPC)["1995/2004"])/as.numeric(Cl(GSPC)["1995/2004"][1])-1),col="blue")
lines((as.numeric(Cl(GSPC)["2005/2014"])/as.numeric(Cl(GSPC)["2005/2014"][1])-1),col="green")
legend("topleft", legend = c("1975-1984","1985-1994","1995-2004","2005-2014") , lty = 1, col = c("black","red","blue","green"))
dev.off()

# Alternative truncation into calendar decades; each window is divided by its
# own first closing price. Title uses a literal "&".
jpeg(filename = "SP500.jpg 1975-2005 4otherdecades percent2-new.jpg", width=880,height=880,res=100)
plot(as.numeric(Cl(GSPC)["1980/1989"])/as.numeric(Cl(GSPC)["1980/1989"][1]),main="S&P 500 Index 1975-2015 - new truncation - (percent changes)",ylim=c(0.6,4.2),type="l",ylab="index values",xlab="days (10 year)",col="yellow")
lines(as.numeric(Cl(GSPC)["1975/1979"])/as.numeric(Cl(GSPC)["1975/1979"][1]),col="red")
lines(as.numeric(Cl(GSPC)["1990/1999"])/as.numeric(Cl(GSPC)["1990/1999"][1]),col="black")
lines(as.numeric(Cl(GSPC)["2000/2009"])/as.numeric(Cl(GSPC)["2000/2009"][1]),col="blue")
lines(as.numeric(Cl(GSPC)["2010/2014"])/as.numeric(Cl(GSPC)["2010/2014"][1]),col="green")
legend("topleft", legend = c("1975-1979","1980-1989","1990-1999","2000-2009","2010-2014") , lty = 1, col = c("red","yellow","black","blue","green"))
dev.off()

library("PerformanceAnalytics")

Cl(GSPC)["2010/2015"]/as.numeric(Cl(GSPC)["2005/2015"][1])
charts.PerformanceSummary(,main="",xlab="")

# percent

# One chart per window, each series divided by its first closing price.
# Titles use a literal "&".
jpeg(filename = "SP500.jpg1975-1985-percent.jpg", width=880,height=880,res=100)
plot(Cl(GSPC)["1975/1985"]/as.numeric(Cl(GSPC)["1975/1985"][1]),main="S&P 500 Index 1975-1985 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1985-1995-percent.jpg", width=880,height=880,res=100)
plot(Cl(GSPC)["1985/1995"]/as.numeric(Cl(GSPC)["1985/1995"][1]),main="S&P 500 Index 1985-1995 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg1995-2005-percent.jpg", width=880,height=880,res=100)
plot(Cl(GSPC)["1995/2005"]/as.numeric(Cl(GSPC)["1995/2005"][1]),main="S&P 500 Index 1995-2005 (closing prices)")
dev.off()

jpeg(filename = "SP500.jpg2005-2015-percent.jpg", width=880,height=880,res=100)
plot(Cl(GSPC)["2005/2015"]/as.numeric(Cl(GSPC)["2005/2015"][1]),main="S&P 500 Index 2005-2015 (closing prices)")
dev.off()

# First difference of the closing prices; the last 200 values are plotted.
# (HTML entities "&lt;" and "&amp;" replaced by the real "<" and "&" so the
# code parses again.)
GSPC.cl.close <- diff(Cl(GSPC))
tail(GSPC.cl.close,20)
jpeg(filename = "SP500-first-difference-example.jpg", width=880,height=880,res=100)
plot(tail(GSPC.cl.close,200),main="S&P 500 first difference")
dev.off()

##

# Rate of change of the closing prices via ROC().
GSPC.cl.close.roc <- ROC(Cl(GSPC))
tail(GSPC.cl.close.roc,5)

#prices <- Cl(GSPC) # ROC is log diff!
#log_returns <- diff(log(prices), lag=1)
#tail(log_returns)

jpeg(filename = "SP500-first-roc-example.jpg", width=880,height=880,res=100)
plot(tail(GSPC.cl.close.roc,200),main="S&P 500 first difference")
dev.off()

##

dax.roc &lt;- na.omit(ROC(Cl(GSPC)))*100
plot(head(dax.roc,20))
plot(dax.roc)

# standard

# Scale the returns into [-1, 1] by dividing by the largest absolute value,
# whichever sign it has. This is the same result the two-branch comparison of
# |max| against |min| produced; the comparison operator had also been mangled
# into an HTML entity, which made the statement unparseable.
dax.roc.standard <- as.numeric(dax.roc / max(abs(dax.roc)))

plot(dax.roc.standard,type="l")

w&lt;-dax.roc.standard
f=41000
savewav(w,f=f ,filename = "xyz.wav")
aw&lt;-readWave("xyz.wav")
play(aw)

f=10000
savewav(w,f=f ,filename = "xyz.wav")
aw&lt;-readWave("xyz.wav")
play(aw)

f=5000
savewav(w,f=f ,filename = "xyz.wav")
aw&lt;-readWave("xyz.wav")
play(aw)

# Double the length of the series by inserting, after every sample, the
# midpoint between that sample and its successor.
dax.roc <- as.numeric(dax.roc)

# Midpoint of each sample and the next one. The final sample has no successor,
# so its midpoint is NA and is dropped by na.omit() below.
dax.roc2 <- matrix((dax.roc + c(dax.roc[-1], NA)) / 2, ncol = 1)

# rbind() stacks the samples over their midpoints; reading that 2 x n matrix
# column by column yields sample, midpoint, sample, midpoint, ...
# Built in one step instead of growing a matrix with rbind() inside a loop.
lines <- matrix(rbind(dax.roc, as.numeric(dax.roc2)), ncol = 1)

dax.roc <- na.omit(lines)
tail(dax.roc)

dax.roc &lt;- na.omit(ROC(SMA(Cl(GSPC),n=500)))*100
dax.roc &lt;- na.omit(ROC(Cl(GDAXI)))*100

dax.roc.standard &lt;- as.numeric(dax.roc/max(dax.roc))
dax.roc.standard &lt;- as.numeric(dax.roc/min(dax.roc))

hist(dax.roc.standard)

w&lt;-na.omit(SMA(dax.roc.standard,n=100))
w&lt;-dax.roc.standard

for(i in 1:5){
w&lt;-c(w,w)
}

f=32000
savewav(w,f=f ,filename = "xyz.wav")
aw&lt;-readWave("xyz.wav")
play(aw)

# Martin Stoppacher #
# office@martinstoppacher.com #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#################################################################################

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Load the audio packages used below. Each package is installed only when it
# is not available yet (an unconditional install.packages() downloads it again
# on every run), and library() is used so that a missing package stops the
# script instead of require() quietly returning FALSE.
for (pkg in c("seewave", "tuneR")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

#rm(list = ls(all = TRUE)) # clear current workspace #

setwd("/Users/martinstoppacher/R Analysis/3_Index Sounds/")

library("quantmod")

getSymbols("^GSPC",from=1900)

head(GSPC)

tail(GSPC)

jpeg(filename = "SP500.jpg1975-2015.jpg", width=880,height=880,res=100)

plot(Cl(GSPC),main="S&P 500 Index (closing prices)")

dev.off()

summary(Cl(GSPC))

jpeg(filename = "SP500.jpg1975-1985.jpg", width=880,height=880,res=100)

plot(Cl(GSPC)["1975/1985"],main="S&P 500 Index 1975-1985 (closing prices)")

dev.off()

jpeg(filename = "SP500.jpg1985-1995.jpg", width=880,height=880,res=100)

plot(Cl(GSPC)["1985/1995"],main="S&P 500 Index 1985-1995 (closing prices)")

dev.off()

jpeg(filename = "SP500.jpg1995-2005.jpg", width=880,height=880,res=100)

plot(Cl(GSPC)["1995/2005"],main="S&P 500 Index 1995-2005 (closing prices)")

dev.off()

jpeg(filename = "SP500.jpg2005-2015.jpg", width=880,height=880,res=100)

plot(Cl(GSPC)["2005/2015"],main="S&P 500 Index 2005-2015 (closing prices)")

dev.off()

# Overlay the closing prices of four consecutive 10-year windows on a common
# trading-day axis so the decades can be compared directly.
jpeg(filename = "SP500.jpg 1975-2005 4decades-new.jpg", width=880,height=880,res=100)

plot(as.numeric(Cl(GSPC)["1975/1984"]),main="S&P 500 Index 1975-1985 - 4 decades (closing prices)",ylim=c(0,2100),type="l",ylab="index values",xlab="days (10 year)")
lines(as.numeric(Cl(GSPC)["1985/1994"]),col="red")
lines(as.numeric(Cl(GSPC)["1995/2004"]),col="blue")
# The last window ends in 2014, like the other 10-year windows, the legend and
# the percent-change version of this plot; "2005/2015" covered 11 years.
lines(as.numeric(Cl(GSPC)["2005/2014"]),col="green")

legend("topleft", legend = c("1975-1984","1985-1994","1995-2004","2005-2014") , lty = 1, col = c("black","red","blue","green"))

dev.off()

jpeg(filename = "SP500.jpg 1975-2005 4decades percent-new2.jpg", width=880,height=880,res=100)

plot((as.numeric(Cl(GSPC)["1975/1984"])/as.numeric(Cl(GSPC)["1975/1984"][1])-1),main="S&P 500 Index 1975-1985 - 4 decades (percent changes)",ylim=c(-0.4,2.3),type="l",ylab="index values",xlab="days (10 year)")

lines((as.numeric(Cl(GSPC)["1985/1994"])/as.numeric(Cl(GSPC)["1985/1994"][1])-1),col="red")

lines((as.numeric(Cl(GSPC)["1995/2004"])/as.numeric(Cl(GSPC)["1995/2004"][1])-1),col="blue")

lines((as.numeric(Cl(GSPC)["2005/2014"])/as.numeric(Cl(GSPC)["2005/2014"][1])-1),col="green")

legend("topleft", legend = c("1975-1984","1985-1994","1995-2004","2005-2014") , lty = 1, col = c("black","red","blue","green"))

dev.off()

jpeg(filename = "SP500.jpg 1975-2005 4otherdecades percent2-new.jpg", width=880,height=880,res=100)

plot(as.numeric(Cl(GSPC)["1980/1989"])/as.numeric(Cl(GSPC)["1980/1989"][1]),main="S&P 500 Index 1975-2015 - new truncation - (percent changes)",ylim=c(0.6,4.2),type="l",ylab="index values",xlab="days (10 year)",col="yellow")

lines(as.numeric(Cl(GSPC)["1975/1979"])/as.numeric(Cl(GSPC)["1975/1979"][1]),col="red")

lines(as.numeric(Cl(GSPC)["1990/1999"])/as.numeric(Cl(GSPC)["1990/1999"][1]),col="black")

lines(as.numeric(Cl(GSPC)["2000/2009"])/as.numeric(Cl(GSPC)["2000/2009"][1]),col="blue")

lines(as.numeric(Cl(GSPC)["2010/2014"])/as.numeric(Cl(GSPC)["2010/2014"][1]),col="green")

legend("topleft", legend = c("1975-1979","1980-1989","1990-1999","2000-2009","2010-2014") , lty = 1, col = c("red","yellow","black","blue","green"))

dev.off()

library("PerformanceAnalytics")

# Index level 2010-2015 relative to the first close of the 2005-2015 window
# (printed to the console only).
Cl(GSPC)["2010/2015"]/as.numeric(Cl(GSPC)["2005/2015"][1])

# charts.PerformanceSummary() needs a series of returns as its first argument;
# with that argument left empty the call stops with a "missing argument" error.
# NOTE(review): simple daily returns of the 2010-2015 closes are assumed to be
# the intended input - confirm.
gspc.returns <- na.omit(ROC(Cl(GSPC)["2010/2015"], type = "discrete"))
charts.PerformanceSummary(gspc.returns,main="",xlab="")

# percent

jpeg(filename = "SP500.jpg1975-1985-percent.jpg", width=880,height=880,res=100)

plot(Cl(GSPC)["1975/1985"]/as.numeric(Cl(GSPC)["1975/1985"][1]),main="S&P 500 Index 1975-1985 (closing prices)")

dev.off()

jpeg(filename = "SP500.jpg1985-1995-percent.jpg", width=880,height=880,res=100)

plot(Cl(GSPC)["1985/1995"]/as.numeric(Cl(GSPC)["1985/1995"][1]),main="S&P 500 Index 1985-1995 (closing prices)")

dev.off()

jpeg(filename = "SP500.jpg1995-2005-percent.jpg", width=880,height=880,res=100)

plot(Cl(GSPC)["1995/2005"]/as.numeric(Cl(GSPC)["1995/2005"][1]),main="S&P 500 Index 1995-2005 (closing prices)")

dev.off()

jpeg(filename = "SP500.jpg2005-2015-percent.jpg", width=880,height=880,res=100)

plot(Cl(GSPC)["2005/2015"]/as.numeric(Cl(GSPC)["2005/2015"][1]),main="S&P 500 Index 2005-2015 (closing prices)")

dev.off()

GSPC.cl.close <- diff(Cl(GSPC))

tail(GSPC.cl.close,20)

jpeg(filename = "SP500-first-difference-example.jpg", width=880,height=880,res=100)

plot(tail(GSPC.cl.close,200),main="S&P 500 first difference")

dev.off()

GSPC.cl.close.roc <- ROC(Cl(GSPC))

tail(GSPC.cl.close.roc,5)

#prices <- Cl(GSPC) # ROC is log diff!

#log_returns <- diff(log(prices), lag=1)

#tail(log_returns)

jpeg(filename = "SP500-first-roc-example.jpg", width=880,height=880,res=100)

plot(tail(GSPC.cl.close.roc,200),main="S&P 500 first difference")

dev.off()

dax.roc <- na.omit(ROC(Cl(GSPC)))*100

plot(head(dax.roc,20))

plot(dax.roc)

# standard

# Scale the returns into [-1, 1]: divide by whichever extreme (largest gain or
# largest loss) has the greater magnitude.
largest.move <- max(abs(dax.roc))
dax.roc.standard <- as.numeric(dax.roc / largest.move)

plot(dax.roc.standard,type="l")

w<-dax.roc.standard

# Write the series to a .wav file, read it back and play it, once for each
# sample rate; a lower rate plays the same samples more slowly.
for (f in c(41000, 10000, 5000)) {
  savewav(w, f = f, filename = "xyz.wav")
  aw <- readWave("xyz.wav")
  play(aw)
}

# Double the length of the series by inserting, after every sample, the
# midpoint between that sample and its successor.
dax.roc <- as.numeric(dax.roc)

# Midpoint of each sample and the next one. The final sample has no successor,
# so its midpoint is NA and is dropped by na.omit() below.
dax.roc2 <- matrix((dax.roc + c(dax.roc[-1], NA)) / 2, ncol = 1)

# rbind() stacks the samples over their midpoints; reading that 2 x n matrix
# column by column yields sample, midpoint, sample, midpoint, ...
# Built in one step instead of growing a matrix with rbind() inside a loop.
lines <- matrix(rbind(dax.roc, as.numeric(dax.roc2)), ncol = 1)

dax.roc <- na.omit(lines)

tail(dax.roc)

# Alternative input, kept for reference (its result was immediately
# overwritten): returns of the 500-day moving average of the S&P 500.
# dax.roc <- na.omit(ROC(SMA(Cl(GSPC),n=500)))*100

# The DAX series has to be downloaded before it can be used; only ^GSPC is
# fetched earlier in this script. An object already present in the session is
# left untouched.
if (!exists("GDAXI")) {
  getSymbols("^GDAXI")
}
dax.roc <- na.omit(ROC(Cl(GDAXI)))*100

# Normalize by the smallest return. This is the assignment that took effect
# before; when that value is negative it also flips the sign of the series.
# Alternative: dax.roc.standard <- as.numeric(dax.roc/max(dax.roc))
dax.roc.standard <- as.numeric(dax.roc/min(dax.roc))

hist(dax.roc.standard)

# Alternative, kept for reference: smooth the series before playback.
# w <- na.omit(SMA(dax.roc.standard,n=100))
w <- dax.roc.standard

# Repeat the clip 32 times (five doublings) so the sound lasts longer.
w <- rep(w, times = 2^5)

f <- 32000
savewav(w,f=f ,filename = "xyz.wav")
aw <- readWave("xyz.wav")
play(aw)

# Martin Stoppacher #

# office@martinstoppacher.com #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

#################################################################################

Creating a beat frequency interference with R

02/23/2015Engineering, Programming, R, SoundengineeringMartin Stoppacher

A beat frequency arises from mixing two frequencies which are very close to each other but not identical. The trick is that they are too close to each other to be separated by the human ear as two distinct frequencies, thus generating a single tone with fluctuating amplitude – a periodic change in volume. In fact, this effect arises only within the human brain; the two tones can still be measured physically as separate frequencies by using the appropriate instruments. Furthermore, the effect also works in a binaural situation, where each ear hears only one of the two frequencies.

The following graphic shows two nearly identical sine waves, one at 440 Hz and one slightly below, at 435 Hz. The sound data is produced for exactly 2 seconds at a 44100 Hz sample rate, giving us 88200 sample points. The first three panels of the graph show only the beginning of the wave, whereas the last one presents the combination of both signals for the complete 2 seconds.

435Hz-440Hz-beatsfrequency-4plots

Basically a combination of two sinus waves can be mathematically represented by:

And if we assume that both amplitudes are the same we get the reduced form by:

It is interesting to understand that the resulting frequency of the beat, i.e. the recognized periodic fluctuation of volume, is given by:

440Hz – Sinus – 2 seconds

435Hz – Sinus – 2 seconds

435 & 440Hz – Sinus – resulting beat frequency – 2 seconds

The oscillations in this post are simply created in R by using standard mathematical functions in combination with the time series package in R. In addition, the seewave package is used to store the sine waves as .wav files on the system.

# Install seewave only when it is missing, then attach it with library() so a
# failed load stops the script instead of require() returning FALSE.
if (!requireNamespace("seewave", quietly = TRUE)) {
  install.packages("seewave")
}
library("seewave")

1 2	install.packages("seewave") require("seewave")

The time series package handles data as equispaced points in time. This is in accordance with the sampling of continuous sound signals as they become digitized. A commonly used sampling frequency for CD quality is 44.1 kHz, which results in 88.2k sample points for a length of 2 seconds.

# Two seconds of audio at the CD sampling rate. Samples are taken at
# t = 0, 1/fs, 2/fs, ..., which is exactly the spacing ts(frequency = fs)
# assumes. seq(0, 2, length.out = 88200) would space them 2/88199 apart and
# shift both tones slightly upwards.
fs <- 44100
t.sec <- (seq_len(2 * fs) - 1) / fs

# 440 Hz tone
s1 <- ts(data = sin(2*pi*440*t.sec), start = 0, frequency = fs)

jpeg(filename = "440Hz Sinus.jpg", width=880,height=880,res=100)
plot(head(s1,401),type="l",ylab="")
dev.off()

# 435 Hz tone
s2 <- ts(data = sin(2*pi*435*t.sec), start = 0, frequency = fs)

jpeg(filename = "435Hz Sinus.jpg", width=880,height=880,res=100)
plot(head(s2,401),type="l",ylab="")
dev.off()

# Mix of both tones; halving keeps the amplitude within [-1, 1].
s3 <- (s1+s2)/2

s1<-sin(2*pi*440*seq(0,2,length.out=88200))

s1<-ts(data=s1, start=0, frequency=44100)

jpeg(filename = "440Hz Sinus.jpg", width=880,height=880,res=100)

plot(head(s1,401),type="l",ylab="")

dev.off()

s2<-sin(2*pi*435*seq(0,2,length.out=88200))

s2<-ts(data=s2, start=0, frequency=44100)

jpeg(filename = "435Hz Sinus.jpg", width=880,height=880,res=100)

plot(head(s2,401),type="l",ylab="")

dev.off()

s3<-(s1+s2)/2

For ease of use, the amplitude of the sum, 2a, is reduced back to a by dividing by 2.

# Write each of the three signals to its own .wav file (s1.wav, s2.wav,
# s3.wav) at a 44100 Hz sample rate.
f <- 44100
signals <- list(s1 = s1, s2 = s2, s3 = s3)
for (signal.name in names(signals)) {
  savewav(signals[[signal.name]], f = f, filename = paste0(signal.name, ".wav"))
}

f<-44100

savewav(s1,f=f ,filename = "s1.wav")

savewav(s2,f=f ,filename = "s2.wav")

savewav(s3,f=f ,filename = "s3.wav")

The graphical representation of the sound can easily be saved as a .jpg file to the system.

# Save a 2 x 2 overview: the first 401 samples of each pure tone, the first
# 17640 samples of their mix, and the complete mix.
jpeg(filename = "435Hz-440Hz-beatsfrequency-4plots.jpg", width=880,height=880,res=100)
par(mfrow=c(2,2))
plot(head(s1,401),type="l",ylab="",main="Sinus 440Hz")
plot(head(s2,401),type="l",ylab="",main="Sinus 435Hz")
# The mixed tones are 440 Hz and 435 Hz; the titles used to read "335Hz".
plot(head(s3,17640),type="l",ylab="",main="440Hz-435Hz Beat")
plot(s3,type="l",ylab="",main="440Hz-435Hz Beat")
dev.off()

# Save a 2 x 2 overview: the first 401 samples of each pure tone, the first
# 17640 samples of their mix, and the complete mix.
jpeg(filename = "435Hz-440Hz-beatsfrequency-4plots.jpg", width=880,height=880,res=100)

par(mfrow=c(2,2))

plot(head(s1,401),type="l",ylab="",main="Sinus 440Hz")

plot(head(s2,401),type="l",ylab="",main="Sinus 435Hz")

# The mixed tones are 440 Hz and 435 Hz; the titles used to read "335Hz".
plot(head(s3,17640),type="l",ylab="",main="440Hz-435Hz Beat")

plot(s3,type="l",ylab="",main="440Hz-435Hz Beat")

dev.off()

In addition to the sample above we can also see and hear what it is like when the beat effect fades out and the brain starts to recognize two different tones. Therefore the next few examples present the resulting wave after summing two different frequencies, where one is always 440 Hz.

beatsfrequency-9examples

435 & 440Hz – Sinus – resulting beat frequency – 2 seconds

425 & 440Hz – Sinus – resulting (beat) frequency – 2 seconds

415 & 440Hz – Sinus – resulting (beat) frequency – 2 seconds

405 & 440Hz – Sinus – resulting (beat) frequency – 2 seconds

395 & 440Hz – Sinus – resulting (beat) frequency – 2 seconds

385 & 440Hz – Sinus – resulting (beat) frequency – 2 seconds

# Nine mixes of a fixed 440 Hz tone with a second tone at 435, 425, ..., 355 Hz.
# Each mix is drawn into one panel of a 3 x 3 figure and saved as a .wav file.
jpeg(filename = "beatsfrequency-9examples.jpg", width=880,height=880,res=100)
par(mfrow=c(3,3))

# Everything that does not depend on the second frequency is computed once.
# Samples are taken at t = 0, 1/f, 2/f, ..., the spacing that
# ts(frequency = f) assumes; seq(0, 2, length.out = 88200) would space them
# 2/88199 apart and shift every tone slightly upwards.
f <- 44100
t.sec <- (seq_len(2 * f) - 1) / f
s1 <- ts(data = sin(2*pi*440*t.sec), start = 0, frequency = f)

for(i in 1:9){
  e <- 445-i*10
  s2 <- ts(data = sin(2*pi*e*t.sec), start = 0, frequency = f)
  s3 <- s1+s2
  # Halve the sum so the amplitude stays within [-1, 1]; show the first
  # 17640 samples only.
  plot(head(s3/2,17640),type="l",ylab="",xlab="",main=paste(e,"Hz"))
  savewav((s3/2),f=f ,filename = paste(e,"Hz - 440Hz-beat.wav"))
}
dev.off()

jpeg(filename = "beatsfrequency-9examples.jpg", width=880,height=880,res=100)

par(mfrow=c(3,3))

for(i in 1:9){

e<-445-i*10

s1<-sin(2*pi*440*seq(0,2,length.out=88200))

s1<-ts(data=s1, start=0, frequency=44100)

s2<-sin(2*pi*e*seq(0,2,length.out=88200))

s2<-ts(data=s2, start=0, frequency=44100)

s3<-s1+s2

#jpeg(filename = "beatsfrequency.jpg", width=880,height=880,res=100)

plot(head(s3/2,17640),type="l",ylab="",xlab="",main=paste(e,"Hz"))

#plot(s3,type="l")

#dev.off()

f<-44100

savewav((s3/2),f=f ,filename = paste(e,"Hz - 440Hz-beat.wav"))

}

dev.off()

http://cran.r-project.org/web/views/TimeSeries.html
http://cran.r-project.org/web/packages/seewave/index.html

Latex code for the Formulas above:

sin(2 \pi f_1t)+sin(2 \pi f_2t)=2sin(2 \pi \frac{f_1+f_2}{2} t)cos(2 \pi \frac{f_1-f_2}{2} t)

x=(a_2-a_1)cos \omega_2t+2a_1cos\frac{\omega_1+\omega_2}{2}tcos\frac{\omega_2-\omega_1}{2}t

x=2a_1cos\frac{\omega_1+\omega_2}{2}tcos\frac{\omega_2-\omega_1}{2}t

\omega_a=\frac{\omega_1+\omega_2}{2}

\omega_b=\frac{\omega_2-\omega_1}{2}

\omega_s=\omega_2-\omega_1

f_s=f_2-f_1

sin(2 \pi f_1t)+sin(2 \pi f_2t)=2sin(2 \pi \frac{f_1+f_2}{2} t)cos(2 \pi \frac{f_1-f_2}{2} t)

x=(a_2-a_1)cos \omega_2t+2a_1cos\frac{\omega_1+\omega_2}{2}tcos\frac{\omega_2-\omega_1}{2}t

x=2a_1cos\frac{\omega_1+\omega_2}{2}tcos\frac{\omega_2-\omega_1}{2}t

\omega_a=\frac{\omega_1+\omega_2}{2}

\omega_b=\frac{\omega_2-\omega_1}{2}

\omega_s=\omega_2-\omega_1

f_s=f_2-f_1

Implementing a MySQL DB connection via ooRexx by using BSF4ooRexx – (3_MySql_connector.rxj)

10/17/2014Content Syndication, ooRexx, ProgrammingMartin Stoppacher

/* ***************************************************************************** */
/* This is a short example of implementing a MySQL DB connection via ooRexx      */
/* by using BSF4ooRexx. Using the Java - mysql connector via bean scripting      */
/* in ooRexx                                                                     */
/* created by Martin Stoppacher - 26.12.2009                                     */
/* ***************************************************************************** */

/* Messages are quoted string literals. The unquoted form only worked because   */
/* uninitialized symbols evaluate to their own upper-cased names; the text is   */
/* kept upper case so the output stays the same.                                */
say "HELLO ACCESS TO A MY SQL DATABASE VIA JAVA USING BSF4REXX"
uid = "root";
url = "jdbc:mysql://localhost/test";
pw = "password";                             /* demo credentials - do not reuse */

mydrive=.bsf~new('com.mysql.jdbc.Driver')        /* establish the db connection */
man=bsf.loadClass("java.sql.DriverManager")
man~registerDriver(mydrive)

conn=man~getConnection(url, uid, pw)                  /* connecting the database */

stmt=conn~createStatement                                       /* fetching data */
/* coalesce() yields 1 as the next number when the table is still empty;        */
/* max(Nr)+1 alone would be NULL and break the insert below.                    */
rset=stmt~executeQuery("select coalesce(max(Nr),0)+1 as maximum from mytest")

rset~next; i = rset~getString("maximum"); say i;
rset~close                               /* done with the first result set */
stmt~executeUpdate("insert into mytest values(" || i ||",'BSF4ooRexx')")

rset=stmt~executeQuery("select NR, NAME from mytest")           /* fetching data */

say "NR______NAME"                                                 /* print data */
do while rset~next
say rset~getString("nr")  || "______" || rset~getString("NAME")
end

rset~close                                    /* release the JDBC resources */
stmt~close
conn~close

say "END"

::requires BSF.CLS                                       /* get the Java support */

/* ***************************************************************************** */

/* This is a short example of implementing a MySQL DB connection via ooRexx */

/* by using BSF4ooRexx. Using the Java - mysql connector via bean scripting */

/* in ooRexx */

/* created by Martin Stoppacher - 26.12.2009 */

/* ***************************************************************************** */

say hello access to a my sql database via java using bsf4rexx

uid = "root";

url = "jdbc:mysql://localhost/test";

pw = "password";

mydrive=.bsf~new('com.mysql.jdbc.Driver') /* establishe the db connection */

man=bsf.loadClass("java.sql.DriverManager")

man~registerDriver(mydrive)

conn=man~getConnection(url, uid, pw) /* connecting the database */

stmt=conn~createStatement /* fetching data */

rset=stmt~executeQuery("select max(Nr)+1 as maximum from mytest")

rset~next; i = rset~getString("maximum"); say i;

stmt~executeUpdate("insert into mytest values(" || i ||",'BSF4ooRexx')")

rset=stmt~executeQuery("select NR, NAME from mytest") /* fetching data */

say "NR______NAME" /* print data */

do while rset~next

say rset~getString("nr") || ______ || rset~getString("NAME")

end

say end

::requires BSF.CLS /* get the Java support */

GDP and Life expectancy

08/09/2014Allgemein, Data Analysis, Programming, R, StatisticsMartin Stoppacher

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# gdp_world.R

rm(list = ls(all = TRUE))                                   #  clear current workspace  #
setwd("/Users/martinstoppacher/R Analysis/")

# - - - - - - - - - - - - - - - - - - - -
# additional packages

#install.packages("XML")
#install.packages("gridExtra")
library("XML")
library("gridExtra")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Download World GDP Data http://data.worldbank.org/

gdp6 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?display=default")
gdp5 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=1&display=default")
gdp4 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=2&display=default")
gdp3 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=3&display=default")
gdp2 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=4&display=default")
gdp1 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=5&display=default")

gdp <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=6&display=default")
gdp <- gdp[[1]]
gdp <- as.data.frame(gdp)
gdp.all <- gdp[,1:5]

gdp1 <- gdp1[[1]]
gdp1 <- as.data.frame(gdp1)
gdp.all <- cbind(gdp.all,gdp1[,2:6])

gdp2 <- gdp2[[1]]
gdp2 <- as.data.frame(gdp2)
gdp.all <- cbind(gdp.all,gdp2[,2:6])

gdp3 <- gdp3[[1]]
gdp3 <- as.data.frame(gdp3)
gdp.all <- cbind(gdp.all,gdp3[,2:6])

gdp4 <- gdp4[[1]]
gdp4 <- as.data.frame(gdp4)
gdp.all <- cbind(gdp.all,gdp4[,2:6])

gdp5 <- gdp5[[1]]
gdp5 <- as.data.frame(gdp5)
gdp.all <- cbind(gdp.all,gdp5[,2:6])

gdp6 <- gdp6[[1]]
gdp6 <- as.data.frame(gdp6)
gdp.all <- cbind(gdp.all,gdp6[,2:5])

#save(gdp.all,file="gdp_all.R")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Data cleaning

load("gdp_all.R")

# Convert one scraped column (text or factor such as "1,234,567") to numbers.
# Thousands separators are removed first; cells that cannot be parsed as a
# number become 0.
parse_gdp_column <- function(column) {
  values <- as.numeric(gsub(",", "", column))
  values[is.na(values)] <- 0
  values
}

# Column 1 of gdp.all holds the country names and every other column one year.
# Whole columns are converted at once instead of growing the result row by row
# with rbind() inside a loop.
gdp.all.new <- data.frame(
  lapply(gdp.all[, -1, drop = FALSE], parse_gdp_column),
  check.names = FALSE
)

# One column per year, 1980 to 2012 (33 columns); rows are named by country.
colnames(gdp.all.new) <- as.character(1980:2012)
rownames(gdp.all.new) <- as.character(gdp.all[, 1])

jpeg(filename = "gdp_1980-2012_data.jpg", width=1280,height=280,res=100) 
grid.table(head(gdp.all.new[,1:10])) 
dev.off()

#save(gdp.all.new,file="gdp_all_new.R")
setwd("/Users/martinstoppacher/R Analysis/world gdp development/")
gdp.all.new
setwd("../")
setwd("/Users/martinstoppacher/R Analysis/gdp and life expectancy/")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# gdp_world.R

rm(list = ls(all = TRUE)) # clear current workspace #

setwd("/Users/martinstoppacher/R Analysis/")

# - - - - - - - - - - - - - - - - - - - -

# additional packages

#install.packages("XML")

#install.packages("gridExtra")

library("XML")

library("gridExtra")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Download World GDP Data http://data.worldbank.org/

gdp6 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?display=default")

gdp5 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=1&display=default")

gdp4 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=2&display=default")

gdp3 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=3&display=default")

gdp2 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=4&display=default")

gdp1 <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=5&display=default")

gdp <- readHTMLTable("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=6&display=default")

gdp <- gdp[[1]]

gdp <- as.data.frame(gdp)

gdp.all <- gdp[,1:5]

gdp1 <- gdp1[[1]]

gdp1 <- as.data.frame(gdp1)

gdp.all <- cbind(gdp.all,gdp1[,2:6])

gdp2 <- gdp2[[1]]

gdp2 <- as.data.frame(gdp2)

gdp.all <- cbind(gdp.all,gdp2[,2:6])

gdp3 <- gdp3[[1]]

gdp3 <- as.data.frame(gdp3)

gdp.all <- cbind(gdp.all,gdp3[,2:6])

gdp4 <- gdp4[[1]]

gdp4 <- as.data.frame(gdp4)

gdp.all <- cbind(gdp.all,gdp4[,2:6])

gdp5 <- gdp5[[1]]

gdp5 <- as.data.frame(gdp5)

gdp.all <- cbind(gdp.all,gdp5[,2:6])

gdp6 <- gdp6[[1]]

gdp6 <- as.data.frame(gdp6)

gdp.all <- cbind(gdp.all,gdp6[,2:5])

#save(gdp.all,file="gdp_all.R")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Data cleaning

load("gdp_all.R")

gdp.all.new <- data.frame()

for(e in 1:length(gdp.all[,1])){

p<-NULL

for(i in 2:length(gdp.all[1,])){

p[i-1]<-as.numeric(gsub(",","",gdp.all[e,i]))

}

p[is.na(p)]<-0

gdp.all.new<-rbind(gdp.all.new,p)

}

gdp.all.new <- cbind(as.character(gdp.all[,1]),gdp.all.new)

colnames(gdp.all.new)<-c("Coutry","1980","1981","1982","1983","1984","1985","1986","1987","1988","1989","1990"

,"1991","1992","1993","1994","1995","1996","1997","1998","1999","2000","2001","2002"

,"2003","2004","2005","2006","2007","2008","2009","2010","2011","2012")

rownames(gdp.all.new) <- as.character(gdp.all.new[,1])

gdp.all.new <- gdp.all.new[,2:34]

jpeg(filename = "gdp_1980-2012_data.jpg", width=1280,height=280,res=100)

grid.table(head(gdp.all.new[,1:10]))

dev.off()

#save(gdp.all.new,file="gdp_all_new.R")

setwd("/Users/martinstoppacher/R Analysis/world gdp development/")

gdp.all.new

setwd("../")

setwd("/Users/martinstoppacher/R Analysis/gdp and life expectancy/")

# - - - - - - - - - - - - - - - - - - - -
# gdp and life expectancy

load("gdp_all_new.R")

life_m <- readHTMLTable("http://data.worldbank.org/indicator/SP.DYN.LE00.MA.IN/countries?display=default")
life_m <- life_m[[1]]
life_m <- as.data.frame(life_m)

life_m.all <- life_m[,5]
life_m.all <- as.numeric(as.character(factor(life_m.all)))
life_m.all[is.na(life_m.all)]<-0

gdp.life.all.new <- data.frame(gdp.all.new[,33],life_m.all)
rownames(gdp.life.all.new)<-rownames(gdp.all.new)

gdp.life.all.new[,2][gdp.life.all.new[,2] == 0]<- NA
gdp.life.all.new[,1][gdp.life.all.new[,1] == 0]<- NA
gdp.life.all.new <- na.omit(gdp.life.all.new)

gdp.life.all.new.order <- gdp.life.all.new[order(gdp.life.all.new[,1], decreasing = TRUE),]

plot(gdp.life.all.new.order[1:150,1]/1000000000,gdp.life.all.new.order[1:150,2])

# - - - - - - - - - - - - - - - - - - - -
#

jpeg(filename = "gdp_life_ex_male.jpg", width=880,height=880,res=100) 

plot(log(gdp.life.all.new.order[1:150,1]),gdp.life.all.new.order[1:150,2],xlab="log(GDP per country)",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, male (years)")

linmod<-lm(gdp.life.all.new.order[1:150,2]~log(gdp.life.all.new.order[1:150,1]))
abline(linmod)

gdp.life.all.new.order.linmod2 <- data.frame(log(gdp.life.all.new.order[1:150,1]),gdp.life.all.new.order[1:150,2])
colnames(gdp.life.all.new.order.linmod2)<-c("x","y")

linmod2 <- lm(y ~ x + I(x^2), data = gdp.life.all.new.order.linmod2)
lines(gdp.life.all.new.order.linmod2[,1],fitted(linmod2),col="red")

linmod3 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.order.linmod2)
lines(gdp.life.all.new.order.linmod2[,1],fitted(linmod3),col="blue")

linmod4 <- lm(y ~ x + I(x^2) + I(x^3) + I(x^4), data = gdp.life.all.new.order.linmod2)
lines(gdp.life.all.new.order.linmod2[,1],fitted(linmod4),col="green")

dev.off()

# - - - - - - - - - - - - - - - - - - - -
#

life_f <- readHTMLTable("http://data.worldbank.org/indicator/SP.DYN.LE00.FE.IN/countries?display=default")
life_f <- life_f[[1]]
life_f <- as.data.frame(life_f)

life_f.all <- life_f[,5]
life_f.all <- as.numeric(as.character(factor(life_f.all)))
life_f.all[is.na(life_f.all)]<-0

gdp.life.all.new.mf <- data.frame(gdp.all.new[,33],life_m.all,life_f.all)
rownames(gdp.life.all.new.mf)<-rownames(gdp.all.new)

gdp.life.all.new.mf[,3][gdp.life.all.new.mf[,3] == 0]<- NA
gdp.life.all.new.mf[,2][gdp.life.all.new.mf[,2] == 0]<- NA
gdp.life.all.new.mf[,1][gdp.life.all.new.mf[,1] == 0]<- NA

gdp.life.all.new.mf <- na.omit(gdp.life.all.new.mf)

gdp.life.all.new.mf.order <- gdp.life.all.new.mf[order(gdp.life.all.new.mf[,1], decreasing = TRUE),]

#jpeg(filename = "gdp_life_ex_male_female.jpg", width=880,height=880,res=100)

# The 100 largest economies and the mean of their male (column 2) and
# female (column 3) life expectancy.
top100 <- gdp.life.all.new.mf.order[1:100,]
life.avg <- (top100[,2]+top100[,3])/2

jpeg(filename = "gdp_life_ex_male_female_nolog.jpg", width=880,height=880,res=100)
plot(top100[,1],life.avg,xlab="GDP per country",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, female & male (years)")
# Label the ten largest economies at the plotted (averaged) points; the labels
# used to be placed at the male-only values.
text(top100[1:10,1],life.avg[1:10],rownames(top100)[1:10])
dev.off()

# Same data with GDP on a log scale (unnamed two-column matrix: x, y).
gdp.life.all.new.mf.order.log<-cbind(log(top100[,1]),unname(life.avg))

# The x axis of the two log plots is labelled as log(GDP), as in the other
# log plots of this script.
jpeg(filename = "gdp_life_ex_male_female_log.jpg", width=880,height=880,res=100)
plot(gdp.life.all.new.mf.order.log,xlab="log(GDP per country)",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, female & male (years)")
#text(gdp.life.all.new.mf.order.log[1:10,],rownames(gdp.life.all.new.mf.order[1:10,]))
dev.off()

jpeg(filename = "gdp_life_ex_male_female_log_text.jpg", width=880,height=880,res=100)
plot(gdp.life.all.new.mf.order.log,xlab="log(GDP per country)",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, female & male (years)")
text(gdp.life.all.new.mf.order.log[1:100,],rownames(top100))
dev.off()

# - - - - - - - - - - - - - - - - - - - -
# plotting

jpeg(filename = "gdp_life_ex_male_female.jpg", width=880,height=880,res=100)

plot(log(gdp.life.all.new.mf.order[1:150,1]),gdp.life.all.new.mf.order[1:150,2],xlab="log(GDP per country)",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, female & male (years)")
points(log(gdp.life.all.new.mf.order[1:150,1]),gdp.life.all.new.mf.order[1:150,3],col="green")

gdp.life.all.new.mf.order.linmod2 <- data.frame(log(gdp.life.all.new.mf.order[1:150,1]),gdp.life.all.new.mf.order[1:150,2])
colnames(gdp.life.all.new.mf.order.linmod2)<-c("x","y")
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[,1],fitted(linmod2),col="blue")

gdp.life.all.new.mf.order.linmod3 <- data.frame(log(gdp.life.all.new.mf.order[1:150,1]),gdp.life.all.new.mf.order[1:150,3])
colnames(gdp.life.all.new.mf.order.linmod3)<-c("x","y")
linmod3 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod3)
lines(gdp.life.all.new.mf.order.linmod3[,1],fitted(linmod3),col="red")

dev.off()

jpeg(filename = "gdp_life_ex_male.jpg", width=880,height=880,res=100)
plot(log(gdp.life.all.new.mf.order[1:150,1]),gdp.life.all.new.mf.order[1:150,2],xlab="log(GDP per country)",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, male (years)")
gdp.life.all.new.mf.order.linmod2 <- data.frame(log(gdp.life.all.new.mf.order[1:150,1]),gdp.life.all.new.mf.order[1:150,2])
colnames(gdp.life.all.new.mf.order.linmod2)<-c("x","y")
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[,1],fitted(linmod2),col="blue")
dev.off()

jpeg(filename = "gdp_life_ex_female.jpg", width=880,height=880,res=100)
plot(log(gdp.life.all.new.mf.order[1:150,1]),gdp.life.all.new.mf.order[1:150,3],xlab="log(GDP per country)",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, female (years)")
gdp.life.all.new.mf.order.linmod3 <- data.frame(log(gdp.life.all.new.mf.order[1:150,1]),gdp.life.all.new.mf.order[1:150,3])
colnames(gdp.life.all.new.mf.order.linmod3)<-c("x","y")
linmod3 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod3)
lines(gdp.life.all.new.mf.order.linmod3[,1],fitted(linmod3),col="red")
dev.off()

# - - - - - - - - - - - - - - - - - - - -
# top 20

gdp.life.all.new.mf.order.log<-cbind(log(gdp.life.all.new.mf.order[1:20,1]),
(gdp.life.all.new.mf.order[1:20,2]+gdp.life.all.new.mf.order[1:20,3])/2)

jpeg(filename = "gdp_life_ex_male_female_gdptop20.jpg", width=880,height=880,res=100)
plot(gdp.life.all.new.mf.order.log,xlab="log(GDP per country)",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, female & male (years)")
text(gdp.life.all.new.mf.order.log[1:20,],rownames(gdp.life.all.new.mf.order[1:20,]))

gdp.life.all.new.mf.order.linmod2 <- data.frame(gdp.life.all.new.mf.order.log)
colnames(gdp.life.all.new.mf.order.linmod2)<-c("x","y")
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[,1],fitted(linmod2))

gdp.life.all.new.mf.order.linmod2 <- data.frame(gdp.life.all.new.mf.order.log)
colnames(gdp.life.all.new.mf.order.linmod2)<-c("x","y")
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[,1],fitted(linmod2),col="blue")
dev.off()

# - - - - - - - - - - - - - - - - - - - -
# lowest 20 per gdp!

# The 20 countries with the smallest GDP. The table is sorted by GDP, largest
# first, so these are its last 20 rows; indexing (n-20):n selected 21 rows.
lowest<-tail(gdp.life.all.new.mf.order,20)
gdp.life.all.new.mf.order.log<-cbind(log(lowest[,1]),(lowest[,2]+lowest[,3])/2)

jpeg(filename = "gdp_life_ex_male_female_gdplower20.jpg", width=880,height=880,res=100)
plot(gdp.life.all.new.mf.order.log,xlab="log(GDP per country)",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, female & male (years)")
text(gdp.life.all.new.mf.order.log,rownames(lowest))

# x = log(GDP), y = mean life expectancy
gdp.life.all.new.mf.order.linmod2 <- data.frame(gdp.life.all.new.mf.order.log)
colnames(gdp.life.all.new.mf.order.linmod2)<-c("x","y")

# straight-line fit (black)
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[,1],fitted(linmod2))

# cubic fit (blue)
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[,1],fitted(linmod2),col="blue")
dev.off()

# - - - - - - - - - - - - - - - - - - - -
# top 20 countries by life expectancy

gdp.life.all.new.mf.order.log<-cbind((gdp.life.all.new.mf.order[,2]+gdp.life.all.new.mf.order[,3])/2,log(gdp.life.all.new.mf.order[,1]))
rownames(gdp.life.all.new.mf.order.log)<-rownames(gdp.life.all.new.mf.order)
gdp.life.all.new.mf.order.log.new<-gdp.life.all.new.mf.order.log[order(gdp.life.all.new.mf.order.log[,1]),]

head(gdp.life.all.new.mf.order.log.new[,1])
barplot(head(gdp.life.all.new.mf.order.log.new[,1],20),col="blue")
barplot(tail(gdp.life.all.new.mf.order.log.new[,1],10),col="green")


plot(head(gdp.life.all.new.mf.order.log.new[,2],20),head(gdp.life.all.new.mf.order.log.new[,1],20),xlab="log(GDP per country)",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, female & male (years)")
text(head(gdp.life.all.new.mf.order.log.new[,2],20),head(gdp.life.all.new.mf.order.log.new[,1],20),rownames(head(gdp.life.all.new.mf.order.log.new,20)))
gdp.life.all.new.mf.order.linmod2 <- data.frame(head(gdp.life.all.new.mf.order.log.new[,2],20),head(gdp.life.all.new.mf.order.log.new[,1],20))
colnames(gdp.life.all.new.mf.order.linmod2)<-c("x","y")
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[,1],fitted(linmod2))

plot(tail(gdp.life.all.new.mf.order.log.new[,2],20),tail(gdp.life.all.new.mf.order.log.new[,1],20),xlab="log(GDP per country)",ylab="life expectancy",main="GDP per Country vs. Life expectancy at birth, female & male (years)")
text(tail(gdp.life.all.new.mf.order.log.new[,2],20),tail(gdp.life.all.new.mf.order.log.new[,1],20),rownames(tail(gdp.life.all.new.mf.order.log.new,20)))
gdp.life.all.new.mf.order.linmod2 <- data.frame(tail(gdp.life.all.new.mf.order.log.new[,2],20),tail(gdp.life.all.new.mf.order.log.new[,1],20))
colnames(gdp.life.all.new.mf.order.linmod2)<-c("x","y")
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[,1],fitted(linmod2))

# Countries sorted by life expectancy: column 1 is the mean life expectancy,
# column 2 the log of GDP.
colnames(gdp.life.all.new.mf.order.log.new)<-c("life expectancy","log(gdp)")

tail(gdp.life.all.new.mf.order.log.new,40)
head(gdp.life.all.new.mf.order.log.new,40)

# Same table with GDP converted back from the log scale and expressed in
# billions of USD. One billion is 1e9; the divisor used to be 1e8, which
# overstated every value tenfold.
gdp.life.all.new.mf.order.log.new.real<-cbind(gdp.life.all.new.mf.order.log.new[,1],exp(gdp.life.all.new.mf.order.log.new[,2])/1000000000)

colnames(gdp.life.all.new.mf.order.log.new.real)<-c("life expectancy","GDP in Billions USD")

tail(gdp.life.all.new.mf.order.log.new.real,40)
head(gdp.life.all.new.mf.order.log.new.real,40)


# - - - - - - - - - - - - - - - - - - - -
# plotting

#install.packages("scatterplot3d")
library("scatterplot3d")

# 3D view of male vs. female life expectancy against log(GDP) for the first
# (up to) 150 rows of gdp.life.all.new.mf.order. head() is used instead of
# [1:150, ] so that a shorter table does not introduce all-NA rows.
top.gdp <- head(gdp.life.all.new.mf.order, 150)
b <- log(top.gdp[, 1])   # log(GDP)
a <- top.gdp[, 2]        # life expectancy, male
c <- top.gdp[, 3]        # life expectancy, female
ac <- (a + c) / 2        # mean of male and female

# Cubic trends of male and female life expectancy over log(GDP), fitted on
# exactly the points plotted here. The global linmod2 must not be reused:
# it was last reassigned to a 20-row linear fit, so fitted(linmod2) has a
# different length than b and points3d() stops with a length error.
fit.male.3d <- lm(a ~ b + I(b^2) + I(b^3))
fit.female.3d <- lm(c ~ b + I(b^2) + I(b^3))

# Regression plane female ~ male + log(GDP); it is the same for both images.
my.lm <- lm(c ~ a + b)

jpeg(filename = "gdp_life_ex_male_female_3d.jpg", width = 880, height = 880, res = 100)

# The y axis carries log(GDP), so it is labelled as such.
s3d <- scatterplot3d(a, b, c, angle = 70, type = "p", xlab = "male", zlab = "female", ylab = "log(GDP)", main = "GDP per Country vs. Life expectancy at birth, female & male (years)")
s3d$points3d(fitted(fit.male.3d), b, fitted(fit.female.3d), col = "red", type = "h", pch = 9)
s3d$points3d(a, b, c)
s3d$plane3d(my.lm)
# The fitted curve is drawn once (it used to be drawn twice in a row).
s3d$points3d(fitted(fit.male.3d), b, fitted(fit.female.3d), col = "red", type = "l")

dev.off()

jpeg(filename = "gdp_life_ex_male_female_3d_col.jpg", width = 880, height = 880, res = 100)

# Colour groups by row position: rows 1-15, 16-50 and 51-150, truncated to
# the number of rows actually present.
group <- rep(c(1, 2, 3), times = c(15, 35, 100))[seq_along(a)]
s3d <- scatterplot3d(a, b, c, color = as.numeric(group), angle = 70, type = "p", xlab = "male", zlab = "female", ylab = "log(GDP)", main = "GDP per Country vs. Life expectancy at birth, female & male (years)")
s3d$points3d(fitted(fit.male.3d), b, fitted(fit.female.3d), col = "red", type = "h", pch = 9)
s3d$points3d(fitted(fit.male.3d), b, fitted(fit.female.3d), col = "red", type = "l")
s3d$plane3d(my.lm)
dev.off()

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

# - - - - - - - - - - - - - - - - - - - -

# gdp and life expectancy

# Restore previously saved objects; presumably this provides gdp.all.new,
# which is used below but not created in this section.
load("gdp_all_new.R")

# Fetch the World Bank page for male life expectancy and keep its first table.
life_m <- as.data.frame(
  readHTMLTable("http://data.worldbank.org/indicator/SP.DYN.LE00.MA.IN/countries?display=default")[[1]]
)

# Column 5 is used as the value column: convert it to numbers (cells that do
# not parse become NA) and code the missing values as 0.
life_m.all <- as.numeric(as.character(factor(life_m[, 5])))
life_m.all[is.na(life_m.all)] <- 0

# Pair column 33 of gdp.all.new with the life-expectancy values by position.
gdp.life.all.new <- data.frame(gdp.all.new[, 33], life_m.all)
rownames(gdp.life.all.new) <- rownames(gdp.all.new)

# Turn 0 into NA in both columns (second column first), then drop every row
# that has a missing value.
for (j in 2:1) {
  gdp.life.all.new[, j][gdp.life.all.new[, j] == 0] <- NA
}
gdp.life.all.new <- na.omit(gdp.life.all.new)

# Sort by column 1, largest first.
gdp.rank <- order(gdp.life.all.new[, 1], decreasing = TRUE)
gdp.life.all.new.order <- gdp.life.all.new[gdp.rank, ]

# Quick look at rows 1..150: column 1 scaled by 1e9 against column 2.
plot(gdp.life.all.new.order[1:150, 1] / 1000000000, gdp.life.all.new.order[1:150, 2])

# - - - - - - - - - - - - - - - - - - - -

jpeg(filename = "gdp_life_ex_male.jpg", width = 880, height = 880, res = 100)

# x = log of column 1 (GDP), y = column 2 (male life expectancy), rows 1..150.
gdp.life.all.new.order.linmod2 <- data.frame(
  x = log(gdp.life.all.new.order[1:150, 1]),
  y = gdp.life.all.new.order[1:150, 2]
)

plot(
  gdp.life.all.new.order.linmod2$x,
  gdp.life.all.new.order.linmod2$y,
  xlab = "log(GDP per country)",
  ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, male (years)"
)

# Straight-line fit, drawn in the default colour.
linmod <- lm(gdp.life.all.new.order[1:150, 2] ~ log(gdp.life.all.new.order[1:150, 1]))
abline(linmod)

# Polynomial fits of degree 2, 3 and 4 on the same data.
linmod2 <- lm(y ~ x + I(x^2), data = gdp.life.all.new.order.linmod2)
linmod3 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.order.linmod2)
linmod4 <- lm(y ~ x + I(x^2) + I(x^3) + I(x^4), data = gdp.life.all.new.order.linmod2)

# Overlay the fitted curves: degree 2 red, degree 3 blue, degree 4 green.
curves <- list(red = linmod2, blue = linmod3, green = linmod4)
for (shade in names(curves)) {
  lines(gdp.life.all.new.order.linmod2[, 1], fitted(curves[[shade]]), col = shade)
}

dev.off()

# - - - - - - - - - - - - - - - - - - - -

# Fetch the World Bank page for female life expectancy and keep its first table.
life_f <- as.data.frame(
  readHTMLTable("http://data.worldbank.org/indicator/SP.DYN.LE00.FE.IN/countries?display=default")[[1]]
)

# Column 5 is used as the value column: convert it to numbers (cells that do
# not parse become NA) and code the missing values as 0.
life_f.all <- as.numeric(as.character(factor(life_f[, 5])))
life_f.all[is.na(life_f.all)] <- 0

# Combine column 33 of gdp.all.new with the male and female values by position.
gdp.life.all.new.mf <- data.frame(gdp.all.new[, 33], life_m.all, life_f.all)
rownames(gdp.life.all.new.mf) <- rownames(gdp.all.new)

# Turn 0 into NA in every column (last column first), then drop every row
# that has a missing value.
for (j in 3:1) {
  gdp.life.all.new.mf[, j][gdp.life.all.new.mf[, j] == 0] <- NA
}
gdp.life.all.new.mf <- na.omit(gdp.life.all.new.mf)

# Sort by column 1, largest first.
gdp.rank.mf <- order(gdp.life.all.new.mf[, 1], decreasing = TRUE)
gdp.life.all.new.mf.order <- gdp.life.all.new.mf[gdp.rank.mf, ]

#jpeg(filename = "gdp_life_ex_male_female.jpg", width=880,height=880,res=100)

# Mean of male and female life expectancy against raw GDP for the first
# (up to) 100 rows of the GDP-ordered table.
jpeg(filename = "gdp_life_ex_male_female_nolog.jpg", width = 880, height = 880, res = 100)

top100 <- head(gdp.life.all.new.mf.order, 100)
mean.life <- (top100[, 2] + top100[, 3]) / 2

plot(top100[, 1], mean.life, xlab = "GDP per country", ylab = "life expectancy", main = "GDP per Country vs. Life expectancy at birth, female & male (years)")

# Label the first ten points at the plotted position (GDP, mean). Passing
# columns 1:2 put the labels at the male value instead, away from the points.
label.rows <- head(seq_len(nrow(top100)), 10)
text(top100[label.rows, 1], mean.life[label.rows], rownames(top100)[label.rows])

dev.off()

# log(GDP) and mean male/female life expectancy for the first (up to) 100
# rows of the GDP-ordered table. The matrix is built once and used for both
# images below; it used to be recomputed identically before the second one.
top100 <- head(gdp.life.all.new.mf.order, 100)
gdp.life.all.new.mf.order.log <- cbind(log(top100[, 1]), (top100[, 2] + top100[, 3]) / 2)

# Image without labels. The x axis shows log values, so it is labelled
# "log(GDP per country)" like the other log plots in this script.
jpeg(filename = "gdp_life_ex_male_female_log.jpg", width = 880, height = 880, res = 100)

plot(gdp.life.all.new.mf.order.log, xlab = "log(GDP per country)", ylab = "life expectancy", main = "GDP per Country vs. Life expectancy at birth, female & male (years)")

#text(gdp.life.all.new.mf.order.log[1:10,],rownames(gdp.life.all.new.mf.order[1:10,]))

dev.off()

# Same plot with every point labelled by its row name.
jpeg(filename = "gdp_life_ex_male_female_log_text.jpg", width = 880, height = 880, res = 100)

plot(gdp.life.all.new.mf.order.log, xlab = "log(GDP per country)", ylab = "life expectancy", main = "GDP per Country vs. Life expectancy at birth, female & male (years)")

text(gdp.life.all.new.mf.order.log, rownames(top100))

dev.off()

# - - - - - - - - - - - - - - - - - - - -

# plotting

jpeg(filename = "gdp_life_ex_male_female.jpg", width = 880, height = 880, res = 100)

# Rows 1..150 of the GDP-ordered table: x = log(GDP); y = male life
# expectancy (column 2) in the first frame, female (column 3) in the second.
log.gdp <- log(gdp.life.all.new.mf.order[1:150, 1])
gdp.life.all.new.mf.order.linmod2 <- data.frame(x = log.gdp, y = gdp.life.all.new.mf.order[1:150, 2])
gdp.life.all.new.mf.order.linmod3 <- data.frame(x = log.gdp, y = gdp.life.all.new.mf.order[1:150, 3])

# Male values set up the plot; female values are added as green points.
plot(
  gdp.life.all.new.mf.order.linmod2$x,
  gdp.life.all.new.mf.order.linmod2$y,
  xlab = "log(GDP per country)",
  ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
points(gdp.life.all.new.mf.order.linmod3$x, gdp.life.all.new.mf.order.linmod3$y, col = "green")

# Cubic fit for the male values, drawn in blue.
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2$x, fitted(linmod2), col = "blue")

# Cubic fit for the female values, drawn in red.
linmod3 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod3)
lines(gdp.life.all.new.mf.order.linmod3$x, fitted(linmod3), col = "red")

dev.off()

# Male life expectancy against log(GDP), rows 1..150, with a cubic fit.
# Note: "gdp_life_ex_male.jpg" is also written by the male-only section
# earlier in this script, so that earlier image is replaced here.
jpeg(filename = "gdp_life_ex_male.jpg", width = 880, height = 880, res = 100)

gdp.life.all.new.mf.order.linmod2 <- data.frame(
  x = log(gdp.life.all.new.mf.order[1:150, 1]),
  y = gdp.life.all.new.mf.order[1:150, 2]
)
with(
  gdp.life.all.new.mf.order.linmod2,
  plot(x, y, xlab = "log(GDP per country)", ylab = "life expectancy", main = "GDP per Country vs. Life expectancy at birth, male (years)")
)
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2$x, fitted(linmod2), col = "blue")

dev.off()

# Female life expectancy against log(GDP), rows 1..150, with a cubic fit.
jpeg(filename = "gdp_life_ex_female.jpg", width = 880, height = 880, res = 100)

gdp.life.all.new.mf.order.linmod3 <- data.frame(
  x = log(gdp.life.all.new.mf.order[1:150, 1]),
  y = gdp.life.all.new.mf.order[1:150, 3]
)
with(
  gdp.life.all.new.mf.order.linmod3,
  plot(x, y, xlab = "log(GDP per country)", ylab = "life expectancy", main = "GDP per Country vs. Life expectancy at birth, female (years)")
)
linmod3 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod3)
lines(gdp.life.all.new.mf.order.linmod3$x, fitted(linmod3), col = "red")

dev.off()

# - - - - - - - - - - - - - - - - - - - -

# top 20

# Rows 1..20 of the GDP-ordered table: log(GDP) against the mean of male and
# female life expectancy.
top20 <- gdp.life.all.new.mf.order[1:20, ]
gdp.life.all.new.mf.order.log <- cbind(log(top20[, 1]), (top20[, 2] + top20[, 3]) / 2)

jpeg(filename = "gdp_life_ex_male_female_gdptop20.jpg", width = 880, height = 880, res = 100)

plot(gdp.life.all.new.mf.order.log, xlab = "log(GDP per country)", ylab = "life expectancy", main = "GDP per Country vs. Life expectancy at birth, female & male (years)")

# Label every point with its row name.
text(gdp.life.all.new.mf.order.log, rownames(top20))

# The same 20 points as an x/y data frame for the two fits below.
gdp.life.all.new.mf.order.linmod2 <- data.frame(
  x = gdp.life.all.new.mf.order.log[, 1],
  y = gdp.life.all.new.mf.order.log[, 2]
)

# Straight-line fit in the default colour; only the cubic fit below is kept
# in linmod2 once this section has run.
lines(
  gdp.life.all.new.mf.order.linmod2$x,
  fitted(lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2))
)

# Cubic fit in blue.
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2$x, fitted(linmod2), col = "blue")

dev.off()

# - - - - - - - - - - - - - - - - - - - -

# lowest 20 per gdp!

# The 20 rows with the smallest GDP. The table is sorted by GDP in decreasing
# order, so these are its last 20 rows. tail() selects exactly 20; the former
# index range (n - 20):n selected 21 rows.
lowest <- tail(gdp.life.all.new.mf.order, 20)

# log(GDP) against the mean of male and female life expectancy.
gdp.life.all.new.mf.order.log <- cbind(log(lowest[, 1]), (lowest[, 2] + lowest[, 3]) / 2)

jpeg(filename = "gdp_life_ex_male_female_gdplower20.jpg", width = 880, height = 880, res = 100)

plot(gdp.life.all.new.mf.order.log, xlab = "log(GDP per country)", ylab = "life expectancy", main = "GDP per Country vs. Life expectancy at birth, female & male (years)")

# Label every point with its row name.
text(gdp.life.all.new.mf.order.log, rownames(lowest))

# The same points as an x/y data frame for the two fits below.
gdp.life.all.new.mf.order.linmod2 <- data.frame(gdp.life.all.new.mf.order.log)
colnames(gdp.life.all.new.mf.order.linmod2) <- c("x", "y")

# Straight-line fit in the default colour.
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2))

# Cubic fit in blue; linmod2 holds this model afterwards.
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2), col = "blue")

dev.off()

# - - - - - - - - - - - - - - - - - - - -

# countries ranked by life expectancy (ascending): lowest 20 and highest 20

# Two-column matrix for all rows: column 1 = mean of male and female life
# expectancy, column 2 = log(GDP); row names are copied from the source table.
gdp.life.all.new.mf.order.log <- matrix(
  c(
    (gdp.life.all.new.mf.order[, 2] + gdp.life.all.new.mf.order[, 3]) / 2,
    log(gdp.life.all.new.mf.order[, 1])
  ),
  ncol = 2,
  dimnames = list(rownames(gdp.life.all.new.mf.order), NULL)
)

# Re-order the rows by column 1. order() sorts ascending, so head() of the
# result holds the smallest life-expectancy values and tail() the largest.
life.rank <- order(gdp.life.all.new.mf.order.log[, 1])
gdp.life.all.new.mf.order.log.new <- gdp.life.all.new.mf.order.log[life.rank, ]

head(gdp.life.all.new.mf.order.log.new[, 1])

# Bar charts of the first 20 and the last 10 values of column 1.
barplot(head(gdp.life.all.new.mf.order.log.new[, 1], 20), col = "blue")
barplot(tail(gdp.life.all.new.mf.order.log.new[, 1], 10), col = "green")

# Draw the same labelled scatter plot twice: first for the first 20 rows of
# the ranked matrix (head), then for the last 20 rows (tail). Column 2
# (log GDP) is the x axis and column 1 (life expectancy) the y axis.
for (pick in list(head, tail)) {
  x20 <- pick(gdp.life.all.new.mf.order.log.new[, 2], 20)
  y20 <- pick(gdp.life.all.new.mf.order.log.new[, 1], 20)

  plot(x20, y20, xlab = "log(GDP per country)", ylab = "life expectancy", main = "GDP per Country vs. Life expectancy at birth, female & male (years)")
  text(x20, y20, rownames(pick(gdp.life.all.new.mf.order.log.new, 20)))

  # Straight-line fit through the 20 points. Both objects are overwritten on
  # the second pass, so after the loop they describe the tail() subset.
  gdp.life.all.new.mf.order.linmod2 <- data.frame(x20, y20)
  colnames(gdp.life.all.new.mf.order.linmod2) <- c("x", "y")
  linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
  lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2))
}

# Name the two columns of the ranked matrix and show both ends of it.
colnames(gdp.life.all.new.mf.order.log.new) <- c("life expectancy", "log(gdp)")

tail(gdp.life.all.new.mf.order.log.new, 40)

head(gdp.life.all.new.mf.order.log.new, 40)

# Undo the log transform and express GDP in billions. One billion is 1e9;
# the previous divisor of 1e8 made every value ten times too large for the
# "GDP in Billions USD" column label.
gdp.life.all.new.mf.order.log.new.real <- cbind(
  gdp.life.all.new.mf.order.log.new[, 1],
  exp(gdp.life.all.new.mf.order.log.new[, 2]) / 1e9
)

colnames(gdp.life.all.new.mf.order.log.new.real) <- c("life expectancy", "GDP in Billions USD")

tail(gdp.life.all.new.mf.order.log.new.real, 40)

head(gdp.life.all.new.mf.order.log.new.real, 40)

# - - - - - - - - - - - - - - - - - - - -

# plotting

#install.packages("scatterplot3d")

library("scatterplot3d")

# 3D view of male vs. female life expectancy against log(GDP) for the first
# (up to) 150 rows of gdp.life.all.new.mf.order. head() is used instead of
# [1:150, ] so that a shorter table does not introduce all-NA rows.
top.gdp <- head(gdp.life.all.new.mf.order, 150)
b <- log(top.gdp[, 1])   # log(GDP)
a <- top.gdp[, 2]        # life expectancy, male
c <- top.gdp[, 3]        # life expectancy, female
ac <- (a + c) / 2        # mean of male and female

# Cubic trends of male and female life expectancy over log(GDP), fitted on
# exactly the points plotted here. The global linmod2 must not be reused:
# the preceding section reassigns it to a 20-row linear fit, so
# fitted(linmod2) has a different length than b and points3d() stops with a
# length error.
fit.male.3d <- lm(a ~ b + I(b^2) + I(b^3))
fit.female.3d <- lm(c ~ b + I(b^2) + I(b^3))

# Regression plane female ~ male + log(GDP); it is the same for both images.
my.lm <- lm(c ~ a + b)

jpeg(filename = "gdp_life_ex_male_female_3d.jpg", width = 880, height = 880, res = 100)

# The y axis carries log(GDP), so it is labelled as such.
s3d <- scatterplot3d(a, b, c, angle = 70, type = "p", xlab = "male", zlab = "female", ylab = "log(GDP)", main = "GDP per Country vs. Life expectancy at birth, female & male (years)")
s3d$points3d(fitted(fit.male.3d), b, fitted(fit.female.3d), col = "red", type = "h", pch = 9)
s3d$points3d(a, b, c)
s3d$plane3d(my.lm)
s3d$points3d(fitted(fit.male.3d), b, fitted(fit.female.3d), col = "red", type = "l")

dev.off()

jpeg(filename = "gdp_life_ex_male_female_3d_col.jpg", width = 880, height = 880, res = 100)

# Colour groups by row position: rows 1-15, 16-50 and 51-150, truncated to
# the number of rows actually present.
group <- rep(c(1, 2, 3), times = c(15, 35, 100))[seq_along(a)]
s3d <- scatterplot3d(a, b, c, color = as.numeric(group), angle = 70, type = "p", xlab = "male", zlab = "female", ylab = "log(GDP)", main = "GDP per Country vs. Life expectancy at birth, female & male (years)")
s3d$points3d(fitted(fit.male.3d), b, fitted(fit.female.3d), col = "red", type = "h", pch = 9)
s3d$points3d(fitted(fit.male.3d), b, fitted(fit.female.3d), col = "red", type = "l")
s3d$plane3d(my.lm)

dev.off()

ooRexx with BSF4ooRexx – “java.net.URL” Classes (2_getinfo.rxj)

08/05/2014 · Code, Content Syndication, ooRexx, Programming · Martin Stoppacher

/* ***************************************************************************** */
/* Short example of using the "java.net.URL" class through BSF4ooRexx:           */
/* creates a URL object for a fixed feed address and prints its components.      */
/* created by Martin Stoppacher    26.12.2009                                    */
/* ***************************************************************************** */

/* Message text is quoted: unquoted words are symbols, which Rexx upper-cases    */
/* and which would take the value of any variable sharing their name.            */
say "hello this reads a syndfeed"
say "please type in the url"
/* NOTE: no input is read here; the feed address below is hard-coded.            */
url= "http://rss.orf.at/fm4.xml"

f=.bsf~new("java.net.URL", url) /* creating a java url object with the above url */

say f~getAuthority()                      /* gets the authority part of this URL */
say f~getDefaultPort()           /* gets the default port number of the protocol */
                                                     /* associated with this URL */
say f~getPort()                              /* gets the port number of this URL */
say f~getFile()                                /* gets the file name of this URL */
say f~getHost()                 /* gets the host name of this URL, if applicable */
say f~getProtocol()                        /* gets the protocol name of this URL */
say f~getQuery()                              /* gets the query part of this URL */
say f~hashCode()          /* creates an integer suitable for hash table indexing */

::requires BSF.cls                            /* get the Java support for ooRexx */

/* ***************************************************************************** */

/* Short example of using the "java.net.URL" class through BSF4ooRexx: */

/* creates a URL object for a fixed feed address and prints its components. */

/* created by Martin Stoppacher 26.12.2009 */

/* ***************************************************************************** */

/* Message text is quoted: unquoted words are symbols, which Rexx upper-cases */

/* and which would take the value of any variable sharing their name. */

say "hello this reads a syndfeed"

say "please type in the url"

/* NOTE: no input is read here; the feed address below is hard-coded. */

url= "http://rss.orf.at/fm4.xml"

f=.bsf~new("java.net.URL", url) /* creating a java url object with the above url */

say f~getAuthority() /* gets the authority part of this URL */

say f~getDefaultPort() /* gets the default port number of the protocol */

/* associated with this URL */

say f~getPort() /* gets the port number of this URL */

say f~getFile() /* gets the file name of this URL */

say f~getHost() /* gets the host name of this URL, if applicable */

say f~getProtocol() /* gets the protocol name of this URL */

say f~getQuery() /* gets the query part of this URL */

say f~hashCode() /* creates an integer suitable for hash table indexing */

::requires BSF.cls /* get the Java support for ooRexx */