X

# GDP and Life expectancy

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# gdp_world.R

# Start from an empty workspace so objects left over from an earlier session
# cannot leak into the analysis, then move to the analysis directory.
# `all.names` is spelled out in full instead of relying on partial argument
# matching of `all`.
# NOTE(review): both calls affect the whole R session, and the absolute path
# below is specific to one machine -- adjust before running elsewhere.
rm(list = ls(all.names = TRUE))                             #  clear current workspace  #
setwd("/Users/martinstoppacher/R Analysis/")

# - - - - - - - - - - - - - - - - - - - -
# additional packages

#install.packages("XML")
#install.packages("gridExtra")
library("XML")
library("gridExtra")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Download World GDP Data http://data.worldbank.org/

# Read one World Bank result page and return the first HTML table found on it
# as a data frame.
read_first_table <- function(page_url) {
  as.data.frame(readHTMLTable(page_url)[[1]])
}

# The indicator is spread over seven result pages.  `page=6` supplies the
# leading column plus the first four value columns, pages 5..1 supply five
# value columns each, and the default page supplies the last four.
gdp  <- read_first_table("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=6&display=default")
gdp1 <- read_first_table("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=5&display=default")
gdp2 <- read_first_table("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=4&display=default")
gdp3 <- read_first_table("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=3&display=default")
gdp4 <- read_first_table("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=2&display=default")
gdp5 <- read_first_table("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?page=1&display=default")
gdp6 <- read_first_table("http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W?display=default")

# Glue the pages together side by side.  Column 1 of every page after the
# first is skipped so the leading column appears only once.
gdp.all <- cbind(
  gdp[, 1:5],
  gdp1[, 2:6],
  gdp2[, 2:6],
  gdp3[, 2:6],
  gdp4[, 2:6],
  gdp5[, 2:6],
  gdp6[, 2:5]
)

#save(gdp.all,file="gdp_all.R")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Data cleaning

# Convert the scraped GDP table into a numeric data frame with one row per
# entry of the first column (used as row names) and one column per year.
#
# The cached copy is only read when `gdp.all` is not already in the workspace:
# the matching save() call is commented out, so an unconditional load() would
# either fail or replace the table that was just downloaded with an older one.
if (!exists("gdp.all")) {
  load("gdp_all.R")
}

year_labels <- as.character(1980:2012)
stopifnot(ncol(gdp.all) == length(year_labels) + 1L)

# Strip thousands separators and parse; anything that does not parse as a
# number is recorded as 0 (later sections treat 0 as "missing").
parse_amount <- function(column) {
  amount <- as.numeric(gsub(",", "", as.character(column), fixed = TRUE))
  amount[is.na(amount)] <- 0
  amount
}

# Work column by column instead of cell by cell; this avoids growing a data
# frame with rbind() inside a loop and also copes with a zero-row table.
cleaned <- lapply(as.list(gdp.all[, -1, drop = FALSE]), parse_amount)
names(cleaned) <- year_labels

gdp.all.new <- as.data.frame(cleaned, optional = TRUE)
colnames(gdp.all.new) <- year_labels
rownames(gdp.all.new) <- as.character(gdp.all[, 1])

# Draw the first rows of the cleaned table (first ten year columns only) as a
# table graphic and write it to a JPEG file in the current directory.
jpeg(filename = "gdp_1980-2012_data.jpg", width=1280,height=280,res=100) 
grid.table(head(gdp.all.new[,1:10])) 
dev.off()

# Saving the cleaned table is disabled here.
# NOTE(review): a later section calls load("gdp_all_new.R"); presumably that
# file was written by an earlier run with this line enabled -- confirm.
#save(gdp.all.new,file="gdp_all_new.R")
# Switch directory, print the whole cleaned table, then switch directory again.
# NOTE(review): the relative setwd("../") is immediately superseded by the
# absolute setwd() on the following line.
setwd("/Users/martinstoppacher/R Analysis/world gdp development/")
gdp.all.new
setwd("../")
setwd("/Users/martinstoppacher/R Analysis/gdp and life expectancy/")

 

# - - - - - - - - - - - - - - - - - - - -
# gdp and life expectancy

# Pair the last GDP column (labelled 2012) with male life expectancy at birth.
#
# The cached GDP table is only read when `gdp.all.new` is not already in the
# workspace; the matching save() call is commented out, so an unconditional
# load() would fail or silently swap in an older table.
if (!exists("gdp.all.new")) {
  load("gdp_all_new.R")
}

life_m <- as.data.frame(
  readHTMLTable("http://data.worldbank.org/indicator/SP.DYN.LE00.MA.IN/countries?display=default")[[1]]
)

# Column 5 of the scraped table is taken as the life-expectancy value.
# Unparseable cells become 0; `life_m.all` keeps this zero-filled form because
# a later section reads it again.
life_m.all <- as.numeric(as.character(life_m[, 5]))
life_m.all[is.na(life_m.all)] <- 0

# The two tables are combined by row position, so they must at least have the
# same number of rows; data.frame() would otherwise recycle a shorter vector
# whenever the lengths happen to divide evenly.
# NOTE(review): this assumes both tables list the same entries in the same
# order -- confirm against the source pages.
if (length(life_m.all) != nrow(gdp.all.new)) {
  stop("GDP table and male life expectancy table differ in row count",
       call. = FALSE)
}

gdp.life.all.new <- data.frame(gdp.all.new[, 33], life_m.all)
rownames(gdp.life.all.new) <- rownames(gdp.all.new)

# 0 is the placeholder for "no data" in both columns: drop those rows.
gdp.life.all.new[gdp.life.all.new == 0] <- NA
gdp.life.all.new <- na.omit(gdp.life.all.new)

# Largest GDP first.
gdp.life.all.new.order <- gdp.life.all.new[order(gdp.life.all.new[, 1], decreasing = TRUE), ]

# Quick look: GDP in billions against male life expectancy, 150 largest GDPs.
plot(gdp.life.all.new.order[1:150, 1] / 1000000000, gdp.life.all.new.order[1:150, 2])

# - - - - - - - - - - - - - - - - - - - -
#

# Male life expectancy against log(GDP) for the first 150 rows of the
# GDP-ordered table, with a straight-line fit and polynomial fits of degree
# 2 (red), 3 (blue) and 4 (green) drawn on top.  Output goes to a JPEG file.
jpeg(filename = "gdp_life_ex_male.jpg", width = 880, height = 880, res = 100)

richest_rows <- gdp.life.all.new.order[1:150, ]
gdp.life.all.new.order.linmod2 <- data.frame(
  x = log(richest_rows[, 1]),
  y = richest_rows[, 2]
)

plot(
  gdp.life.all.new.order.linmod2$x,
  gdp.life.all.new.order.linmod2$y,
  xlab = "log(GDP per country)",
  ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, male (years)"
)

# Straight-line fit.
linmod <- lm(y ~ x, data = gdp.life.all.new.order.linmod2)
abline(linmod)

# Draw the fitted values of a model over the scatter in the given colour.
overlay_fit <- function(model, colour) {
  lines(gdp.life.all.new.order.linmod2$x, fitted(model), col = colour)
  invisible(NULL)
}

linmod2 <- lm(y ~ x + I(x^2), data = gdp.life.all.new.order.linmod2)
overlay_fit(linmod2, "red")

linmod3 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.order.linmod2)
overlay_fit(linmod3, "blue")

linmod4 <- lm(y ~ x + I(x^2) + I(x^3) + I(x^4), data = gdp.life.all.new.order.linmod2)
overlay_fit(linmod4, "green")

dev.off()

# - - - - - - - - - - - - - - - - - - - -
#

# Fetch female life expectancy and build a three-column table:
# last GDP column, male life expectancy, female life expectancy.
life_f <- as.data.frame(
  readHTMLTable("http://data.worldbank.org/indicator/SP.DYN.LE00.FE.IN/countries?display=default")[[1]]
)

# Column 5 is taken as the value; cells that do not parse are stored as 0.
female_values <- as.numeric(as.character(life_f[, 5]))
life_f.all <- replace(female_values, is.na(female_values), 0)

gdp.life.all.new.mf <- data.frame(gdp.all.new[, 33], life_m.all, life_f.all)
rownames(gdp.life.all.new.mf) <- rownames(gdp.all.new)

# 0 marks "no data" in every column: turn it into NA and drop incomplete rows.
gdp.life.all.new.mf[gdp.life.all.new.mf == 0] <- NA
gdp.life.all.new.mf <- na.omit(gdp.life.all.new.mf)

# Largest GDP first.
gdp_ranking <- order(gdp.life.all.new.mf[, 1], decreasing = TRUE)
gdp.life.all.new.mf.order <- gdp.life.all.new.mf[gdp_ranking, ]

#jpeg(filename = "gdp_life_ex_male_female.jpg", width=880,height=880,res=100)

# Mean of male and female life expectancy against GDP for the 100 largest
# GDPs, written three times: raw GDP axis, log(GDP) axis, and log(GDP) axis
# with every point labelled.
top100 <- head(gdp.life.all.new.mf.order, 100)
top100_mean_life <- (top100[, 2] + top100[, 3]) / 2
plot_title <- "GDP per Country vs. Life expectancy at birth, female & male (years)"

jpeg(filename = "gdp_life_ex_male_female_nolog.jpg", width = 880, height = 880, res = 100)
plot(top100[, 1], top100_mean_life,
     xlab = "GDP per country", ylab = "life expectancy", main = plot_title)
# Label the ten largest GDPs at the plotted mean value; the labels were
# previously positioned at the male value and so sat off their points.
top10 <- seq_len(min(10, nrow(top100)))
text(top100[top10, 1], top100_mean_life[top10], rownames(top100)[top10])
dev.off()

# Two unnamed columns: log(GDP) and mean life expectancy.
gdp.life.all.new.mf.order.log <- cbind(log(top100[, 1]), top100_mean_life,
                                       deparse.level = 0)

# The x axis is log(GDP) in the two plots below, so it is labelled as such.
jpeg(filename = "gdp_life_ex_male_female_log.jpg", width = 880, height = 880, res = 100)
plot(gdp.life.all.new.mf.order.log,
     xlab = "log(GDP per country)", ylab = "life expectancy", main = plot_title)
#text(gdp.life.all.new.mf.order.log[1:10,],rownames(gdp.life.all.new.mf.order[1:10,]))
dev.off()

jpeg(filename = "gdp_life_ex_male_female_log_text.jpg", width = 880, height = 880, res = 100)
plot(gdp.life.all.new.mf.order.log,
     xlab = "log(GDP per country)", ylab = "life expectancy", main = plot_title)
text(gdp.life.all.new.mf.order.log, rownames(top100))
dev.off()

# - - - - - - - - - - - - - - - - - - - -
# plotting

# Cubic fits of life expectancy on log(GDP) for the first 150 rows of the
# GDP-ordered table, fitted separately for the male and female columns and
# written to three JPEG files: both together, male only, female only.
richest150 <- gdp.life.all.new.mf.order[1:150, ]
log_gdp150 <- log(richest150[, 1])
male_title <- "GDP per Country vs. Life expectancy at birth, male (years)"
female_title <- "GDP per Country vs. Life expectancy at birth, female (years)"
both_title <- "GDP per Country vs. Life expectancy at birth, female & male (years)"

# Male column (2) -> linmod2, female column (3) -> linmod3.
gdp.life.all.new.mf.order.linmod2 <- data.frame(x = log_gdp150, y = richest150[, 2])
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)

gdp.life.all.new.mf.order.linmod3 <- data.frame(x = log_gdp150, y = richest150[, 3])
linmod3 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod3)

# Draw a fitted curve over the current plot.
draw_curve <- function(model, colour) {
  lines(log_gdp150, fitted(model), col = colour)
  invisible(NULL)
}

# Both sexes: male points in the default colour with a blue curve,
# female points in green with a red curve.
jpeg(filename = "gdp_life_ex_male_female.jpg", width = 880, height = 880, res = 100)
plot(log_gdp150, richest150[, 2],
     xlab = "log(GDP per country)", ylab = "life expectancy", main = both_title)
points(log_gdp150, richest150[, 3], col = "green")
draw_curve(linmod2, "blue")
draw_curve(linmod3, "red")
dev.off()

# Male only.
# NOTE(review): an earlier section writes a different plot to this same file
# name, so that earlier image is replaced here.
jpeg(filename = "gdp_life_ex_male.jpg", width = 880, height = 880, res = 100)
plot(log_gdp150, richest150[, 2],
     xlab = "log(GDP per country)", ylab = "life expectancy", main = male_title)
draw_curve(linmod2, "blue")
dev.off()

# Female only.
jpeg(filename = "gdp_life_ex_female.jpg", width = 880, height = 880, res = 100)
plot(log_gdp150, richest150[, 3],
     xlab = "log(GDP per country)", ylab = "life expectancy", main = female_title)
draw_curve(linmod3, "red")
dev.off()

# - - - - - - - - - - - - - - - - - - - -
# top 20

# The 20 largest GDPs: mean of male and female life expectancy against
# log(GDP), every point labelled with its row name, with a straight-line fit
# (default colour) and a cubic fit (blue) on top.
gdp_top20 <- gdp.life.all.new.mf.order[1:20, ]
gdp.life.all.new.mf.order.log <- cbind(
  log(gdp_top20[, 1]),
  (gdp_top20[, 2] + gdp_top20[, 3]) / 2
)

jpeg(filename = "gdp_life_ex_male_female_gdptop20.jpg", width = 880, height = 880, res = 100)
plot(
  gdp.life.all.new.mf.order.log,
  xlab = "log(GDP per country)",
  ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
text(gdp.life.all.new.mf.order.log, rownames(gdp_top20))

# Same data as a data frame with columns x = log(GDP), y = mean life expectancy.
gdp.life.all.new.mf.order.linmod2 <- data.frame(
  x = gdp.life.all.new.mf.order.log[, 1],
  y = gdp.life.all.new.mf.order.log[, 2]
)

# Straight line first, then the cubic; `linmod2` ends up holding the cubic.
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2$x, fitted(linmod2))

linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2$x, fitted(linmod2), col = "blue")
dev.off()

# - - - - - - - - - - - - - - - - - - - -
# lowest 20 per gdp!

# The 20 smallest GDPs: mean of male and female life expectancy against
# log(GDP), every point labelled, with a straight-line fit (default colour)
# and a cubic fit (blue) on top.
#
# tail() returns exactly the last 20 rows of the GDP-ordered table.  The
# previous index range (n - 20):n selected 21 rows and could not cope with a
# table of 20 rows or fewer.
lowest <- tail(gdp.life.all.new.mf.order, 20)
gdp.life.all.new.mf.order.log <- cbind(log(lowest[, 1]), (lowest[, 2] + lowest[, 3]) / 2)

jpeg(filename = "gdp_life_ex_male_female_gdplower20.jpg", width = 880, height = 880, res = 100)
plot(gdp.life.all.new.mf.order.log,
     xlab = "log(GDP per country)",
     ylab = "life expectancy",
     main = "GDP per Country vs. Life expectancy at birth, female & male (years)")
text(gdp.life.all.new.mf.order.log, rownames(lowest))

# Columns: x = log(GDP), y = mean life expectancy.
gdp.life.all.new.mf.order.linmod2 <- data.frame(gdp.life.all.new.mf.order.log)
colnames(gdp.life.all.new.mf.order.linmod2) <- c("x", "y")

# Straight line first, then the cubic; `linmod2` ends up holding the cubic.
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2))

linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2), col = "blue")
dev.off()

# - - - - - - - - - - - - - - - - - - - -
# countries ranked by life expectancy: the 20 lowest (head) and the 20 highest (tail)

# Rank every row by the mean of male and female life expectancy.
# Column 1 = mean life expectancy, column 2 = log(GDP).  The sort is
# ascending, so head() gives the lowest values and tail() the highest.
gdp.life.all.new.mf.order.log <- cbind(
  (gdp.life.all.new.mf.order[, 2] + gdp.life.all.new.mf.order[, 3]) / 2,
  log(gdp.life.all.new.mf.order[, 1])
)
rownames(gdp.life.all.new.mf.order.log) <- rownames(gdp.life.all.new.mf.order)
gdp.life.all.new.mf.order.log.new <-
  gdp.life.all.new.mf.order.log[order(gdp.life.all.new.mf.order.log[, 1]), ]

head(gdp.life.all.new.mf.order.log.new[, 1])
barplot(head(gdp.life.all.new.mf.order.log.new[, 1], 20), col = "blue")
barplot(tail(gdp.life.all.new.mf.order.log.new[, 1], 10), col = "green")

plot_title <- "GDP per Country vs. Life expectancy at birth, female & male (years)"

# Scatter with labels and a straight-line fit: 20 lowest life expectancies.
lowest20 <- head(gdp.life.all.new.mf.order.log.new, 20)
plot(lowest20[, 2], lowest20[, 1],
     xlab = "log(GDP per country)", ylab = "life expectancy", main = plot_title)
text(lowest20[, 2], lowest20[, 1], rownames(lowest20))
gdp.life.all.new.mf.order.linmod2 <- data.frame(x = lowest20[, 2], y = lowest20[, 1])
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2))

# Same again for the 20 highest life expectancies.
highest20 <- tail(gdp.life.all.new.mf.order.log.new, 20)
plot(highest20[, 2], highest20[, 1],
     xlab = "log(GDP per country)", ylab = "life expectancy", main = plot_title)
text(highest20[, 2], highest20[, 1], rownames(highest20))
gdp.life.all.new.mf.order.linmod2 <- data.frame(x = highest20[, 2], y = highest20[, 1])
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2))

colnames(gdp.life.all.new.mf.order.log.new) <- c("life expectancy", "log(gdp)")

tail(gdp.life.all.new.mf.order.log.new, 40)
head(gdp.life.all.new.mf.order.log.new, 40)

# Undo the log and express GDP in billions.  One billion is 1e9; the divisor
# used to be 1e8, which made every figure in this column ten times too large.
gdp.life.all.new.mf.order.log.new.real <- cbind(
  gdp.life.all.new.mf.order.log.new[, 1],
  exp(gdp.life.all.new.mf.order.log.new[, 2]) / 1e9
)

colnames(gdp.life.all.new.mf.order.log.new.real) <- c("life expectancy", "GDP in Billions USD")

tail(gdp.life.all.new.mf.order.log.new.real, 40)
head(gdp.life.all.new.mf.order.log.new.real, 40)


# - - - - - - - - - - - - - - - - - - - -
# plotting

#install.packages("scatterplot3d")
library("scatterplot3d")

# 3D view of the (up to) 150 largest GDPs: male life expectancy (x),
# log(GDP) (y) and female life expectancy (z).
# head() is used so a table with fewer than 150 rows yields no NA rows.
top150 <- head(gdp.life.all.new.mf.order, 150)
b <- log(top150[, 1])
a <- top150[, 2]
c <- top150[, 3]
ac <- (a + c) / 2

# Cubic fits of each sex on log(GDP), refitted here on exactly the rows being
# plotted.  The global `linmod2` cannot be reused: the preceding section
# overwrites it with a 20-point straight-line fit, so its fitted values no
# longer line up with `b`.
male_curve <- fitted(lm(a ~ b + I(b^2) + I(b^3)))
female_curve <- fitted(lm(c ~ b + I(b^2) + I(b^3)))

# Regression plane of female on male life expectancy and log(GDP).
my.lm <- lm(c ~ a + b)

plot_title <- "GDP per Country vs. Life expectancy at birth, female & male (years)"

jpeg(filename = "gdp_life_ex_male_female_3d.jpg", width = 880, height = 880, res = 100)

# The y axis carries log(GDP) and is labelled accordingly.
s3d <- scatterplot3d(a, b, c, angle = 70, type = "p",
                     xlab = "male", zlab = "female", ylab = "log(GDP)",
                     main = plot_title)
#my1 <- lm(c ~ b)
#my2 <- lm(a ~ b)
#s3d$points3d(fitted(my2),b,fitted(my1), col="blue", type="h", pch=6)
s3d$points3d(male_curve, b, female_curve, col = "red", type = "h", pch = 9)
s3d$points3d(a, b, c)
s3d$plane3d(my.lm)
s3d$points3d(male_curve, b, female_curve, col = "red", type = "l")

dev.off()

jpeg(filename = "gdp_life_ex_male_female_3d_col.jpg", width = 880, height = 880, res = 100)

# Colour by GDP rank: first 15 rows, next 35, remaining 100.  The vector is
# trimmed to the number of rows actually available.
group <- rep(c(1, 2, 3), times = c(15, 35, 100))[seq_along(a)]
s3d <- scatterplot3d(a, b, c, color = group, angle = 70, type = "p",
                     xlab = "male", zlab = "female", ylab = "log(GDP)",
                     main = plot_title)
s3d$points3d(male_curve, b, female_curve, col = "red", type = "h", pch = 9)
s3d$points3d(male_curve, b, female_curve, col = "red", type = "l")
s3d$plane3d(my.lm)
dev.off()

 

# Martin Stoppacher: