# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# gdp_world.R
#
# Downloads the World Bank "NY.GDP.MKTP.CD" tables for the "1W" listing,
# glues the paginated HTML tables together column-wise into `gdp.all`, and
# then loads the cached copy of that object from disk.

rm(list = ls(all.names = TRUE)) # clear current workspace
# setwd("/Users/martinstoppacher/R Analysis/")

# - - - - - - - - - - - - - - - - - - - - #
# additional packages
#install.packages("XML")
#install.packages("gridExtra")
library("XML")
library("gridExtra")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Download World GDP Data http://data.worldbank.org/

gdp.url <- "http://data.worldbank.org/indicator/NY.GDP.MKTP.CD/countries/1W"

# Query strings in the order the pages are bound together: page 6 comes
# first, the un-paged default view last.
gdp.queries <- c(
  "?page=6&display=default",
  "?page=5&display=default",
  "?page=4&display=default",
  "?page=3&display=default",
  "?page=2&display=default",
  "?page=1&display=default",
  "?display=default"
)

# Columns kept from each page. Only the first page contributes its column 1
# (presumably the country label -- confirm against the live table); the
# default view contributes one value column fewer than the others.
gdp.columns <- c(list(1:5), rep(list(2:6), 5), list(2:5))

# readHTMLTable() returns every table found on the page; the first one is
# the data table used here.
gdp.pages <- lapply(gdp.queries, function(query) {
  as.data.frame(readHTMLTable(paste0(gdp.url, query))[[1]])
})

# unname() keeps cbind() from prefixing column names with list names.
gdp.all <- do.call(
  cbind,
  unname(Map(function(page, cols) page[, cols], gdp.pages, gdp.columns))
)
#save(gdp.all,file="gdp_all.R")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Data cleaning

# NOTE: the save() above is commented out, so this file must already exist;
# load() replaces the freshly downloaded `gdp.all` with the cached object.
load("gdp_all.R")
# Convert the scraped GDP table into a numeric data frame `gdp.all.new`:
# one row per entry of gdp.all (row names taken from its first column), one
# column per year 1980-2012. Thousands separators are stripped and anything
# that does not parse as a number becomes 0.
gdp.years <- as.character(1980:2012)
stopifnot(ncol(gdp.all) == length(gdp.years) + 1)

# Column-wise conversion; as.character() first so factor columns are parsed
# by their labels rather than their integer codes.
gdp.values <- lapply(gdp.all[, -1], function(column) {
  value <- as.numeric(gsub(",", "", as.character(column)))
  value[is.na(value)] <- 0
  value
})
gdp.all.new <- as.data.frame(gdp.values)
colnames(gdp.all.new) <- gdp.years
rownames(gdp.all.new) <- as.character(gdp.all[, 1])

# Render the first rows / first ten years as an image of the table.
jpeg(filename = "gdp_1980-2012_data.jpg", width = 1280, height = 280, res = 100)
grid.table(head(gdp.all.new[, 1:10]))
dev.off()
#save(gdp.all.new,file="gdp_all_new.R")

setwd("/Users/martinstoppacher/R Analysis/world gdp development/")
gdp.all.new
setwd("../")
# The files read and written from here on are resolved relative to this
# directory.
setwd("/Users/martinstoppacher/R Analysis/gdp and life expectancy/")
# - - - - - - - - - - - - - - - - - - - - #
# gdp and life expectancy
#
# Pairs column 33 of gdp.all.new with column 5 of the World Bank
# life-expectancy tables (male, then female) and writes scatter plots with
# polynomial fits.
# NOTE(review): the two sources are combined purely by row position, which
# assumes both tables list the same entries in the same order -- confirm.
load("gdp_all_new.R")

life_m <- readHTMLTable("http://data.worldbank.org/indicator/SP.DYN.LE00.MA.IN/countries?display=default")
life_m <- as.data.frame(life_m[[1]])
life_m.all <- as.numeric(as.character(life_m[, 5]))
life_m.all[is.na(life_m.all)] <- 0

gdp.life.all.new <- data.frame(gdp.all.new[, 33], life_m.all)
rownames(gdp.life.all.new) <- rownames(gdp.all.new)
# 0 is the missing-value marker in both columns; drop those rows.
gdp.life.all.new[, 2][gdp.life.all.new[, 2] == 0] <- NA
gdp.life.all.new[, 1][gdp.life.all.new[, 1] == 0] <- NA
gdp.life.all.new <- na.omit(gdp.life.all.new)
gdp.life.all.new.order <-
  gdp.life.all.new[order(gdp.life.all.new[, 1], decreasing = TRUE), ]
plot(gdp.life.all.new.order[1:150,1]/1000000000,gdp.life.all.new.order[1:150,2])

# - - - - - - - - - - - - - - - - - - - - #
# Male life expectancy vs. log(GDP) with linear to quartic fits.
# NOTE: "gdp_life_ex_male.jpg" is written again further down, so this
# version of the image is overwritten.
jpeg(filename = "gdp_life_ex_male.jpg", width = 880, height = 880, res = 100)
plot(
  log(gdp.life.all.new.order[1:150, 1]), gdp.life.all.new.order[1:150, 2],
  xlab = "log(GDP per country)", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, male (years)"
)
linmod <- lm(gdp.life.all.new.order[1:150,2]~log(gdp.life.all.new.order[1:150,1]))
abline(linmod)
gdp.life.all.new.order.linmod2 <- data.frame(
  x = log(gdp.life.all.new.order[1:150, 1]),
  y = gdp.life.all.new.order[1:150, 2]
)
linmod2 <- lm(y ~ x + I(x^2), data = gdp.life.all.new.order.linmod2)
lines(gdp.life.all.new.order.linmod2[, 1], fitted(linmod2), col = "red")
linmod3 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.order.linmod2)
lines(gdp.life.all.new.order.linmod2[, 1], fitted(linmod3), col = "blue")
linmod4 <- lm(y ~ x + I(x^2) + I(x^3) + I(x^4), data = gdp.life.all.new.order.linmod2)
lines(gdp.life.all.new.order.linmod2[, 1], fitted(linmod4), col = "green")
dev.off()

# - - - - - - - - - - - - - - - - - - - - #
# Add female life expectancy: columns are GDP, male, female.
life_f <- readHTMLTable("http://data.worldbank.org/indicator/SP.DYN.LE00.FE.IN/countries?display=default")
life_f <- as.data.frame(life_f[[1]])
life_f.all <- as.numeric(as.character(life_f[, 5]))
life_f.all[is.na(life_f.all)] <- 0

gdp.life.all.new.mf <- data.frame(gdp.all.new[, 33], life_m.all, life_f.all)
rownames(gdp.life.all.new.mf) <- rownames(gdp.all.new)
gdp.life.all.new.mf[, 3][gdp.life.all.new.mf[, 3] == 0] <- NA
gdp.life.all.new.mf[, 2][gdp.life.all.new.mf[, 2] == 0] <- NA
gdp.life.all.new.mf[, 1][gdp.life.all.new.mf[, 1] == 0] <- NA
gdp.life.all.new.mf <- na.omit(gdp.life.all.new.mf)
gdp.life.all.new.mf.order <-
  gdp.life.all.new.mf[order(gdp.life.all.new.mf[, 1], decreasing = TRUE), ]

# Row subsets by GDP rank, reused by the plots below.
mf.top20 <- gdp.life.all.new.mf.order[1:20, ]
mf.top100 <- gdp.life.all.new.mf.order[1:100, ]
mf.top150 <- gdp.life.all.new.mf.order[1:150, ]
mf.mean100 <- (mf.top100[, 2] + mf.top100[, 3]) / 2

#jpeg(filename = "gdp_life_ex_male_female.jpg", width=880,height=880,res=100)
jpeg(filename = "gdp_life_ex_male_female_nolog.jpg", width = 880, height = 880, res = 100)
plot(
  mf.top100[, 1], mf.mean100,
  xlab = "GDP per country", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
# Label the ten largest economies at the plotted point (male/female mean),
# not at the male-only value.
text(mf.top100[1:10, 1], mf.mean100[1:10], rownames(mf.top100)[1:10])
dev.off()

gdp.life.all.new.mf.order.log <- cbind(log(mf.top100[, 1]), mf.mean100)
jpeg(filename = "gdp_life_ex_male_female_log.jpg", width = 880, height = 880, res = 100)
plot(
  gdp.life.all.new.mf.order.log,
  xlab = "GDP per country", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
#text(gdp.life.all.new.mf.order.log[1:10,],rownames(gdp.life.all.new.mf.order[1:10,]))
dev.off()

jpeg(filename = "gdp_life_ex_male_female_log_text.jpg", width = 880, height = 880, res = 100)
plot(
  gdp.life.all.new.mf.order.log,
  xlab = "GDP per country", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
text(gdp.life.all.new.mf.order.log, rownames(mf.top100))
dev.off()

# - - - - - - - - - - - - - - - - - - - - #
# plotting
# Cubic fits of male (linmod2) and female (linmod3) life expectancy on
# log(GDP) for the first 150 rows; the same fits feed all three images.
gdp.life.all.new.mf.order.linmod2 <-
  data.frame(x = log(mf.top150[, 1]), y = mf.top150[, 2])
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
gdp.life.all.new.mf.order.linmod3 <-
  data.frame(x = log(mf.top150[, 1]), y = mf.top150[, 3])
linmod3 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod3)

jpeg(filename = "gdp_life_ex_male_female.jpg", width = 880, height = 880, res = 100)
plot(
  log(mf.top150[, 1]), mf.top150[, 2],
  xlab = "log(GDP per country)", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
points(log(mf.top150[, 1]), mf.top150[, 3], col = "green")
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2), col = "blue")
lines(gdp.life.all.new.mf.order.linmod3[, 1], fitted(linmod3), col = "red")
dev.off()

jpeg(filename = "gdp_life_ex_male.jpg", width = 880, height = 880, res = 100)
plot(
  log(mf.top150[, 1]), mf.top150[, 2],
  xlab = "log(GDP per country)", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, male (years)"
)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2), col = "blue")
dev.off()

jpeg(filename = "gdp_life_ex_female.jpg", width = 880, height = 880, res = 100)
plot(
  log(mf.top150[, 1]), mf.top150[, 3],
  xlab = "log(GDP per country)", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female (years)"
)
lines(gdp.life.all.new.mf.order.linmod3[, 1], fitted(linmod3), col = "red")
dev.off()

# - - - - - - - - - - - - - - - - - - - - #
# top 20
gdp.life.all.new.mf.order.log <-
  cbind(log(mf.top20[, 1]), (mf.top20[, 2] + mf.top20[, 3]) / 2)
jpeg(filename = "gdp_life_ex_male_female_gdptop20.jpg", width = 880, height = 880, res = 100)
plot(
  gdp.life.all.new.mf.order.log,
  xlab = "log(GDP per country)", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
text(gdp.life.all.new.mf.order.log, rownames(mf.top20))
gdp.life.all.new.mf.order.linmod2 <- data.frame(gdp.life.all.new.mf.order.log)
colnames(gdp.life.all.new.mf.order.linmod2) <- c("x", "y")
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2))
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2), col = "blue")
dev.off()

# - - - - - - - - - - - - - - - - - - - - #
# lowest 20 per gdp!
# tail() yields exactly 20 rows; indexing (n - 20):n selected 21.
lowest <- tail(gdp.life.all.new.mf.order, 20)
gdp.life.all.new.mf.order.log <-
  cbind(log(lowest[, 1]), (lowest[, 2] + lowest[, 3]) / 2)
jpeg(filename = "gdp_life_ex_male_female_gdplower20.jpg", width = 880, height = 880, res = 100)
plot(
  gdp.life.all.new.mf.order.log,
  xlab = "log(GDP per country)", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
text(gdp.life.all.new.mf.order.log, rownames(lowest))
gdp.life.all.new.mf.order.linmod2 <- data.frame(gdp.life.all.new.mf.order.log)
colnames(gdp.life.all.new.mf.order.linmod2) <- c("x", "y")
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2))
linmod2 <- lm(y ~ x + I(x^2) + I(x^3), data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2), col = "blue")
dev.off()

# - - - - - - - - - - - - - - - - - - - - #
# top 20 countries by life expectancy
# Column 1 is the male/female mean, column 2 is log(GDP). Rows are sorted by
# life expectancy in ascending order, so head() returns the lowest values
# and tail() the highest.
gdp.life.all.new.mf.order.log <- cbind(
  (gdp.life.all.new.mf.order[, 2] + gdp.life.all.new.mf.order[, 3]) / 2,
  log(gdp.life.all.new.mf.order[, 1])
)
rownames(gdp.life.all.new.mf.order.log) <- rownames(gdp.life.all.new.mf.order)
gdp.life.all.new.mf.order.log.new <-
  gdp.life.all.new.mf.order.log[order(gdp.life.all.new.mf.order.log[, 1]), ]
head(gdp.life.all.new.mf.order.log.new[, 1])
barplot(head(gdp.life.all.new.mf.order.log.new[, 1], 20), col = "blue")
barplot(tail(gdp.life.all.new.mf.order.log.new[, 1], 10), col = "green")

plot(
  head(gdp.life.all.new.mf.order.log.new[, 2], 20),
  head(gdp.life.all.new.mf.order.log.new[, 1], 20),
  xlab = "log(GDP per country)", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
text(
  head(gdp.life.all.new.mf.order.log.new[, 2], 20),
  head(gdp.life.all.new.mf.order.log.new[, 1], 20),
  rownames(head(gdp.life.all.new.mf.order.log.new, 20))
)
gdp.life.all.new.mf.order.linmod2 <- data.frame(
  x = head(gdp.life.all.new.mf.order.log.new[, 2], 20),
  y = head(gdp.life.all.new.mf.order.log.new[, 1], 20)
)
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2))

plot(
  tail(gdp.life.all.new.mf.order.log.new[, 2], 20),
  tail(gdp.life.all.new.mf.order.log.new[, 1], 20),
  xlab = "log(GDP per country)", ylab = "life expectancy",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
text(
  tail(gdp.life.all.new.mf.order.log.new[, 2], 20),
  tail(gdp.life.all.new.mf.order.log.new[, 1], 20),
  rownames(tail(gdp.life.all.new.mf.order.log.new, 20))
)
gdp.life.all.new.mf.order.linmod2 <- data.frame(
  x = tail(gdp.life.all.new.mf.order.log.new[, 2], 20),
  y = tail(gdp.life.all.new.mf.order.log.new[, 1], 20)
)
linmod2 <- lm(y ~ x, data = gdp.life.all.new.mf.order.linmod2)
lines(gdp.life.all.new.mf.order.linmod2[, 1], fitted(linmod2))

colnames(gdp.life.all.new.mf.order.log.new) <- c("life expectancy", "log(gdp)")
tail(gdp.life.all.new.mf.order.log.new, 40)
head(gdp.life.all.new.mf.order.log.new, 40)

# Undo the log and express GDP in billions: divide by 1e9 (the previous
# divisor, 1e8, produced values ten times too large for the column label).
gdp.life.all.new.mf.order.log.new.real <- cbind(
  gdp.life.all.new.mf.order.log.new[, 1],
  exp(gdp.life.all.new.mf.order.log.new[, 2]) / 1000000000
)
colnames(gdp.life.all.new.mf.order.log.new.real) <-
  c("life expectancy", "GDP in Billions USD")
tail(gdp.life.all.new.mf.order.log.new.real, 40)
head(gdp.life.all.new.mf.order.log.new.real, 40)

# - - - - - - - - - - - - - - - - - - - - #
# plotting
#install.packages("scatterplot3d")
library("scatterplot3d")
# Inputs for the 3D plots: b = log(GDP), a = male, c = female life
# expectancy, first 150 rows by GDP.
b <- log(gdp.life.all.new.mf.order[1:150, 1])
a <- gdp.life.all.new.mf.order[1:150, 2]
c <- gdp.life.all.new.mf.order[1:150, 3]
# 3D scatter plots of male (a) vs. log GDP (b) vs. female (c) life
# expectancy, with a regression plane and the cubic-fit curve overlaid.
ac <- (gdp.life.all.new.mf.order[1:150, 2] + gdp.life.all.new.mf.order[1:150, 3]) / 2

# Cubic fits of male and female life expectancy on log(GDP), computed here
# from a, b and c. The workspace objects linmod2/linmod3 are not reused:
# linmod2 was last refitted on a 20-row subset, so its fitted values do not
# line up with the 150-element b.
male.fit <- fitted(lm(a ~ b + I(b^2) + I(b^3)))
female.fit <- fitted(lm(c ~ b + I(b^2) + I(b^3)))

# Plane of female life expectancy regressed on male life expectancy and
# log(GDP); used by both figures.
my.lm <- lm(c ~ a + b)

jpeg(filename = "gdp_life_ex_male_female_3d.jpg", width = 880, height = 880, res = 100)
s3d <- scatterplot3d(
  a, b, c,
  angle = 70, type = "p",
  xlab = "male", zlab = "female", ylab = "GDP",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
#my1 <- lm(c ~ b)
#my2 <- lm(a ~ b)
#s3d$points3d(fitted(my2),b,fitted(my1), col="blue", type="h", pch=6)
s3d$points3d(male.fit, b, female.fit, col = "red", type = "h", pch = 9)
s3d$points3d(a, b, c)
s3d$plane3d(my.lm)
s3d$points3d(male.fit, b, female.fit, col = "red", type = "l")
dev.off()

jpeg(filename = "gdp_life_ex_male_female_3d_col.jpg", width = 880, height = 880, res = 100)
# Colour groups by GDP rank: first 15, next 35, remaining 100 points.
group <- c(rep(1, 15), rep(2, 35), rep(3, 100))
s3d <- scatterplot3d(
  a, b, c,
  color = as.numeric(group), angle = 70, type = "p",
  xlab = "male", zlab = "female", ylab = "GDP",
  main = "GDP per Country vs. Life expectancy at birth, female & male (years)"
)
s3d$points3d(male.fit, b, female.fit, col = "red", type = "h", pch = 9)
s3d$points3d(male.fit, b, female.fit, col = "red", type = "l")
s3d$plane3d(my.lm)
dev.off()