# Basic R commands – 1

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Basic Commands and Statistics with R

# Session setup: clear the workspace, move to the course directory and set the
# number formatting options used for the printed output in this script.

rm(list = ls(all = TRUE))   # remove every object (including hidden ones)
getwd()
#system("ls")

# The course directory only exists on the author's machine; an unconditional
# setwd() would stop the script everywhere else, so it is only entered when
# it is actually present.
course.dir <- "~/ownCloud/STA_Statistics/basicR/"
if (dir.exists(course.dir)) {
  setwd(course.dir)
} else {
  message("Directory not found, keeping working directory: ", getwd())
}

search()                # packages and environments currently on the search path

options(scipen=100)   # scientific off
options(scipen=0)     # scientific on
options(digits = 3)   # print numbers with 3 significant digits

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# https://stat.ethz.ch/R-manual/R-patched/library/datasets/html/mtcars.html

# attach() places the columns of mtcars on the search path: the search() call
# after attach() lists the data set, the one after detach() no longer does.
# From here on the columns have to be addressed as mtcars$<name> again.
attach(mtcars)
search()
detach(mtcars)
search()
mtcars         # The data was extracted from the 1974 Motor Trend US magazine
# ?mtcars      # opens the help page of the data set

class(mtcars)  # determine the class of an object

str(mtcars)    # Compactly display the internal structure of an R object

# 'data.frame':	32 obs. of  11 variables:

#                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
# Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
# Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
# Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
# Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
# Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
# Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
# Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
# Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
# Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
# Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
# Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
# Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
# Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
# Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
# Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
# Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
# Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
# Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
# Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
# Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
# Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
# Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
# AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
# Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
# Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
# Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
# Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
# Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
# Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
# Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
# Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
# Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

# A data frame with 32 observations on 11 (numeric) variables.

#[, 1]	mpg	Miles/(US) gallon
#[, 2]	cyl	Number of cylinders
#[, 3]	disp	Displacement (cu.in.)
#[, 4]	hp	Gross horsepower
#[, 5]	drat	Rear axle ratio
#[, 6]	wt	Weight (1000 lbs)
#[, 7]	qsec	1/4 mile time
#[, 8]	vs	Engine (0 = V-shaped, 1 = straight)
#[, 9]	am	Transmission (0 = automatic, 1 = manual)
#[,10]	gear	Number of forward gears
#[,11]	carb	Number of carburetors

# Set the eleven column names explicitly (same short names as in the variable
# list above); colnames<- replaces all names at once, so the order matters.
colnames(mtcars) <- c("mpg","cyl","disp","hp","drat","wt",
                      "qsec","vs","am","gear","carb")

# colnames(mtcars) <- c("miles-per-gallon","cylinders","displacement",
#                       "horsepower","rear-axle-ratio","weight","qsec",
#                       "engine-vs","Transmissionam","gear","carburetors")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# data structures

# `letters` is a built-in character vector with the 26 lower-case letters.
letters
l <- letters
str(l)
# chr [1:26]  "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p"
#             "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Vectors

# A vector holds elements of one type only: c() coerces the integers and the
# logicals to character because one element is a string. NA stays NA.
# Note: this variable shares its name with the transpose function t(), which
# is still found when `t` is used as a function call (see t(df) further down).
t <- c(1:3,"Hello",NA,FALSE,TRUE)
t
# [1] "1"     "2"     "3"     "Hello" NA      "FALSE" "TRUE"
str(t)
# chr [1:7] "1" "2" "3" "Hello" NA "FALSE" "TRUE"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Factors

# The transmission column am is coded numerically: 0 = automatic, 1 = manual.
mtcars$am[mtcars$am==1]
which(mtcars$am==1)

# Replace the numeric codes by readable labels. The first assignment turns the
# whole vector into character, so the remaining zeros are matched as "0" in
# the second step.
am.names <- mtcars$am
am.names[which(am.names==1)] <- "Manual"
am.names
am.names[which(am.names==0)] <- "Automatic"
am.names

# factor() stores each distinct label once (the levels, sorted alphabetically)
# and keeps an integer code per element.
am.names.factor <- factor(am.names)
am.names.factor
# [1] Manual    Manual    Manual    Automatic Automatic ...
# Levels: Automatic Manual

# internally stored as a table of:
# 1 Automatic
# 2 Manual

# Automatic and Manual are the levels of the factor
levels(am.names.factor)

# relevel() moves the reference level to the front; the data stay the same
relevel(am.names.factor, ref="Manual")
# [1] Manual    Manual    Manual    Automatic Automatic ...
# Levels: Manual Automatic

am.names.factor[1]
as.character(am.names.factor[1])  # the label of the element ("Manual")
as.numeric(am.names.factor[1])    # the internal integer code (2), not the label
levels(am.names.factor)[1]        # the label of the first level ("Automatic")

table(am.names.factor)
# Automatic    Manual
#        19        13

is.factor(am.names.factor)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # 
# Data Frames

mtcars
str(mtcars)
# 'data.frame':	32 obs. of  11 variables:

# Build a small data frame from three vectors of equal length.
# stringsAsFactors is given explicitly: its default changed from TRUE to FALSE
# in R 4.0.0, and the factor demonstration below needs the letters as a factor.
df <- data.frame(1:20,60:41,letters[1:20], stringsAsFactors = TRUE)
df
colnames(df) <- c("n1","n2","letters")
df
t(df)           # transposing yields a character matrix

df$letters      # get the vectors from the dataframe
df$n1
df[,2]          # second column
df[1,3]         # first row, third column
df[[3]]         # third column as a vector
levels(df$letters)
is.factor(df[,3])   # TRUE

# The same data with the strings kept as plain character vectors
df <- data.frame(1:20,60:41,letters[1:20], stringsAsFactors = FALSE)
df
is.factor(df[,3])   # FALSE

# Some example data sets
trees
mtcars
lattice::USMortality   # ships with the lattice package, which is not attached
data()    # list of data associated with all current packages in the search path

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Matrix

# A matrix is a vector with two dimensions; all elements share one type.
as.matrix(mtcars)
mtcars.mat <- as.matrix(mtcars) # all elements of a matrix have the same mode
                                #(numeric, character)
x <- as.vector(mtcars.mat)      # all values in one vector, column after column
x

# matrix() fills column by column; the argument name is written out in full
# (ncol) instead of relying on partial argument matching.
mat <- matrix(1:100,ncol=10)
mat
as.vector(mat)

mat44 <- matrix(1:(4*4),ncol=4)
mat44
as.vector(mat44)

dimnames(mtcars.mat)            # list of row names and column names

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Arrays

# A vector turns into a matrix or an array as soon as it carries a `dim`
# attribute; matrix() and array() create the values and the dimensions in one
# step.

x <- matrix(1:10000, nrow = 100, ncol = 100)   # two dimensions: a matrix
x

x <- array(1:1000, dim = c(10, 10, 10))        # more than two dimensions: array
x

x <- array(1:(2*3*3), dim = c(2, 3, 3))        # small 2 x 3 x 3 integer array
x

str(x)
# int [1:2, 1:3, 1:3] 1 2 3 4 5 6 7 8 9 10 ...

x <- array(letters[1:(2*3*3)], dim = c(2, 3, 3))   # same shape, character data
x
str(x)
# chr [1:2, 1:3, 1:3] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l"
#                     "m" "n" "o" "p" "q" "r"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Lists - collect different types of data objects

data()

# A list can hold objects of different type and size, here five data frames.
# All but mtcars ship with the lattice package; they are addressed with
# lattice:: because the package is not attached in this script.
l <- list(mtcars,lattice::barley,lattice::environmental,
          lattice::ethanol,lattice::melanoma)
str(l)

# List of 5
# $ :'data.frame':	32 obs. of  11 variables:
# ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
# ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ...
# ..$ disp: num [1:32] 160 160 108 258 360 ...
# ..$ hp  : num [1:32] 110 110 93 110 175 105 245 62 95 123 ...
# ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
# ..$ wt  : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
# ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ...
# ..$ vs  : num [1:32] 0 0 1 1 0 1 0 1 1 1 ...
# ..$ am  : num [1:32] 1 1 1 0 0 0 0 0 0 0 ...
# ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ...
# ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ...
# $ :'data.frame':	120 obs. of  4 variables:
# ..$ yield  : num [1:120] 27 48.9 27.4 39.9 33 ...
# ..$ variety: Factor w/10 levels "Svansota","No.462",..: 3 3 3 3 3 3 7 7 7 7...
# ..$ year   : Factor w/2 levels "1932","1931": 2 2 2 2 2 2 2 2 2 2...
# ..$ site   : Factor w/6 levels "Grand Rapids",..: 3 6 4 5 1 2 3 6 4 5...
# $ :'data.frame':	111 obs. of  4 variables:
# ..$ ozone      : num [1:111] 41 36 12 18 23 19 8 16 11 14 ...
# ..$ radiation  : num [1:111] 190 118 149 313 299 99 19 256 290 274 ...
# ..$ temperature: num [1:111] 67 72 74 62 65 59 61 69 66 68 ...
# ..$ wind       : num [1:111] 7.4 8 12.6 11.5 8.6 13.8 20.1 9.7 9.2 10.9 ...
# $ :'data.frame':	88 obs. of  3 variables:
# ..$ NOx: num [1:88] 3.74 2.29 1.5 2.88 0.76 ...
# ..$ C  : num [1:88] 12 12 12 12 12 9 9 9 12 12 ...
# ..$ E  : num [1:88] 0.907 0.761 1.108 1.016 1.189 ...
# $ :'data.frame':	37 obs. of  2 variables:
# ..$ year     : num [1:37] 1936 1937 1938 1939 1940 ...
# ..$ incidence: num [1:37] 0.9 0.8 0.8 1.3 1.4 1.2 1.7 1.8 1.6 1.5 ...

# [[i]] extracts the i-th element itself (here: a data frame)
l[[1]]
l[[2]]
l[[3]]

l[[1]]

model <- lm(mpg~wt,data=l[[1]])   # building a model from a list object
names(model)
str(model)                        # the model itself is returned as a list

# list elements can be addressed by name or by position
model$coefficients
model[[1]]

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# some basic R functions

# Arithmetic is vectorised: an operator or function is applied to every
# element of the vector.
mtcars$mpg
# [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 ...
mtcars$mpg^2
# [1]  441.00  441.00 519.84 457.96 349.69 327.61 204.49  595.36  519.84  ...
sqrt(mtcars$mpg^2)
# [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 ...

pi
# [1] 3.141593   (with the default digits = 7; shown as 3.14 under digits = 3)
90*pi/180 # degrees to radians: 90 degrees = pi/2
sin(90*pi/180)
cos(90*pi/180)   # not exactly 0 because of floating point rounding
cos(0*pi/180)

# Celsius to Fahrenheit for a single value ...
celsius <- 20
9/5*celsius+32

# ... and for a whole range of values at once
celsius <- -20:40
fahrenheit <- 9/5*celsius+32
plot(celsius,fahrenheit,type="s")   # type = "s" draws a step line

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Sums and running totals / running products
sum(mtcars$mpg)
# [1] 642.9
cumsum(mtcars$mpg)
# [1]  21.0  42.0  64.8  86.2 104.9 123.0 137.3 161.7
cumprod(mtcars$mpg)
# [1] 2.100000e+01 4.410000e+02 1.005480e+04 2.151727e+05
#     4.023730e+06 7.282951e+07

c(1,2,3,4,5)    # c() concatenates its arguments into a vector
c(1:5)
c(TRUE,FALSE)
c("TRUE","FALSE") # character vector

# Five uniform random numbers; the values differ on every run, the outputs
# below are one example.
x <- runif(5)
x
# [1] 0.832 0.101 0.926 0.253 0.619
sort(x)          # the values in increasing order
# [1] 0.101 0.253 0.619 0.832 0.926

order(x)         # the positions that would sort x, not the values
# [1] 2 4 5 1 3
x[order(x)]      # indexing with the order gives the same result as sort(x)
# [1] 0.101 0.253 0.619 0.832 0.926

x <- c(1:5)
x
y <- c("TRUE","FALSE",NA)
y
z <- c(x,y)      # combining integer and character gives a character vector
z
# [1] "1"     "2"     "3"     "4"     "5"     "TRUE"  "FALSE" NA
rev(z)
# [1] NA      "FALSE" "TRUE"  "5"     "4"     "3"     "2"     "1"

# Indexing. z is character, so the comparisons below are string comparisons
# (the number is converted to "4"); an NA in the condition yields an NA.
z[2]
z[2:5]
# [1] "2" "3" "4" "5"
z[z<4]
# [1] "1" "2" "3" NA
z[z>4]
# [1] "5"     "TRUE"  "FALSE" NA
z[z>=4]
# [1] "4"     "5"     "TRUE"  "FALSE" NA
z[-c(3,4)]       # negative indices drop elements
# [1] "1"     "2"     "5"     "TRUE"  "FALSE" NA
z[c(3,4)]
# [1] "3" "4"

# Replace the missing value and the two logical strings by "0" / "1".
# The NA has to be replaced first: comparing NA with == gives NA, which is
# not allowed as an index in an assignment.
z[is.na(z)]
z[is.na(z)]<-0
z
z[z==TRUE]<-1    # TRUE is converted to "TRUE" for the comparison
z[z==FALSE]<-0
z
# [1] "1" "2" "3" "4" "5" "1" "0" "0"

# Basic plots. z still holds its values as character strings at this point.
plot(z)
plot(z,type="b")            # type = "b": both points and lines

x <- runif(length(z))*5     # one random value in [0, 5) per element of z

plot(x,z,type="b")          # plot(x, y)
plot(x~z,type="b")          # formula interface: y ~ x

plot(z,x,type="b")
lines(z~x,type="b",col="red")   # lines() adds to the existing plot

# Sine curve over 1..360 degrees, then sine against cosine (a circle)
plot(sin((1:360)*pi/180),type="l")
plot(sin((1:360)*pi/180),cos((1:360)*pi/180),type="l")

# Different frequencies on both axes
plot(sin((1:360)*pi/18),cos((1:360)*pi/10),type="l")

plot(sin((1:360)*pi/18),cos((1:360)*pi/10),type="l")

plot(tan((1:360)*pi/180),cos((1:360)*pi/180),type="l")
plot(tan((1:360)*pi/180),sin((1:360)*pi/180),type="l")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# some loops

# Animation: redraw the curve for every divisor i from 1 to 360.
# 360 frames with a pause of 0.1 s each take at least 36 s.
for(i in 1:360){
    plot(cos((1:360)*pi/i),sin((1:360)*pi/i),type="l")
    print(i)
    Sys.sleep(0.1)
}

# The same animation with i counting down from 360 to 1
for(i in 360:1){
  plot(cos((1:360)*pi/i),sin((1:360)*pi/i),type="l")
  print(i)
  Sys.sleep(0.1)
}

# Nested loops: separate divisors for the x values (i) and the y values (e).
# NOTE: 360 * 360 = 129600 frames; the pauses alone add up to about 3.6 hours,
# so this loop is meant to be interrupted by hand.
for(i in 1:360){
  for(e in 1:360){
  plot(cos((1:360)*pi/i),sin((1:360)*pi/e),type="l")
  print(i)
  Sys.sleep(0.1)
  }
}

# As above with sine and cosine swapped (again 129600 frames)
for(i in 1:360){
  for(e in 1:360){
    plot(sin((1:360)*pi/i),cos((1:360)*pi/e),type="l")
    print(i)
    Sys.sleep(0.1)
  }
}

# Short version with 10 * 10 = 100 frames; the current values of i and e are
# written into the top margin (side = 3) and printed to the console.
for(i in 1:10){
  for(e in 1:10){
    plot(sin((1:360)*pi/i),cos((1:360)*pi/e),type="l")
    mtext(paste("i:",i,"e:",e), side=3, outer=TRUE, line=-3)
    print(paste("i:",i,"e:",e))
    Sys.sleep(0.1)
  }
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# function collection

# Built-in constants with the lower-case and upper-case alphabet
letters
# "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r"
# "s" "t" "u" "v" "w" "x" "y" "z"
LETTERS
# "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R"
# "S" "T" "U" "V" "W" "X" "Y" "Z"
letters[1:15]
# [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o"

plot(mtcars$mpg)
print(mtcars$mpg)

# table() counts how often each distinct value occurs
table(mtcars$mpg)
table(mtcars$cyl)
#   4   6   8
#   11  7   14

# Columns 9 to 11 are am, gear and carb; several columns give a
# cross-tabulation with one dimension per column.
table(mtcars[,9:10])
str(table(mtcars[,9:10])) # table with 2 dimensions
table(mtcars[,9:11])
str(table(mtcars[,9:11]))
# 'table' int [1:2, 1:3, 1:6] 3 0 0 4 0 0 4 0 2 2 ...
# - attr(*, "dimnames")=List of 3
# ..$ am  : chr [1:2] "0" "1"
# ..$ gear: chr [1:3] "3" "4" "5"
# ..$ carb: chr [1:6] "1" "2" "3" "4" ...

# Simple summaries of a numeric vector
length(mtcars$mpg)
cat(mtcars$mpg)      # prints the bare values without index markers
mean(mtcars$mpg)
median(mtcars$mpg)
range(mtcars$mpg)    # minimum and maximum
unique(mtcars$mpg)   # each distinct value once

rep(mtcars$mpg,10)   # the whole vector repeated 10 times

# Name accessors. Called without an argument these functions raise an error,
# so they are shown on the mtcars data frame.
names(mtcars)      # element names; for a data frame the column names
colnames(mtcars)   # column names
rownames(mtcars)   # row names (here: the car models)

# diff() returns the differences between neighbouring elements
diff(mtcars$mpg)
plot(diff(mtcars$mpg),type="h",xlab="",ylab="difference")   # vertical bars
points(diff(mtcars$mpg),col="red")                          # add the points

sort(mtcars$mpg)        # the values in increasing order
order(mtcars$mpg)       # the positions that would sort the vector
rev(mtcars$mpg)         # reversed order of the elements
rev(sort(mtcars$mpg))   # decreasing order

cumsum(mtcars$mpg)
cumprod(mtcars$mpg)

# vs only contains 0 and 1, so it has many ties; ties.method selects how
# tied values are ranked.
rank(mtcars$vs) # Returns the sample ranks of the values in a vector.
(r1 <- rank(x1 <- c(3, 1, 4, 15, 92)))   # outer parentheses print the result
rank(mtcars$vs, ties.method= "first")  # first occurrence wins
                                       ## ranks without averaging
rank(mtcars$vs, ties.method= "last")   #  last occurrence wins
                                       ## ranks without averaging
rank(mtcars$vs, ties.method= "random") # ties broken at random
                                       ## ranks without averaging
rank(mtcars$vs, ties.method= "random") # and again
                                       ## ranks without averaging

# Set operations and matching
1:10
7:20
intersect(1:10, 7:20)   # values contained in both vectors

match(1:10,7:20)    # match returns a vector of the positions of (first)
                    # matches of its first argument in its second.

# %in% returns TRUE/FALSE for each element of the left-hand side
1:10 %in% c(1,3,5,9)
sstr <- c("c","ab","B","bba","c",NA,"@","bla","a","Ba","%")
sstr %in% c(letters, LETTERS)         # is the element a single letter?
sstr[sstr %in% c(letters, LETTERS)]

# The apply family and aggregation. Called without arguments these functions
# raise an error, so each one is shown with a small example.
apply(as.matrix(mtcars), 2, mean)      # apply a function over columns (2)
sapply(mtcars, mean)                   # apply a function to every column
aggregate(state.x77, list(Region = state.region), mean)  # means per group
tapply(mtcars$mpg, mtcars$cyl, mean)   # mean mpg for each number of cylinders

merge(mtcars$vs,c(1,2))                # no common column: all combinations

# Reading text files: write mtcars to a temporary CSV file and read it back.
csv.file <- tempfile(fileext = ".csv")
write.csv(mtcars, csv.file)
read.csv(csv.file, row.names = 1)
read.table(csv.file, header = TRUE, sep = ",", row.names = 1)
unlink(csv.file)                       # remove the temporary file again

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# histograms

hist(mtcars$mpg)

# looping with R: one histogram per column on a 3 x 4 grid of panels
par(mfrow=c(3,4))
# seq_along() gives one index per column; [[i]] extracts the column vector
for(i in seq_along(mtcars)){
  hist(mtcars[[i]],main=paste("Data: MTcars",colnames(mtcars)[i]),
                              xlab=paste(colnames(mtcars)[i]))
}
par(mfrow=c(1,1))   # back to one plot per page

# Custom title and axis label
hist(mtcars$mpg, main="Data: MTcars - mpg Miles per gallon",
     xlab="mpg Miles per gallon")

# breaks suggests the number of cells
hist(mtcars$mpg, breaks=10, main="Data: MTcars - mpg Miles per gallon",
     xlab="mpg Miles per gallon")

hist(mtcars$mpg, breaks=length(mtcars$mpg),
     main="Data: MTcars - mpg Miles per gallon",
     xlab="mpg Miles per gallon")

# probability densities instead of counts (freq = FALSE)
hist(mtcars$mpg, breaks=length(mtcars$mpg), freq=FALSE,
     main="Data: MTcars - mpg Miles per gallon", xlab="mpg Miles per gallon")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#  Boxplot with Scatterplot

# Scatterplot of mpg against weight with a boxplot of each variable in the
# margins. par(fig = c(x1, x2, y1, y2)) selects the part of the device that
# the next plot uses.

# First panel: setting fig without new = TRUE starts a fresh page, so the
# scatterplot is not drawn over the previous histogram.
par( fig = c( 0, 0.8, 0, 0.8 ) )
plot( mtcars$wt, mtcars$mpg, xlab = "Car Weight",
      ylab = "Miles Per Gallon" )

# mtcars is no longer attached, so the columns are passed via data = and
# mtcars$...; the model is fitted once and reused for the line and the labels.
model <- lm( mpg~wt, data = mtcars )
abline( model, col = "red" )             # regression line (y~x)
lines( lowess( mtcars$wt, mtcars$mpg ), col = "blue" )
                                         # lowess line (x,y)
                                         # LOWESS smoother which uses
                                         # locally-weighted
                                         # polynomial regression
text( 4, 32, paste( "Intercept", model$coefficients[1] ) )
text( 4, 30, paste( "wt", model$coefficients[2] ) )

# Horizontal boxplot of the weight above the scatterplot; new = TRUE keeps
# what has been drawn so far.
par( fig = c( 0, 0.8, 0.55, 1 ), new = TRUE )
boxplot(mtcars$wt, horizontal = TRUE, axes=FALSE,
                                 col= "green", notch = TRUE )
m.wt <- mean( mtcars$wt )
mtext( paste( "mean wt", m.wt ), side = 3, outer = TRUE, line = -8 )

# Vertical boxplot of mpg to the right of the scatterplot
par( fig = c( 0.65, 1, 0, 0.8 ), new = TRUE)
boxplot( mtcars$mpg, axes = FALSE, col = "green", notch = TRUE)
m.mpg <- mean( mtcars$mpg )
mtext( paste( "mean mpg", m.mpg ), side = 2, outer = TRUE, line = -28)

# Overall title
mtext( "Miles per gallon vs Car Weight", side = 3, outer = TRUE, line = -3 )

par( mfrow = c( 1, 1 ) )   # return to a single-panel layout

# Martin Stoppacher                                                             #
# office@martinstoppacher.com                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#################################################################################
# Martin Stoppacher: