# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Basic Commands and Statistics with R
# Session setup for the tutorial.
# NOTE: this wipes every object (hidden ones included) from the global
# environment; the argument is spelled out instead of relying on partial
# matching of `all`.
rm(list = ls(all.names = TRUE))
getwd()
# system("ls")
# Only switch to the course directory when it exists; an unconditional setwd()
# stops the whole script on machines that do not have this path.
if (dir.exists("~/ownCloud/STA_Statistics/basicR/")) {
  setwd("~/ownCloud/STA_Statistics/basicR/")
}
search()
options(scipen = 100) # scientific notation off
options(scipen = 0) # scientific notation on (the setting that stays active)
options(digits = 3) # print three significant digits
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# https://stat.ethz.ch/R-manual/R-patched/library/datasets/html/mtcars.html
# Put the data frame on the search path, look at the path, and take it off
# again; the two search() calls show the entry appearing and disappearing.
attach(what = mtcars)
search()
detach("mtcars")
search()
# The data was extracted from the 1974 Motor Trend US magazine.
mtcars
# ?mtcars
# class() reports the class of an object.
class(mtcars)
# str() compactly displays the internal structure of an R object.
str(mtcars)
# 'data.frame': 32 obs. of 11 variables:
# mpg cyl disp hp drat wt qsec vs am gear carb
# Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
# Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
# Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
# Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
# Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
# Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
# Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
# Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
# Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
# Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
# Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
# Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
# Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
# Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
# Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
# Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
# Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
# Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
# Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
# Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
# Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
# Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
# AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
# Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
# Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
# Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
# Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
# Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
# Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
# Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
# Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
# Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# A data frame with 32 observations on 11 (numeric) variables.
#[, 1] mpg Miles/(US) gallon
#[, 2] cyl Number of cylinders
#[, 3] disp Displacement (cu.in.)
#[, 4] hp Gross horsepower
#[, 5] drat Rear axle ratio
#[, 6] wt Weight (1000 lbs)
#[, 7] qsec 1/4 mile time
#[, 8] vs Engine (0 = V-shaped, 1 = straight)
#[, 9] am Transmission (0 = automatic, 1 = manual)
#[,10] gear Number of forward gears
#[,11] carb Number of carburetors
# (Re)assign the column names of the data frame; these are the original short
# names, so the content is unchanged.
names(mtcars) <- c(
  "mpg", "cyl", "disp", "hp", "drat", "wt",
  "qsec", "vs", "am", "gear", "carb"
)
# Alternative with descriptive names:
# names(mtcars) <- c("miles-per-gallon", "cylinders", "displacement",
#                    "horsepower", "rear-axle-ratio", "weight", "qsec",
#                    "engine-vs", "Transmissionam", "gear", "carburetors")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# data structures
# `letters` is a built-in character vector holding the lower-case alphabet.
letters
# Copy it and inspect the structure of the copy.
l <- letters
str(object = l)
# chr [1:26] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p"
# "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Vectors
# A vector holds a single type: mixing integers, a string, NA and logicals
# coerces every element to character (NA stays NA).
t <- c(1:3, "Hello", NA, FALSE, TRUE)
t
# [1] "1" "2" "3" "Hello" NA "FALSE" "TRUE"
str(object = t)
# chr [1:7] "1" "2" "3" "Hello" NA "FALSE" "TRUE"
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Factors
# mtcars codes the transmission in `am` as 0 = automatic, 1 = manual.
mtcars$am[mtcars$am == 1]
which(mtcars$am == 1)
# Translate the numeric codes into readable labels (1 -> "Manual",
# 0 -> "Automatic"), matching the coding documented for the data set.
am.names <- ifelse(mtcars$am == 1, "Manual", "Automatic")
am.names
am.names.factor <- factor(am.names)
am.names.factor
# [1] Manual Manual Manual Automatic Automatic ...
# Levels: Automatic Manual
# internally stored as integer codes that index the levels:
# 1 Automatic
# 2 Manual
# Automatic and Manual are the levels of the factor
levels(am.names.factor)
# relevel() returns a copy with `ref` moved to the first level; the result is
# only printed here, am.names.factor itself is not modified.
relevel(am.names.factor, ref = "Manual")
# [1] Manual Manual Manual Automatic Automatic ...
# Levels: Manual Automatic
am.names.factor[1]
as.character(am.names.factor[1]) # the label of the first element
as.numeric(am.names.factor[1]) # the integer code, not the label
levels(am.names.factor)[1] # the label that belongs to code 1
table(am.names.factor)
is.factor(am.names.factor)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Data Frames
mtcars
str(mtcars)
# 'data.frame': 32 obs. of 11 variables:
# Request factors explicitly: since R 4.0.0 data.frame() keeps strings as
# character by default, so without this the levels()/is.factor() calls below
# would return NULL/FALSE and the comparison with the second data frame
# would show no difference.
df <- data.frame(1:20, 60:41, letters[1:20], stringsAsFactors = TRUE)
df
colnames(df) <- c("n1", "n2", "letters")
df
t(df) # transpose; the result is a character matrix
df$letters # get the vectors from the data frame
df$n1
df[, 2]
df[1, 3]
df[[3]]
levels(df$letters)
is.factor(df[, 3]) # TRUE: the strings were converted to a factor
# Same data, but strings stay character vectors.
df <- data.frame(1:20, 60:41, letters[1:20], stringsAsFactors = FALSE)
df
is.factor(df[, 3]) # FALSE
trees
mtcars
# USMortality ships with the lattice package; it is referenced through the
# namespace so the line works without attaching lattice first.
lattice::USMortality
data() # list of data sets associated with all packages in the search path
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Matrix
# Convert the data frame once and reuse the result; all elements of a matrix
# have the same mode (numeric, character).
mtcars.mat <- as.matrix(mtcars)
mtcars.mat
# Flattening a matrix yields its elements in column-major order.
x <- as.vector(mtcars.mat)
x
# `ncol` is spelled out instead of relying on partial argument matching.
mat <- matrix(1:100, ncol = 10)
mat
as.vector(mat)
mat44 <- matrix(1:(4 * 4), ncol = 4)
mat44
as.vector(mat44)
# Row and column names of the converted data frame.
dimnames(mtcars.mat)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Arrays
# Two dimensions: a 100 x 100 matrix, filled column by column
# (same result as assigning dim(x) <- c(100, 100) to the vector 1:10000).
x <- matrix(1:10000, nrow = 100, ncol = 100)
x
# An array is the generalisation of a matrix to more than two dimensions.
x <- array(1:1000, dim = c(10, 10, 10))
x
# A small 2 x 3 x 3 integer array.
x <- array(1:(2 * 3 * 3), dim = c(2, 3, 3))
x
str(x)
# int [1:2, 1:3, 1:3] 1 2 3 4 5 6 7 8 9 10 ...
# The same shape holding characters.
x <- array(letters[1:(2 * 3 * 3)], dim = c(2, 3, 3))
x
str(x)
# chr [1:2, 1:3, 1:3] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l"
# "m" "n" "o" "p" "q" "r"
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Lists - collect different types of data objects
data()
# barley, environmental, ethanol and melanoma ship with the lattice package;
# they are referenced through the namespace so this line does not fail with
# "object not found" when lattice has not been attached.
l <- list(
  mtcars,
  lattice::barley,
  lattice::environmental,
  lattice::ethanol,
  lattice::melanoma
)
str(l)
# List of 5
# $ :'data.frame': 32 obs. of 11 variables:
# ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
# ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ...
# ..$ disp: num [1:32] 160 160 108 258 360 ...
# ..$ hp : num [1:32] 110 110 93 110 175 105 245 62 95 123 ...
# ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
# ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
# ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ...
# ..$ vs : num [1:32] 0 0 1 1 0 1 0 1 1 1 ...
# ..$ am : num [1:32] 1 1 1 0 0 0 0 0 0 0 ...
# ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ...
# ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ...
# $ :'data.frame': 120 obs. of 4 variables:
# ..$ yield : num [1:120] 27 48.9 27.4 39.9 33 ...
# ..$ variety: Factor w/10 levels "Svansota","No.462",..: 3 3 3 3 3 3 7 7 7 7...
# ..$ year : Factor w/2 levels "1932","1931": 2 2 2 2 2 2 2 2 2 2...
# ..$ site : Factor w/6 levels "Grand Rapids",..: 3 6 4 5 1 2 3 6 4 5...
# $ :'data.frame': 111 obs. of 4 variables:
# ..$ ozone : num [1:111] 41 36 12 18 23 19 8 16 11 14 ...
# ..$ radiation : num [1:111] 190 118 149 313 299 99 19 256 290 274 ...
# ..$ temperature: num [1:111] 67 72 74 62 65 59 61 69 66 68 ...
# ..$ wind : num [1:111] 7.4 8 12.6 11.5 8.6 13.8 20.1 9.7 9.2 10.9 ...
# $ :'data.frame': 88 obs. of 3 variables:
# ..$ NOx: num [1:88] 3.74 2.29 1.5 2.88 0.76 ...
# ..$ C : num [1:88] 12 12 12 12 12 9 9 9 12 12 ...
# ..$ E : num [1:88] 0.907 0.761 1.108 1.016 1.189 ...
# $ :'data.frame': 37 obs. of 2 variables:
# ..$ year : num [1:37] 1936 1937 1938 1939 1940 ...
# ..$ incidence: num [1:37] 0.9 0.8 0.8 1.3 1.4 1.2 1.7 1.8 1.6 1.5 ...
# [[ ]] extracts a single element of the list (here: a whole data frame).
l[[1]]
l[[2]]
l[[3]]
l[[1]]
# Build a linear model from a list element.
model <- lm(mpg ~ wt, data = l[[1]])
names(model)
str(model) # the model itself is returned as a list
# Two equivalent ways to reach the first component of the model.
model$coefficients
model[[1]]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# some basic R functions
# Arithmetic is vectorised: each call below works on the whole column at once.
mtcars$mpg
# [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 ...
mtcars$mpg^2
# [1] 441.00 441.00 519.84 457.96 349.69 327.61 204.49 595.36 519.84 ...
sqrt(mtcars$mpg^2)
# [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 ...
# Built-in constant pi and the trigonometric functions (arguments in radians).
pi
# [1] 3.141593
90 * pi / 180 # degrees to radians
sin(90 * pi / 180)
cos(90 * pi / 180)
cos(0 * pi / 180)
# Celsius to Fahrenheit, first for a single value ...
celsius <- 20
9 / 5 * celsius + 32
# ... then for a whole range of temperatures at once.
celsius <- -20:40
fahrenheit <- 9 / 5 * celsius + 32
# Step plot of the conversion.
plot(celsius, fahrenheit, type = "s")