X

Basic R commands – 2 – probability distributions

Basic Commands and Statistics with R – 2 – probability distributions. R functions for the most common probability distributions.

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Basic Commands and Statistics with R - 2 - probability distributions
# An overview of the most common probability distributions
# eg: # https://en.wikipedia.org/wiki/List_of_probability_distributions
# path: ~/ownCloud/STA_Statistics/Basic_Statistics/
# file_name: statistic_basics2.R
# files_used:

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Probability Plots

# Distribution                    Functions
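# Prefixes: p = distribution function (CDF), q = quantile function,
#           d = density / probability mass, r = random generation
#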
# Normal                          pnorm       qnorm      dnorm      rnorm
# Beta                            pbeta       qbeta      dbeta      rbeta
# Binomial                        pbinom      qbinom     dbinom     rbinom
# Uniform                         punif       qunif      dunif      runif
# Cauchy                          pcauchy     qcauchy    dcauchy    rcauchy
# Chi-Square                      pchisq      qchisq     dchisq     rchisq
# Exponential                     pexp        qexp       dexp       rexp
# F                               pf          qf         df         rf
# Gamma                           pgamma      qgamma     dgamma     rgamma
# Geometric                       pgeom       qgeom      dgeom      rgeom
# Hypergeometric                  phyper      qhyper     dhyper     rhyper
# Logistic                        plogis      qlogis     dlogis     rlogis
# Log Normal                      plnorm      qlnorm     dlnorm     rlnorm
# Negative Binomial               pnbinom     qnbinom    dnbinom    rnbinom
# Poisson                         ppois       qpois      dpois      rpois
# Student t                       pt          qt         dt         rt
# Studentized Range               ptukey      qtukey     -          -          (R provides no dtukey / rtukey)
# Weibull                         pweibull    qweibull   dweibull   rweibull
# Wilcoxon Rank Sum Statistic     pwilcox     qwilcox    dwilcox    rwilcox
# Wilcoxon Signed Rank Statistic  psignrank   qsignrank  dsignrank  rsignrank
# ...

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Normal  pnorm, qnorm, dnorm, rnorm

# Cumulative distribution function (CDF): pnorm( q, mean, sd ) = P( X <= q )
# Single value - P( X < 23.5 ) for a normal variable with mean 50 and sd 20
pn <- pnorm(q = 23.5, mean = 50, sd = 20)
str(pn)
# num 0.0926

# Vectorised call - the CDF evaluated at every integer from 1 to 100
pn <- pnorm(q = 1:100, mean = 50, sd = 20)
str(pn)
# num [1:100] 0.00714 0.0082 0.00939 0.01072 0.01222 ...

# Two panels side by side: a histogram of the CDF values (left) and the
# CDF values plotted against 1:100 (right)
par(mfrow = c(1, 2))

hist(pn, main = "normal mean = 50, sd = 20")
plot(1:100, pn, main = "normal mean = 50, sd = 20")
# red cross-hair at the median: P( X <= 50 ) = 0.5
abline(h = 0.5, v = 50, col = "red")

# back to a single-panel layout
par(mfrow = c(1, 1))

# Quantile function (inverse CDF): qnorm( p, mean, sd ) returns the value x
# for which P( X <= x ) = p

qn <- qnorm(p = 0.5, mean = 50, sd = 20)
str(qn)
# num 50

# Quantiles for p = 0.01, 0.02, ..., 1.00 ( the last one, qnorm( 1 ), is Inf )
qn <- qnorm(p = (1:100) / 100, mean = 50, sd = 20)
str(qn)
qn
# [1]  3.47  8.93 12.38 14.99 17.10 18.90 20.48 21.90 23.18 24.37 25.47 26.5
#hist(qn,breaks=150)

# Left: histogram of the quantiles; right: quantile against probability
par(mfrow = c(1, 2))
hist(qn, main = "normal mean = 50, sd = 20")
plot(((1:100) / 100), qn, main = "normal mean = 50, sd = 20")
# red cross-hair at the median: p = 0.5 maps to the quantile 50
abline(v = 0.5, h = 50, col = "red")

# back to a single-panel layout
par(mfrow = c(1, 1))

# Inverted plot - the quantile curve (left) next to the same points with the
# axes swapped (right), i.e. probability plotted against quantile

par(mfrow = c(1, 2))

plot(((1:100) / 100), qn, main = "normal mean = 50, sd = 20")
# Reference line at the median quantile 50. The former value 150 lies outside
# the plotted range of qn, so no line was drawn at all.
abline(h = 50, col = "red")
plot(qn, ((1:100) / 100), main = "normal mean = 50, sd = 20")
# same reference on the swapped axes: vertical line at the quantile 50
abline(v = 50, col = "red")

# restore the single-panel layout, as the other sections of this script do
par(mfrow = c(1, 1))

# Density function: dnorm( x, mean, sd ) is the height of the normal density
# curve at x; here it is evaluated at the integers 1 to 100

dn <- dnorm(x = 1:100, mean = 50, sd = 20)
str(dn)
dn

# [1]  0.00099187 0.00111973 0.00126091 0.00141635 0.00158698 0.00177373 
#      0.00197750 0.00219918 0.00243960 0.00269955 0.00297974 0.00328079
# [13] 0.00360324 0.00394751 0.00431387 0.00470245 0.00511325 0.00554604 
#      0.00600045 0.00647588 0.00697153 0.00748637 0.00801917 0.00856843
# [25] 0.00913245 0.00970930 0.01029681 0.01089261 0.01149411 0.01209854 
#      0.01270295 0.01330426 0.01389924 0.01448458 0.01505687 0.01561270
# [37] 0.01614862 0.01666123 0.01714719 0.01760327 0.01802635 0.01841351 
#      0.01876202 0.01906939 0.01933341 0.01955213 0.01972397 0.01984763
# [49] 0.01992220 0.01994711 0.01992220 0.01984763 0.01972397 0.01955213 
#      0.01933341 0.01906939 0.01876202 0.01841351 0.01802635 0.01760327
# [61] 0.01714719 0.01666123 0.01614862 0.01561270 0.01505687 0.01448458 
#      0.01389924 0.01330426 0.01270295 0.01209854 0.01149411 0.01089261
# [73] 0.01029681 0.00970930 0.00913245 0.00856843 0.00801917 0.00748637 
#      0.00697153 0.00647588 0.00600045 0.00554604 0.00511325 0.00470245
# [85] 0.00431387 0.00394751 0.00360324 0.00328079 0.00297974 0.00269955 
#      0.00243960 0.00219918 0.00197750 0.00177373 0.00158698 0.00141635
# [97] 0.00126091 0.00111973 0.00099187 0.00087642

# Adding up the densities at the unit-spaced points 1, ..., 100 approximates
# the area under the density curve over that range
sum(dn)

# [1] 0.98756

# Left: histogram of the density values; right: density against 1:100
par(mfrow = c(1, 2))
hist(dn, main = "normal mean = 50, sd = 20")
plot(1:100, dn, main = "normal mean = 50, sd = 20")
par(mfrow = c(1, 1))

# running total of the density values
cumsum(dn)

# [1]  0.00099187 0.00211159 0.00337251 0.00478886 0.00637584 0.00814957 
#      0.01012707 0.01232625 0.01476585 0.01746540 0.02044514 0.02372593
# [13] 0.02732917 0.03127668 0.03559054 0.04029300 0.04540624 0.05095229 
#      0.05695274 0.06342862 0.07040014 0.07788652 0.08590568 0.09447411
# [25] 0.10360657 0.11331587 0.12361268 0.13450529 0.14599940 0.15809794 
#      0.17080089 0.18410515 0.19800440 0.21248897 0.22754584 0.24315854
# [37] 0.25930716 0.27596839 0.29311558 0.31071885 0.32874520 0.34715870 
#      0.36592072 0.38499011 0.40432352 0.42387565 0.44359962 0.46344725
# [49] 0.48336944 0.50331656 0.52323875 0.54308638 0.56281035 0.58236248 
#      0.60169589 0.62076528 0.63952729 0.65794080 0.67596715 0.69357042
# [61] 0.71071761 0.72737884 0.74352746 0.75914015 0.77419702 0.78868160 
#      0.80258085 0.81588511 0.82858806 0.84068660 0.85218071 0.86307331
# [73] 0.87337013 0.88307943 0.89221188 0.90078031 0.90879948 0.91628585 
#      0.92325738 0.92973326 0.93573371 0.94127975 0.94639300 0.95109545
# [85] 0.95540932 0.95935683 0.96296007 0.96624086 0.96922060 0.97192015 
#      0.97435975 0.97655893 0.97853643 0.98031016 0.98189714 0.98331349
# [97] 0.98457440 0.98569413 0.98668600 0.98756241

# Density (left) next to its running sum (right)
par(mfrow = c(1, 2))
plot(1:100, dn, main = "normal mean=50, sd=20")
plot(1:100, cumsum(dn), main = "normal mean=50, sd=20")

# back to a single-panel layout
par(mfrow = c(1, 1))

# Random generation: rnorm( n, mean, sd ) draws n values from the normal
# distribution. No seed is set, so the figures differ from run to run.

rn <- rnorm(n = 100, mean = 50, sd = 20)
par(mfrow = c(1, 2))
# left panel: counts per bin
hist(x = rn, probability = FALSE, main = "normal mean = 50, sd = 20")
# right panel: density scale, so the theoretical density can be drawn on top
hist(x = rn, probability = TRUE, main = "normal mean = 50, sd = 20")
#points( rn, dnorm( rn, mean = 50, sd = 20), col = "red" )
# the sample is sorted so that the curve is drawn from left to right
lines(x = sort(rn), y = dnorm(sort(rn), mean = 50, sd = 20), col = "red")

# back to a single-panel layout
par(mfrow = c(1, 1))

 

 

 

 

 

 

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# distributions for the mtcars data dnorm densities
# https://stat.ethz.ch/R-manual/R-patched/library/datasets/html/mtcars.html

# mtcars is addressed explicitly ( mtcars$..., mtcars[[i]] ) everywhere below,
# so the former attach( mtcars ) call was dropped: nothing used the attached
# names and, without a matching detach(), they stayed on the search path.

# Histogram of mtcars$mpg on the density scale, with the normal density for
# the sample mean and standard deviation drawn on top in red
hist(mtcars$mpg, probability = TRUE)
lines(sort(mtcars$mpg),
      dnorm(sort(mtcars$mpg),
            mean = mean(mtcars$mpg), sd = sd(mtcars$mpg)), col = "red")

# The same picture for every column on a 3 x 4 grid. par() returns the
# previous settings, which are restored once the loop has finished.
old_par <- par(mfrow = c(3, 4))
for (i in seq_along(mtcars)) {
  values <- mtcars[[i]]
  var_name <- colnames(mtcars)[i]
  hist(values, probability = TRUE,
       main = paste("Data: MTcars", var_name),
       xlab = var_name)
  # sorted so that the fitted curve is drawn from left to right
  values_sorted <- sort(values)
  lines(values_sorted,
        dnorm(values_sorted, mean = mean(values), sd = sd(values)),
        col = "red")
}
par(old_par)

# Martin Stoppacher                                                             #
# office@martinstoppacher.com                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#################################################################################

 

Martin Stoppacher: