# Intro til R, Syntaks fra Owen: the R-guide (2010)

# 1. Overview and History

dieroll <- c(2,5,1,6,5,5,4,1)
dieroll

ls()

newdieroll <- dieroll/2 # divide every element by two
newdieroll
ls()

rm(newdieroll) # this was a silly variable anyway
ls()

help(log)
?log

log(100)

log2(16) # same as log(16,base=2) or just log(16,2)

log(1000,base=10) # same as log10(1000)

log2(c(1,2,3,4)) # log base 2 of the vector (1,2,3,4)

apropos("norm")

??norm  # ES: utvidet søk

matrix()

a <- c(1,2,3,4,5,6,7,8)

A <- matrix(a,nrow=2,ncol=4, byrow=FALSE) # a is different from A

A

A <- matrix(a,2,4)



# 2. Basic math

2+3

3/2

2^3     # this also can be written as 2**3

4^2 - 3*2  # this is simply 16 - 6

(56-14)/6 – 4*7*10/(5^2-5) # this is more complicated  

sqrt(2)

abs(2-4)

cos(4*pi)

log(0) # not defined

factorial(6) # 6!

choose(52,5) # this is 52!/(47!*5!)

x <- c(1,2,3,4)

y <- c(5,6,7,8)

x*y

y/x

y-x

x^y

cos(x*pi) + cos(y*pi)

s <- c(1,1,3,4,7,11)

length(s)

sum(s)  # 1+1+3+4+7+11

prod(s) # 1*1*3*4*7*11

cumsum(s)

diff(s)  # 1-1, 3-1, 4-3, 7-4, 11-7

diff(s, lag = 2) # 3-1, 4-1, 7-3, 11-4

a <- c(1,2,3,4,5,6,7,8,9,10)

A <- matrix(a, nrow = 5, ncol = 2) # fill in by column

A

B <- matrix(a, nrow = 5, ncol = 2, byrow = TRUE) # fill in by row

B

C <- matrix(a, nrow = 2, ncol = 5, byrow = TRUE)

t(C) # this is the same as A!!

B%*%C

D <- C%*%B

det(D)

solve(D) # this is D-1



# 3. Getting data into R 

mykids <- c("Stephen", "Christopher") # put text in quotes

mykids

1:9

1.5:10     # you won’t get to 10 here

c(1.5:10,10)   # we can attach it to the end this way

prod(1:8) # same as factorial(8)

seq(1,5) # same as 1:5

seq(1,5,by=.5) # increment by 0.5

rep(10,10) # repeat the value 10 ten times

rep(c("A","B","C","D"),2)

matrix(rep(0,16),nrow=4) # a 4x4 matrix of zeroes

x <- scan() # read what is typed into the variable x

passengers <- scan()

passengers

passengers <- scan("passengers.txt")      # ligger på NTNU/IntroR

new.data <- data.frame()  # creates an "empty" data frame

new.data <- edit(new.data) # request that changes made are written to data frame

fix(new.data) # changes saved automatically

seatbelt <- c("Y","N","Y","Y","Y","Y","Y","Y","Y","Y", # return
+ "N","Y","Y","Y","Y","Y","Y","Y","Y","Y","Y","Y","Y", # return
+ "Y","Y","N","Y","Y","Y","Y")

seatbelt

car.dat <- data.frame(passengers,seatbelt)

car.dat

data()

data(trees)

trees

trees$Height

sum(trees$Height)  # sum of just these values

trees[4,3] # entry at forth row, third column

trees[4,] # get the whole forth row

attach(trees)

Height

search()

attributes(trees)

Height[Height > 75]

smith <- read.table(file.choose(), header=T)   # hent inn 'Ros.t.11.1.dat' som ligger på NTNU/IntroR



# 4. Graphics

plot(Height, Volume) # object trees is in the search path

curve(sin(x), from = 0, to = 2*pi)

par(mfrow = c(2, 2)) # gives a 2 x 2 layout of plots

par(lend = 1) # gives "butt" line end caps for line plots2

par(bg = "cornsilk") # plots drawn with this colored background

par(xlog = TRUE) # always plot x axis on a logarithmic scale

oldpar <- par(no.readonly = TRUE)

par(oldpar) # default (original) parameter settings restored


# 5. Summarizing Data

data(mtcars) # load in dataset

attach(mtcars) # add mtcars to search path

mtcars

mean(hp)

var(mpg)

quantile(qsec, probs = c(.20, .80))

cor(wt,mpg)

table(cyl)

table(cyl)/length(cyl) # note: length(cyl) = 32

barplot(table(cyl)/length(cyl)) # use relative frequencies on the y-axis

?hist

data(faithful)

attach(faithful)

hist(eruptions, main = "Old Faithful data", prob = T)

hist(eruptions, main = "Old Faithful data", prob = T, breaks=18)

stem(waiting)

boxplot(faithful)

plot(ecdf(x))

qqnorm(x) # creates the NPP for values stored in x

qqline(x) # adds a reference line for the NPP



# 6. Probability, Distributions, and Simulation

x <- rnorm(100) # simulate 100 standard normal RVs, put in x

w <- rexp(1000,rate=.1) # simulate 1000 from Exp( theta = 10, rate = 0.1)

dbinom(3,size=10,prob=.25) # P(X = 3) for X ~ Bin(n=10, p=.25)

dpois(0:2, lambda=4) # P(X=0), P(X=1), P(X=2) for X ~ Poisson

pbinom(3,size=10,prob=.25) # P(X . 3) in the above distribution

pnorm(12,mean=10,sd=2) # P(X . 12) for X~N(mu = 10, sigma = 2)

qnorm(.75,mean=10,sd=2) # 3rd quartile of N(mu = 10,sigma = 2)

qchisq(.10,df=8) # 10th percentile of chi-square (8)

qt(.95,df=20) # 95th percentile of t(20)


u <- runif(1000000, min=0, max=pi/2)

pi/2*mean(4*sin(2*u)*exp(-u^2)) # a=0, b=pi/2, so b–a=pi/2

u <- runif(1000000, min=0, max=pi/2) # generate a new uniform vector

pi/2*mean(4*sin(2*u)*exp(-u^2)) # a=0, b=pi/2, so b–a=pi/2


x <- 0:10

y <- dbinom(x, size=10, prob=.25) # evaluate probabilities

plot(x, y, type = "h", lwd = 30, main = "Binomial Probabilities w/ n = 10, p = .25", col = "gray", lend = 'butt')

plot(0:10, dbinom(x, size=10, prob=.25), type = "h", lwd = 30, lend = 'butt', col = 'gray')

curve(dnorm(x), from = -3, to = 3) # the standard normal curve

curve(pnorm(x, mean=10, sd=2), from = 4, to = 16) # a normal CDF

simdata <- rexp(1000, rate=.1)

hist(simdata, prob = T, breaks = "FD", main="Exp(theta = 10) RVs")

curve(dexp(x, rate=.1), add = T) # overlay the density curve

sample(1:100, 1)  # choose a number between 1 and 100

sample(1:6, 10, replace = T) # toss a fair die 10 times

sample(1:6, 10, T, c(.6,.2,.1,.05,.03,.02)) # not a fair die!!

urn <- c(rep("red",8),rep("blue",4),rep("yellow",3))

sample(urn, 6, replace = F) # draw 6 balls from this urn



# 7. Statistical Methods

?t.test

data(trees)

t.test(trees$Height, mu = 70)

drug <- c(15, 10, 13, 7, 9, 8, 21, 9, 14, 8)

plac <- c(15, 14, 12, 8, 14, 7, 16, 10, 15, 12)

t.test(drug, plac, alternative = "less", var.equal = T)

add <- c(24.6, 18.9, 27.3, 25.2, 22.0, 30.9)

noadd <- c(23.8, 17.7, 26.6, 25.1, 21.6, 29.6)

t.test(add, noadd, paired=T, alt = "greater")

aov(x ~ a) # one-way ANOVA model

aov(x ~ a + b) # two-way ANOVA with no interaction

aov(x ~ a + b + a:b) # two-way ANOVA with interaction

aov(x ~ a*b) # exactly the same as the above

Str <- c(3225,3320,3165,3145,3220,3410,3320,3370,3545,3600,3580,3485)

Type <- c(rep("A",4), rep("B",4), rep("C",4))

Type <- factor(Type) # consider these alternative expressions6

Type

levels(Type)

tapply(Str,Type,mean)

tapply(Str,Type,var)

boxplot(Str ~ Type, horizontal = T, xlab="Strength", col = "gray")

anova.fit <- aov(Str ~ Type)

summary(anova.fit)

TukeyHSD(anova.fit) # the default is 95% CIs for differences

plot(TukeyHSD(anova.fit))

lm(y ~ x) # simple linear regression (SLR) model

lm(y ~ x1 + x2) # a regression plane

lm(y ~ x1 + x2 + x3) # linear model with three regressors

lm(y ~ x – 1) # SLR w/ an intercept of zero

lm(y ~ x + I(x^2)) # quadratic regression model

lm(y ~ x + I(x^2) + I(x^3)) # cubic model


fit <- lm(dist ~ speed)  # cars must be loaded and attached

attributes(fit)

fit

summary(fit)

anova(fit)

confint(fit)

plot(speed, dist) # first plot the data

abline(fit) # could also type abline(-17.579, 3.932)

plot(speed, fit$residuals)

abline(0,0) # add a reference line at 0

predict.lm(fit, newdata=data.frame(speed=c(15,22)),int="conf")

predict.lm(fit,newdata=data.frame(speed=c(15,22)),int="predict")

?chisq.test

counts <- c(43, 49, 56, 45, 66, 41)

probs <- rep(1/6, 6)

chisq.test(counts, p = probs)

color.blind <- matrix(c(442, 514, 38, 6), nrow=2, byrow=T)

color.blind

dimnames(color.blind) <- list(c("normal","c-b"),c("Male","Female"))

chisq.test(color.blind, correct=F) # no correction for this one

out <- chisq.test(color.blind, correct=F)

attributes(out)

out$expected # these are the expected counts