analyse.r

R script acoompanying Exploratory data analysis lecture. 2011 See Exploratory Data Analysis

```
#

#probability distribution
x<-0:1
f<-dbinom(x, size=1, prob=.1)
plot(x,f,xlab="x",ylab="density",type="h",lwd=5)

set.seed(100)x<-rbinom(100, size=1, prob=.1)
hist(x)

x<-seq(-4,4,0.1)
f<-dnorm(x, mean=0, sd=1)
plot(x,f,xlab="x",ylab="density",lwd=5,type="l")

x<-rnorm(100, mean=0, sd=1)
hist(x)

#quantiles
q90<-qnorm(0.90, mean = 0, sd = 1)
x<-seq(-4,4,.1)
f<-dnorm(x, mean=0, sd=1)
plot(x,f,xlab="x",ylab="density",type="l",lwd=5)
abline(v=q90,col=2,lwd=5)

#descriptive statistics
x<-rnorm(100, mean=0, sd=1)
quantile(x)
quantile(x,probs=c(0.1,0.2,0.9))

x<-rnorm(100, mean=0, sd=1)
mean(x)
median(x)
IQR(x)
var(x)
sd(x)
summary(x)

#box plot
x<-rnorm(100, mean=0, sd=1)
boxplot(x)

#qqplot
x<-rnorm(100, mean=0, sd=1)
qqnorm(x)
qqline(x)

x<-rt(100,df=2)
qqnorm(x)qqline(x)
x<-seq(-4,4,.1)
f1<-dnorm(x, mean=0, sd=1)
f2<-dt(x, df=2)
plot(x,f1,xlab="x",ylab="density",lwd=5,type="l")
lines(x,f2,xlab="x",ylab="density",lwd=5,col=2)

x<-rnorm(100, mean=0, sd=1)
y<-rnorm(100, mean=0, sd=1)
qqplot(x,y)
x<-rt(100, df=2)
y<-rnorm(100, mean=0, sd=1)
qqplot(x,y)

#DLBCLpatientDataNEW.txt

dim(dat)
summary(dat)

#boxplot
boxplot(Follow.up..years.~Subgroup, data = dat)

#scatter plot
plot(dat\$Germinal.center.B.cell.signature,dat\$Lymph.node.signature)
cor(dat\$Germinal.center.B.cell.signature,dat\$Lymph.node.signature)

# Quick comment on correlation
theta<-runif(1000,0,2*pi)
x<-cos(theta)
y<-sin(theta)
plot(x,y)
cor(x,y)

#trellis graphics
plot(dat, pch=".")

#histogram
par(mfrow=c(2,2))
hist(dat[,7], 50, main = names(dat)[7], xlab="average signature")
hist(dat[,8], 50, main = names(dat)[8], xlab="average signature")
hist(dat[,9], 50, main = names(dat)[9], xlab="average signature")
hist(dat[,10], 50, main = names(dat)[10], xlab="average signature")

#plot of raw data
par(mfrow=c(2,1))
hist(Cy3[,55], 50,main=names(Cy3)[55], xlab="Cy3")
hist(Cy5[,55], 50, main=names(Cy3)[55], xlab="Cy5")

par(mfrow=c(2,2))
qqnorm(Cy3[,55])
qqline(Cy3[,55])
qqnorm(Cy3[,55])

qqnorm(Cy5[,55])
qqline(Cy5[,55])

#log2 transformed
hist(log2(Cy3[,55]), 50,main=names(Cy3)[55], xlab="log2(Cy3)")
hist(log2(Cy5[,55]), 50, main=names(Cy3)[55], xlab="log2(Cy5)")

par(mfrow=c(2,2))
qqnorm(log2(Cy3[,55]))
qqline(log2(Cy3[,55]))
qqnorm(log2(Cy3[,55]))

qqnorm(log2(Cy5[,55]))
qqline(log2(Cy5[,55]))

#mean sd plot
m <- apply(Cy3[,55:58],1,mean,na.rm=TRUE)
sd <- apply(Cy3[,55:58],1,sd,na.rm=TRUE)
plot(m,sd)
trend<-lowess(m,sd)
lines(trend,col=2,lwd=5)

m <- apply(log2(Cy3[,55:58]),1,mean,na.rm=TRUE)
sd <- apply(log2(Cy3[,55:58]),1,sd,na.rm=TRUE)
plot(m,sd)
trend<-lowess(m,sd)
lines(trend,col=2,lwd=5)

#scattterplot of ma2 dat
plot(Cy3[,55],Cy5[,55], xlab=names(Cy3)[55], ylab=names(Cy5)[55])

#MA plot
par(mfrow=c(2,2))
A<-(log2(Cy3[,55])+log2(Cy5[,55]))/2
M<-(log2(Cy3[,55])-log2(Cy5[,55]))
plot(A,M,xlab="A",ylab="M",main="patient 55")
trend<-lowess(A,M)
lines(trend,col=2,lwd=5)

A<-(log2(Cy3[,56])+log2(Cy5[,56]))/2
M<-(log2(Cy3[,56])-log2(Cy5[,56]))
plot(A,M,xlab="A",ylab="M",main="patient 56")
trend<-lowess(A,M)
lines(trend,col=2,lwd=5)

A<-(log2(Cy3[,57])+log2(Cy5[,57]))/2
M<-(log2(Cy3[,57])-log2(Cy5[,57]))
plot(A,M,xlab="A",ylab="M",main="patient 57")
trend<-lowess(A,M)
lines(trend,col=2,lwd=5)

A<-(log2(Cy3[,58])+log2(Cy5[,58]))/2
M<-(log2(Cy3[,58])-log2(Cy5[,58]))
plot(A,M,xlab="A",ylab="M",main="patient 58")
trend<-lowess(A,M)
lines(trend,col=2,lwd=5)

#data

```