analyse.r

From irefindex
Revision as of 13:30, 24 November 2011 by Ian.donaldson (talk | contribs) (Created page with "<pre> #R script acoompanying Exploratory data analysis lecture. 2011 #probability distribution x<-0:1 f<-dbinom(x, size=1, prob=.1) plot(x,f,xlab="x",ylab="density",type="h",l…")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

#R script acoompanying Exploratory data analysis lecture. 2011


#probability distribution
x<-0:1
f<-dbinom(x, size=1, prob=.1)
plot(x,f,xlab="x",ylab="density",type="h",lwd=5)

set.seed(100)x<-rbinom(100, size=1, prob=.1)
hist(x)

x<-seq(-4,4,0.1)
f<-dnorm(x, mean=0, sd=1)
plot(x,f,xlab="x",ylab="density",lwd=5,type="l")

x<-rnorm(100, mean=0, sd=1)
hist(x)

#quantiles
q90<-qnorm(0.90, mean = 0, sd = 1)
x<-seq(-4,4,.1)
f<-dnorm(x, mean=0, sd=1)
plot(x,f,xlab="x",ylab="density",type="l",lwd=5)
abline(v=q90,col=2,lwd=5)

#descriptive statistics
x<-rnorm(100, mean=0, sd=1)
quantile(x)
quantile(x,probs=c(0.1,0.2,0.9))

x<-rnorm(100, mean=0, sd=1)
mean(x)
median(x)
IQR(x)
var(x)
sd(x)
summary(x)

#box plot
x<-rnorm(100, mean=0, sd=1)
boxplot(x)

#qqplot
x<-rnorm(100, mean=0, sd=1)
qqnorm(x)
qqline(x)

x<-rt(100,df=2)
qqnorm(x)qqline(x)
x<-seq(-4,4,.1)
f1<-dnorm(x, mean=0, sd=1)
f2<-dt(x, df=2)
plot(x,f1,xlab="x",ylab="density",lwd=5,type="l")
lines(x,f2,xlab="x",ylab="density",lwd=5,col=2)

x<-rnorm(100, mean=0, sd=1)
y<-rnorm(100, mean=0, sd=1)
qqplot(x,y)
x<-rt(100, df=2)
y<-rnorm(100, mean=0, sd=1)
qqplot(x,y)



#DLBCLpatientDataNEW.txt

dat <- read.table(file = "M:/Undervisning/Statistikk/DLBCLpatientDataNEW.txt", header =TRUE, sep="\t")
head(dat)
dim(dat)
summary(dat)
 
#boxplot
boxplot(Follow.up..years.~Subgroup, data = dat)

#scatter plot 
plot(dat$Germinal.center.B.cell.signature,dat$Lymph.node.signature)
cor(dat$Germinal.center.B.cell.signature,dat$Lymph.node.signature)


# Quick comment on correlation
theta<-runif(1000,0,2*pi)
x<-cos(theta)
y<-sin(theta)
plot(x,y)
cor(x,y)

#trellis graphics
plot(dat, pch=".")

#histogram
par(mfrow=c(2,2))
hist(dat[,7], 50, main = names(dat)[7], xlab="average signature")
hist(dat[,8], 50, main = names(dat)[8], xlab="average signature")
hist(dat[,9], 50, main = names(dat)[9], xlab="average signature")
hist(dat[,10], 50, main = names(dat)[10], xlab="average signature")




Cy3 <- read.table(file="M:/Undervisning/Statistikk/NEJM_Web_Fig1data_CY3.txt", header=TRUE, sep="\t", dec=",")
Cy5 <- read.table(file="M:/Undervisning/Statistikk/NEJM_Web_Fig1data_CY5.txt", header=TRUE, sep="\t", dec=",")

#plot of raw data
par(mfrow=c(2,1))
hist(Cy3[,55], 50,main=names(Cy3)[55], xlab="Cy3")
hist(Cy5[,55], 50, main=names(Cy3)[55], xlab="Cy5")

par(mfrow=c(2,2))
qqnorm(Cy3[,55])
qqline(Cy3[,55])
qqnorm(Cy3[,55])

qqnorm(Cy5[,55])
qqline(Cy5[,55])


#log2 transformed
hist(log2(Cy3[,55]), 50,main=names(Cy3)[55], xlab="log2(Cy3)")
hist(log2(Cy5[,55]), 50, main=names(Cy3)[55], xlab="log2(Cy5)")

par(mfrow=c(2,2))
qqnorm(log2(Cy3[,55]))
qqline(log2(Cy3[,55]))
qqnorm(log2(Cy3[,55]))

qqnorm(log2(Cy5[,55]))
qqline(log2(Cy5[,55]))


#mean sd plot
m <- apply(Cy3[,55:58],1,mean,na.rm=TRUE)
sd <- apply(Cy3[,55:58],1,sd,na.rm=TRUE)
plot(m,sd)
trend<-lowess(m,sd)
lines(trend,col=2,lwd=5)


m <- apply(log2(Cy3[,55:58]),1,mean,na.rm=TRUE)
sd <- apply(log2(Cy3[,55:58]),1,sd,na.rm=TRUE)
plot(m,sd)
trend<-lowess(m,sd)
lines(trend,col=2,lwd=5)

#scattterplot of ma2 dat
plot(Cy3[,55],Cy5[,55], xlab=names(Cy3)[55], ylab=names(Cy5)[55])

#MA plot
par(mfrow=c(2,2))
A<-(log2(Cy3[,55])+log2(Cy5[,55]))/2
M<-(log2(Cy3[,55])-log2(Cy5[,55]))
plot(A,M,xlab="A",ylab="M",main="patient 55")
trend<-lowess(A,M)
lines(trend,col=2,lwd=5)

A<-(log2(Cy3[,56])+log2(Cy5[,56]))/2
M<-(log2(Cy3[,56])-log2(Cy5[,56]))
plot(A,M,xlab="A",ylab="M",main="patient 56")
trend<-lowess(A,M)
lines(trend,col=2,lwd=5)

A<-(log2(Cy3[,57])+log2(Cy5[,57]))/2
M<-(log2(Cy3[,57])-log2(Cy5[,57]))
plot(A,M,xlab="A",ylab="M",main="patient 57")
trend<-lowess(A,M)
lines(trend,col=2,lwd=5)

A<-(log2(Cy3[,58])+log2(Cy5[,58]))/2
M<-(log2(Cy3[,58])-log2(Cy5[,58]))
plot(A,M,xlab="A",ylab="M",main="patient 58")
trend<-lowess(A,M)
lines(trend,col=2,lwd=5)

#data
ma2 <- read.table(file="M:/Undervisning/Statistikk/NEJM_Web_Fig1data.txt", header=TRUE, sep="\t", dec=",")