analyse.r
From irefindex
Revision as of 13:32, 24 November 2011 by Ian.donaldson (talk | contribs)
R script acoompanying Exploratory data analysis lecture. 2011 See Exploratory Data Analysis
# #probability distribution x<-0:1 f<-dbinom(x, size=1, prob=.1) plot(x,f,xlab="x",ylab="density",type="h",lwd=5) set.seed(100)x<-rbinom(100, size=1, prob=.1) hist(x) x<-seq(-4,4,0.1) f<-dnorm(x, mean=0, sd=1) plot(x,f,xlab="x",ylab="density",lwd=5,type="l") x<-rnorm(100, mean=0, sd=1) hist(x) #quantiles q90<-qnorm(0.90, mean = 0, sd = 1) x<-seq(-4,4,.1) f<-dnorm(x, mean=0, sd=1) plot(x,f,xlab="x",ylab="density",type="l",lwd=5) abline(v=q90,col=2,lwd=5) #descriptive statistics x<-rnorm(100, mean=0, sd=1) quantile(x) quantile(x,probs=c(0.1,0.2,0.9)) x<-rnorm(100, mean=0, sd=1) mean(x) median(x) IQR(x) var(x) sd(x) summary(x) #box plot x<-rnorm(100, mean=0, sd=1) boxplot(x) #qqplot x<-rnorm(100, mean=0, sd=1) qqnorm(x) qqline(x) x<-rt(100,df=2) qqnorm(x)qqline(x) x<-seq(-4,4,.1) f1<-dnorm(x, mean=0, sd=1) f2<-dt(x, df=2) plot(x,f1,xlab="x",ylab="density",lwd=5,type="l") lines(x,f2,xlab="x",ylab="density",lwd=5,col=2) x<-rnorm(100, mean=0, sd=1) y<-rnorm(100, mean=0, sd=1) qqplot(x,y) x<-rt(100, df=2) y<-rnorm(100, mean=0, sd=1) qqplot(x,y) #DLBCLpatientDataNEW.txt dat <- read.table(file = "M:/Undervisning/Statistikk/DLBCLpatientDataNEW.txt", header =TRUE, sep="\t") head(dat) dim(dat) summary(dat) #boxplot boxplot(Follow.up..years.~Subgroup, data = dat) #scatter plot plot(dat$Germinal.center.B.cell.signature,dat$Lymph.node.signature) cor(dat$Germinal.center.B.cell.signature,dat$Lymph.node.signature) # Quick comment on correlation theta<-runif(1000,0,2*pi) x<-cos(theta) y<-sin(theta) plot(x,y) cor(x,y) #trellis graphics plot(dat, pch=".") #histogram par(mfrow=c(2,2)) hist(dat[,7], 50, main = names(dat)[7], xlab="average signature") hist(dat[,8], 50, main = names(dat)[8], xlab="average signature") hist(dat[,9], 50, main = names(dat)[9], xlab="average signature") hist(dat[,10], 50, main = names(dat)[10], xlab="average signature") Cy3 <- read.table(file="M:/Undervisning/Statistikk/NEJM_Web_Fig1data_CY3.txt", header=TRUE, sep="\t", dec=",") Cy5 <- read.table(file="M:/Undervisning/Statistikk/NEJM_Web_Fig1data_CY5.txt", header=TRUE, sep="\t", dec=",") #plot of raw data par(mfrow=c(2,1)) hist(Cy3[,55], 50,main=names(Cy3)[55], xlab="Cy3") hist(Cy5[,55], 50, main=names(Cy3)[55], xlab="Cy5") par(mfrow=c(2,2)) qqnorm(Cy3[,55]) qqline(Cy3[,55]) qqnorm(Cy3[,55]) qqnorm(Cy5[,55]) qqline(Cy5[,55]) #log2 transformed hist(log2(Cy3[,55]), 50,main=names(Cy3)[55], xlab="log2(Cy3)") hist(log2(Cy5[,55]), 50, main=names(Cy3)[55], xlab="log2(Cy5)") par(mfrow=c(2,2)) qqnorm(log2(Cy3[,55])) qqline(log2(Cy3[,55])) qqnorm(log2(Cy3[,55])) qqnorm(log2(Cy5[,55])) qqline(log2(Cy5[,55])) #mean sd plot m <- apply(Cy3[,55:58],1,mean,na.rm=TRUE) sd <- apply(Cy3[,55:58],1,sd,na.rm=TRUE) plot(m,sd) trend<-lowess(m,sd) lines(trend,col=2,lwd=5) m <- apply(log2(Cy3[,55:58]),1,mean,na.rm=TRUE) sd <- apply(log2(Cy3[,55:58]),1,sd,na.rm=TRUE) plot(m,sd) trend<-lowess(m,sd) lines(trend,col=2,lwd=5) #scattterplot of ma2 dat plot(Cy3[,55],Cy5[,55], xlab=names(Cy3)[55], ylab=names(Cy5)[55]) #MA plot par(mfrow=c(2,2)) A<-(log2(Cy3[,55])+log2(Cy5[,55]))/2 M<-(log2(Cy3[,55])-log2(Cy5[,55])) plot(A,M,xlab="A",ylab="M",main="patient 55") trend<-lowess(A,M) lines(trend,col=2,lwd=5) A<-(log2(Cy3[,56])+log2(Cy5[,56]))/2 M<-(log2(Cy3[,56])-log2(Cy5[,56])) plot(A,M,xlab="A",ylab="M",main="patient 56") trend<-lowess(A,M) lines(trend,col=2,lwd=5) A<-(log2(Cy3[,57])+log2(Cy5[,57]))/2 M<-(log2(Cy3[,57])-log2(Cy5[,57])) plot(A,M,xlab="A",ylab="M",main="patient 57") trend<-lowess(A,M) lines(trend,col=2,lwd=5) A<-(log2(Cy3[,58])+log2(Cy5[,58]))/2 M<-(log2(Cy3[,58])-log2(Cy5[,58])) plot(A,M,xlab="A",ylab="M",main="patient 58") trend<-lowess(A,M) lines(trend,col=2,lwd=5) #data ma2 <- read.table(file="M:/Undervisning/Statistikk/NEJM_Web_Fig1data.txt", header=TRUE, sep="\t", dec=",")