# [R] 희선 중간고사 (Hee-sun's midterm exam)
# 1. Read the two tab-separated input files into data frames.
ntdata <- read.csv(
  file = "C:\\Users\\ChangHun\\Downloads\\midterm\\ntcnt.txt",
  sep = "\t"
)
tpdata <- read.csv(
  file = "C:\\Users\\ChangHun\\Downloads\\midterm\\tpcnt.txt",
  sep = "\t"
)
# 2. Show the rows of ntdata whose ALT_CNT is greater than zero.
ntdata[0 < ntdata$ALT_CNT, ]
# 3. Keep only the rows whose chr is none of "MT", "X" or "Y".
ntdata2 <- ntdata[
  ntdata$chr != "MT" & ntdata$chr != "X" & ntdata$chr != "Y",
]
# 4. Keep every second row of ntdata (rows 2, 4, 6, ...).
# The row index is derived from the table itself rather than a hard-coded
# 1:156: a logical mask whose length differs from the number of rows is either
# recycled or produces all-NA rows, silently selecting the wrong rows.
index <- seq_len(nrow(ntdata))
ntdata3 <- ntdata[index %% 2 == 0, ]
# 5. Count how many "0" and "1" characters occur in each pos value and store
# the total in a new column cnt_01.
# Install stringr only when it is missing instead of reinstalling on every run.
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}
library("stringr")
Y <- str_count(ntdata$pos, "0")  # occurrences of "0" per row
Z <- str_count(ntdata$pos, "1")  # occurrences of "1" per row
cnt_01 <- Y + Z
# Assigning by name (instead of cbind) keeps a single cnt_01 column even when
# this section is executed more than once in the same session.
ntdata$cnt_01 <- cnt_01
# 6. Full outer join of tpdata and ntdata on the pos and chr columns; rows
# present in only one table are kept and padded with NA.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
nttpdata <- merge(x = tpdata, y = ntdata, by = c('pos','chr'), all = TRUE)
# 7. Column means of the numeric columns, ignoring NA values.
# vapply always returns a logical vector (sapply can return a list for an
# empty input), so the column selection is type-stable.
new_nttpdata <- nttpdata[vapply(nttpdata, is.numeric, logical(1))]
colMeans(new_nttpdata, na.rm = TRUE)
# 8. Read the tab-separated test file; the first file column becomes the row
# names and text columns stay character.
data1 <- read.table(
  "C:\\Users\\ChangHun\\Downloads\\midterm\\testfile.txt",
  sep = "\t", row.names = 1, header = TRUE, stringsAsFactors = FALSE
)
# 9. For every row, find where the value of column 1 first matches inside
# column 3, then keep the part of target from that position to the end.
# The two columns are read directly instead of going through apply(): apply()
# converts the whole data frame to a character matrix first, which re-formats
# numeric columns (padding included) before they are used as patterns.
# NOTE(review): column 3 is presumably the target column used below - confirm.
tmp2 <- vapply(
  seq_len(nrow(data1)),
  function(i) {
    as.integer(
      regexpr(as.character(data1[[1]][i]), as.character(data1[[3]][i]))
    )
  },
  integer(1)
)
names(tmp2) <- rownames(data1)
# regexpr() yields -1 when there is no match; substr() treats a start below 1
# as 1, so such rows keep the whole target string.
bartarget <- substr(data1$target, tmp2, nchar(data1$target))
data1 <- cbind(data1, bartarget)
typeof(data1$target)
# 10. Box plot of indel_rate, one box per Number_mismatch group.
df <- data1$indel_rate
mismatch_groups <- c("1", "2", "gx17", "gx18", "gx19")
# One vector of indel rates per group. Built with lapply so that no helper
# variable has to be called `c`, which would mask base::c.
DF <- lapply(mismatch_groups, function(g) df[data1$Number_mismatch == g])
# The group labels are passed through names=. Calling axis() after boxplot()
# would draw them on top of the default 1..5 tick labels.
boxplot(
  DF,
  names = mismatch_groups,
  col = rainbow(length(mismatch_groups), s = 0.5)
)
# 11. Build one 2x2 matrix per row of nttpdata from columns 4, 5, 8 and 9.

# Arrange elements 4, 5, 8 and 9 of one row into a 2x2 numeric matrix, filled
# column by column: [1,1] = x[4], [2,1] = x[5], [1,2] = x[8], [2,2] = x[9].
#   x: a vector or a one-row data frame with at least 9 elements/columns.
#   Returns a 2x2 numeric matrix.
# The cells are converted to numeric so the matrix is usable for arithmetic
# even when the row arrives as character data.
mu <- function(x) {
  cells <- as.numeric(unlist(x[c(4, 5, 8, 9)], use.names = FALSE))
  matrix(cells, nrow = 2, byrow = FALSE)
}

# mu must exist before it is used, so it is defined (once) above this call.
# Rows are visited with lapply rather than apply(): apply() first coerces the
# data frame to a character matrix and then simplifies the equal-length
# results into a single 4 x n matrix instead of a list of 2x2 matrices.
tmplist <- lapply(seq_len(nrow(nttpdata)), function(i) mu(nttpdata[i, ]))
tmplist
# Inspect the four source columns.
nttpdata[,4]
nttpdata[,5]
nttpdata[,8]
nttpdata[,9]
# 12. Fisher's exact test for every 2x2 table in tmplist; the p-values are
# attached to the NA-free rows of nttpdata.

# Run Fisher's exact test on one 2x2 table.
#   x: a matrix, or a list/vector holding its four cells in column order.
#   Returns the htest object produced by fisher.test().
# Defined before its first use so the section runs top to bottom.
fu <- function(x) {
  mt <- matrix(unlist(x), 2)
  fisher.test(mt)
}

# load() returns the names of the restored objects and get() fetches the
# object with that name.
# NOTE(review): assumes the file stores exactly one object - confirm.
tmplist <- get(load("C:\\Users\\ChangHun\\Downloads\\midterm\\tmplist.11.RData"))
nttpdata_omit <- na.omit(nttpdata)

# Check on the first table; mt is created before it is tested.
mt <- matrix(unlist(tmplist[1]), 2)
fisher.test(mt)

# Keep only the p-value of each test. Storing whole htest objects cannot be
# bound onto a data frame as a column.
p_value_list <- lapply(tmplist, function(cells) fu(cells)$p.value)
# NOTE(review): presumably tmplist holds one table per row of nttpdata_omit;
# fail early with a clear message if that is not the case.
stopifnot(length(p_value_list) == nrow(nttpdata_omit))
nttpdata_omit <- cbind(nttpdata_omit, p_value = unlist(p_value_list))
typeof(p_value_list[3])
list1 <- p_value_list[3]
#=============================================================
# Scratch example, separate from the numbered tasks: get a Fisher exact test
# p-value out of CrossTable on two randomly sampled vectors.
# NOTE(review): this overwrites the globals n, x, y and p.
set.seed(123)  # fixed seed so the sampled vectors are reproducible
n <- 50
x <- sample(c(0, 1), replace = TRUE, size = n)
y <- sample(c(1, 2), replace = TRUE, size = n)
library(tidyverse)
library(gmodels)
# Cross-tabulate x against y, call CrossTable with fisher = TRUE and keep only
# the p.value element of its fisher.ts component.
p <- tibble(x, y) %>%
table() %>%
CrossTable(prop.r = FALSE, prop.c = FALSE, prop.t = FALSE, prop.chisq = FALSE, fisher = TRUE) %>%
.$fisher.ts %>%
.$p.value
dim(x)  # x is a plain vector at this point, so this yields NULL
tmplist
# 13. Build tmplist1: one 2x2 matrix per element of tmplist, with 1 added to
# every cell of the matrices selected by the condition below.
# lapply yields a flat list with one matrix per input element. Growing the
# result with list(tmplist1, m) nested the previous list one level deeper on
# every iteration.
tmplist1 <- lapply(tmplist, function(cells) {
  mt <- matrix(unlist(cells), 2)
  # NOTE(review): 1 is added when the table contains NO zero cell, exactly as
  # in the original condition `sum(i %in% 0) == 0`. If the goal is to pad
  # tables that DO contain a zero, this test is inverted - confirm.
  if (!any(mt %in% 0)) {
    mt + 1
  } else {
    mt
  }
})
tmplist1
#14
# Inspect the two objects used by the odds-ratio computation that follows.
nttpdata_omit
tmplist
# Odds ratio of a 2x2 table: (cell[1,1] * cell[2,2]) / (cell[2,1] * cell[1,2]).
#   x: a matrix, or a list/vector holding the four cells in column order.
#   Returns a single numeric value.
# The ratio is the last expression of the body, so it is returned visibly.
# Ending the body with an assignment returned it invisibly, which prints
# nothing when the function is called at the console.
f14 <- function(x) {
  mt <- matrix(unlist(x), 2)
  (mt[1, 1] * mt[2, 2]) / (mt[2, 1] * mt[1, 2])
}
# One odds ratio per table in tmplist. vapply guarantees a numeric vector with
# exactly one value per table, whereas unlist(lapply()) silently drops NULL
# results and returns NULL for an empty list.
oddsratio <- vapply(tmplist, f14, numeric(1))
# Attach the odds ratios as an extra column of the NA-free table.
result_14 <- cbind(nttpdata_omit, oddsratio)
typeof(oddsratio)
#15
# NOTE(review): nttpdata_omit is a data frame, so mapply walks over its
# columns (not its rows) in parallel with the elements of oddsratio - confirm
# that this pairing is intended.
do.call(rbind,mapply(cbind,oddsratio,nttpdata_omit))
# NOTE(review): this overwrites the result_14 built in section #14.
result_14 = rbind(nttpdata_omit, mapply(cbind,nttpdata_omit,oddsratio))
# Recompute the odds ratio of the first table by hand.
x = tmplist[1]  # single-bracket subsetting keeps a length-1 list
typeof(x)
mt = matrix(unlist(x),2)  # flatten the cells and fill a 2-row matrix
(mt[1] * mt[4]) / (mt[2] * mt[3])