R基本語法
獲取幫助文檔,查看命令或函數的使用方法、示例或適用範圍
>>> ?command
>>> ??command #深度搜索或模糊搜索此命令
>>> example(command) #得到命令的例子
R中的變量
> # 數字變量
> a <- 10
> a
[1] 10
>
> # 字符串變量
> a <- "abc"
> a
[1] "abc"
>
> # 邏輯變量
> a <- TRUE
>
> a
[1] TRUE
>
> b <- T
>
> b
[1] TRUE
>
> d <- FALSE
>
> d
[1] FALSE
> # 向量
>
> a <- vector(mode="logical", length=5)
> a
[1] FALSE FALSE FALSE FALSE FALSE
>
> a <- c(1,2,3,4)
> is.vector(a)
[1] TRUE
>
> # 矩陣
>
> a <- matrix(1:20, nrow=5, ncol=4, byrow=T)
> a
     [,1] [,2] [,3] [,4]
[1,]    1    2    3    4
[2,]    5    6    7    8
[3,]    9   10   11   12
[4,]   13   14   15   16
[5,]   17   18   19   20
>
> is.matrix(a)
[1] TRUE
>
> dim(a) #查看或設置數組的維度向量
[1] 5 4
>
> # 錯誤的用法
> dim(a) <- c(4,4)  # 報錯: 4*4=16 與元素個數 20 不符
> # 正確的用法
> a <- 1:20
> dim(a) <- c(5,4)
> a
     [,1] [,2] [,3] [,4]
[1,]    1    6   11   16
[2,]    2    7   12   17
[3,]    3    8   13   18
[4,]    4    9   14   19
[5,]    5   10   15   20
>
> print(paste("矩陣a的行數", nrow(a)))
[1] "矩陣a的行數 5"
> print(paste("矩陣a的列數", ncol(a)))
[1] "矩陣a的列數 4"
>
> #查看或設置行列名
> rownames(a)
NULL
> rownames(a) <- c("a","b","c","d","e")
> a
  [,1] [,2] [,3] [,4]
a    1    6   11   16
b    2    7   12   17
c    3    8   13   18
d    4    9   14   19
e    5   10   15   20
# R中獲取一系列的字母
> letters[1:4]
[1] "a" "b" "c" "d"
> colnames(a) <- letters[1:4]
> a
  a  b  c  d
a 1  6 11 16
b 2  7 12 17
c 3  8 13 18
d 4  9 14 19
e 5 10 15 20
>
# is系列和as系列函數用來判斷變量的屬性和轉換變量的屬性
# 矩陣轉換為data.frame
> is.character(a)
[1] FALSE
> is.numeric(a)
[1] TRUE
> is.matrix(a)
[1] TRUE
> is.data.frame(a)
[1] FALSE
> is.data.frame(as.data.frame(a))
[1] TRUE
R中矩陣運算
# 數據產生
# rnorm(n, mean = 0, sd = 1) 正態分布的隨機數
# runif(n, min = 0, max = 1) 均勻分布的隨機數
# rep(1,5) 把1重複5次
# scale(1:5) 標準化數據
> a <- c(rnorm(5), rnorm(5,1), runif(5), runif(5,-1,1), 1:5, rep(0,5), c(2,10,11,13,4), scale(1:5)[1:5])
> a
 [1] -0.41253556  0.12192929 -0.47635888 -0.97171653  1.09162243  1.87789657
 [7] -0.11717937  2.92953522  1.33836620 -0.03269026  0.87540920  0.13005744
[13]  0.11900686  0.76663940  0.28407356 -0.91251181  0.17997973  0.50452258
[19]  0.25961316 -0.58052230  1.00000000  2.00000000  3.00000000  4.00000000
[25]  5.00000000  0.00000000  0.00000000  0.00000000  0.00000000  0.00000000
[31]  2.00000000 10.00000000 11.00000000 13.00000000  4.00000000 -1.26491106
[37] -0.63245553  0.00000000  0.63245553  1.26491106
> a <- matrix(a, ncol=5, byrow=T)
> a
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
[3,]  0.8754092  0.1300574  0.1190069  0.7666394  0.28407356
[4,] -0.9125118  0.1799797  0.5045226  0.2596132 -0.58052230
[5,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
[6,]  0.0000000  0.0000000  0.0000000  0.0000000  0.00000000
[7,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
[8,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
# 求行的加和
> rowSums(a)
[1] -0.6470593  5.9959284  2.1751865 -0.5489186 15.0000000  0.0000000 40.0000000
[8]  0.0000000
## 注意檢查括號的配對
> a <- a[rowSums(abs(a))!=0,]
> a * 2
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,] -0.8250711  0.2438586 -0.9527178 -1.9434331  2.18324487
[2,]  3.7557931 -0.2343587  5.8590704  2.6767324 -0.06538051
[3,]  1.7508184  0.2601149  0.2380137  1.5332788  0.56814712
[4,] -1.8250236  0.3599595  1.0090452  0.5192263 -1.16104460
[5,]  2.0000000  4.0000000  6.0000000  8.0000000 10.00000000
[6,]  4.0000000 20.0000000 22.0000000 26.0000000  8.00000000
[7,] -2.5298221 -1.2649111  0.0000000  1.2649111  2.52982213
# 所有值取絕對值,再取對數 (取對數前一般加一個數避免對0或負值取對數)
> log2(abs(a)+1)
          [,1]      [,2]      [,3]      [,4]      [,5]
[1,] 0.4982872 0.1659818 0.5620435 0.9794522 1.0646224
[2,] 1.5250147 0.1598608 1.9743587 1.2255009 0.0464076
[3,] 0.9072054 0.1763961 0.1622189 0.8210076 0.3607278
[4,] 0.9354687 0.2387621 0.5893058 0.3329807 0.6604014
[5,] 1.0000000 1.5849625 2.0000000 2.3219281 2.5849625
[6,] 1.5849625 3.4594316 3.5849625 3.8073549 2.3219281
[7,] 1.1794544 0.7070437 0.0000000 0.7070437 1.1794544
# 取出最大值、最小值、行數、列數
> max(a)
[1] 13
> min(a)
[1] -1.264911
> nrow(a)
[1] 7
> ncol(a)
[1] 5
# 增加一列或一行
# cbind: column bind
> cbind(a, 1:7)
           [,1]       [,2]       [,3]       [,4]        [,5] [,6]
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243    1
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026    2
[3,]  0.8754092  0.1300574  0.1190069  0.7666394  0.28407356    3
[4,] -0.9125118  0.1799797  0.5045226  0.2596132 -0.58052230    4
[5,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000    5
[6,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000    6
[7,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106    7
> cbind(a, seven=1:7)
                                                             seven
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243     1
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026     2
[3,]  0.8754092  0.1300574  0.1190069  0.7666394  0.28407356     3
[4,] -0.9125118  0.1799797  0.5045226  0.2596132 -0.58052230     4
[5,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000     5
[6,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000     6
[7,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106     7
# rbind: row bind
> rbind(a,1:5)
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
[3,]  0.8754092  0.1300574  0.1190069  0.7666394  0.28407356
[4,] -0.9125118  0.1799797  0.5045226  0.2596132 -0.58052230
[5,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
[6,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
[7,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
[8,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
# 計算每一行的mad (中值絕對偏差,一般認為比方差的魯棒性更強,更少受異常值的影響,更能反映數據間的差異)
> apply(a,1,mad)
[1] 0.7923976 2.0327283 0.2447279 0.4811672 1.4826000 4.4478000 0.9376786
# 計算每一行的var (方差)
# apply表示對數據(第一個參數)的每一行 (第二個參數賦值為1) 或每一列 (2)操作
#      最后返回一個列表
> apply(a,1,var)
[1]  0.6160264  1.6811161  0.1298913  0.3659391  2.5000000 22.5000000  1.0000000
# 計算每一列的平均值
> apply(a,2,mean)
[1] 0.4519068 1.6689045 2.4395294 2.7179083 1.5753421
# 取出中值絕對偏差大于0.5的行
> b = a[apply(a,1,mad)>0.5,]
> b
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
[3,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
[4,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
[5,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
# 矩陣按照mad的大小降序排列
> c = b[order(apply(b,1,mad), decreasing=T),]
> c
           [,1]       [,2]       [,3]       [,4]        [,5]
[1,]  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
[2,]  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
[3,]  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
[4,] -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
[5,] -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
> rownames(c) <- paste("Gene", letters[1:5], sep="_")
> colnames(c) <- toupper(letters[1:5])
> c
                A          B          C          D           E
Gene_a  2.0000000 10.0000000 11.0000000 13.0000000  4.00000000
Gene_b  1.8778966 -0.1171794  2.9295352  1.3383662 -0.03269026
Gene_c  1.0000000  2.0000000  3.0000000  4.0000000  5.00000000
Gene_d -1.2649111 -0.6324555  0.0000000  0.6324555  1.26491106
Gene_e -0.4125356  0.1219293 -0.4763589 -0.9717165  1.09162243
# 矩陣轉置
> expr = t(c)
> expr
  Gene_a      Gene_b Gene_c     Gene_d     Gene_e
A      2  1.87789657      1 -1.2649111 -0.4125356
B     10 -0.11717937      2 -0.6324555  0.1219293
C     11  2.92953522      3  0.0000000 -0.4763589
D     13  1.33836620      4  0.6324555 -0.9717165
E      4 -0.03269026      5  1.2649111  1.0916224
# 矩陣值的替換
> expr2 = expr
> expr2[expr2<0] = 0
> expr2
  Gene_a   Gene_b Gene_c    Gene_d    Gene_e
A      2 1.877897      1 0.0000000 0.0000000
B     10 0.000000      2 0.0000000 0.1219293
C     11 2.929535      3 0.0000000 0.0000000
D     13 1.338366      4 0.6324555 0.0000000
E      4 0.000000      5 1.2649111 1.0916224
# 矩陣中只針對某一列替換
# expr2是個矩陣不是數據框,不能使用列名字索引
> expr2[expr2$Gene_b<1, "Gene_b"] <- 1  # 報錯: $ 不能用於矩陣
> expr2 <- as.data.frame(expr2)
> str(expr2)
'data.frame':    5 obs. of  5 variables:
 $ Gene_a: num  2 10 11 13 4
 $ Gene_b: num  1.88 1 2.93 1.34 1
 $ Gene_c: num  1 2 3 4 5
 $ Gene_d: num  0 0 0 0.632 1.265
 $ Gene_e: num  0 0.122 0 0 1.092
> expr2[expr2$Gene_b<1, "Gene_b"] <- 1
> expr2
  Gene_a   Gene_b Gene_c    Gene_d    Gene_e
A      2 1.877897      1 0.0000000 0.0000000
B     10 1.000000      2 0.0000000 0.1219293
C     11 2.929535      3 0.0000000 0.0000000
D     13 1.338366      4 0.6324555 0.0000000
E      4 1.000000      5 1.2649111 1.0916224
R中矩陣篩選合并
# 讀入樣品信息
> sampleInfo = "Samp;Group;Genotype
+ A;Control;WT
+ B;Control;WT
+ D;Treatment;Mutant
+ C;Treatment;Mutant
+ E;Treatment;WT
+ F;Treatment;WT"
> phenoData = read.table(text=sampleInfo,sep=";", header=T, row.names=1, quote="")
> phenoData
      Group Genotype
A   Control       WT
B   Control       WT
D Treatment   Mutant
C Treatment   Mutant
E Treatment       WT
F Treatment       WT
# 把樣品信息按照基因表達矩陣中的樣品信息排序,并只保留有基因表達信息的樣品
# match() returns a vector of the positions of (first) matches of its first argument in its second.
> phenoData[match(rownames(expr), rownames(phenoData)),]
      Group Genotype
A   Control       WT
B   Control       WT
C Treatment   Mutant
D Treatment   Mutant
E Treatment       WT
# '%in%' is a more intuitive interface as a binary operator, which returns a logical vector indicating if there is a match or not for its left operand.
# 注意順序,%in%比match更好理解一些
> phenoData = phenoData[rownames(phenoData) %in% rownames(expr),]
> phenoData
      Group Genotype
A   Control       WT
B   Control       WT
C Treatment   Mutant
D Treatment   Mutant
E Treatment       WT
# 合并矩陣
# by=0 表示按照行的名字排序
# by=columnname 表示按照共有的某一列排序
# 合并后多出了新的一列Row.names
> merge_data = merge(expr, phenoData, by=0, all.x=T)
> merge_data
  Row.names Gene_a      Gene_b Gene_c     Gene_d     Gene_e     Group Genotype
1         A      2  1.87789657      1 -1.2649111 -0.4125356   Control       WT
2         B     10 -0.11717937      2 -0.6324555  0.1219293   Control       WT
3         C     11  2.92953522      3  0.0000000 -0.4763589 Treatment   Mutant
4         D     13  1.33836620      4  0.6324555 -0.9717165 Treatment   Mutant
5         E      4 -0.03269026      5  1.2649111  1.0916224 Treatment       WT
> rownames(merge_data) <- merge_data$Row.names
> merge_data
  Row.names Gene_a      Gene_b Gene_c     Gene_d     Gene_e     Group Genotype
A         A      2  1.87789657      1 -1.2649111 -0.4125356   Control       WT
B         B     10 -0.11717937      2 -0.6324555  0.1219293   Control       WT
C         C     11  2.92953522      3  0.0000000 -0.4763589 Treatment   Mutant
D         D     13  1.33836620      4  0.6324555 -0.9717165 Treatment   Mutant
E         E      4 -0.03269026      5  1.2649111  1.0916224 Treatment       WT
# 去除一列;-1表示去除第一列
> merge_data = merge_data[,-1]
> merge_data
  Gene_a      Gene_b Gene_c     Gene_d     Gene_e     Group Genotype
A      2  1.87789657      1 -1.2649111 -0.4125356   Control       WT
B     10 -0.11717937      2 -0.6324555  0.1219293   Control       WT
C     11  2.92953522      3  0.0000000 -0.4763589 Treatment   Mutant
D     13  1.33836620      4  0.6324555 -0.9717165 Treatment   Mutant
E      4 -0.03269026      5  1.2649111  1.0916224 Treatment       WT
# 提取出所有的數值列
> merge_data[sapply(merge_data, is.numeric)]
  Gene_a      Gene_b Gene_c     Gene_d     Gene_e
A      2  1.87789657      1 -1.2649111 -0.4125356
B     10 -0.11717937      2 -0.6324555  0.1219293
C     11  2.92953522      3  0.0000000 -0.4763589
D     13  1.33836620      4  0.6324555 -0.9717165
E      4 -0.03269026      5  1.2649111  1.0916224
str的應用
str: Compactly display the internal structure of an R object, a diagnostic function and an alternative to ‘summary’ (and to some extent, ‘dput’). Ideally, only one line for each ‘basic’ structure is displayed. It is especially well suited to compactly display the (abbreviated) contents of (possibly nested) lists. The idea is to give reasonable output for any R object. It calls ‘args’ for (non-primitive) function objects.
str用來告訴結果的構成方式,對于不少Bioconductor的包,或者複雜的R函數的輸出,都是一堆列表的嵌套,str(complex_result)會輸出每個列表的名字,方便提取對應的信息。
# str的一個應用例子
> str(list(a = "A", L = as.list(1:100)), list.len = 9)
List of 2
 $ a: chr "A"
 $ L:List of 100
  ..$ : int 1
  ..$ : int 2
  ..$ : int 3
  ..$ : int 4
  ..$ : int 5
  ..$ : int 6
  ..$ : int 7
  ..$ : int 8
  ..$ : int 9
  .. [list output truncated]
# 利用str查看pca的結果,具體的PCA應用查看http://mp.weixin.qq.com/s/sRElBMkyR9rGa4TQp9KjNQ
> pca_result <- prcomp(expr)
> pca_result
Standard deviations:
[1] 4.769900e+00 1.790861e+00 1.072560e+00 1.578391e-01 2.752128e-16

Rotation:
               PC1         PC2          PC3         PC4         PC5
Gene_a  0.99422750 -0.02965529  0.078809521  0.01444655  0.06490461
Gene_b  0.04824368 -0.44384942 -0.885305329  0.03127940  0.12619948
Gene_c  0.08258192  0.81118590 -0.451360828  0.05440417 -0.35842886
Gene_d -0.01936958  0.30237826 -0.079325524 -0.66399283  0.67897952
Gene_e -0.04460135  0.22948437 -0.002097256  0.74496081  0.62480128
> str(pca_result)
List of 5
 $ sdev    : num [1:5] 4.77 1.79 1.07 1.58e-01 2.75e-16
 $ rotation: num [1:5, 1:5] 0.9942 0.0482 0.0826 -0.0194 -0.0446 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:5] "Gene_a" "Gene_b" "Gene_c" "Gene_d" ...
  .. ..$ : chr [1:5] "PC1" "PC2" "PC3" "PC4" ...
 $ center  : Named num [1:5] 8 1.229 3 0.379 0.243
  ..- attr(*, "names")= chr [1:5] "Gene_a" "Gene_b" "Gene_c" "Gene_d" ...
 $ scale   : logi FALSE
 $ x       : num [1:5, 1:5] -6.08 1.86 3.08 5.06 -3.93 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:5] "A" "B" "C" "D" ...
  .. ..$ : chr [1:5] "PC1" "PC2" "PC3" "PC4" ...
 - attr(*, "class")= chr "prcomp"
# 取出每個主成分解釋的差異
> pca_result$sdev
[1] 4.769900e+00 1.790861e+00 1.072560e+00 1.578391e-01 2.752128e-16
R的包管理
# 什么時候需要安裝包
> library('unExistedPackage')
Error in library("unExistedPackage") :
  不存在叫‘unExistedPackage’這個名字的程輯包
# 安裝包
> install.packages("package_name")
# 指定安裝來源
> install.packages("package_name", repo="http://cran.us.r-project.org")
# 安裝Bioconductor的包
> source('https://bioconductor.org/biocLite.R')
> biocLite('BiocInstaller')
> biocLite(c("RUVSeq","pcaMethods"))
# 安裝Github的R包
> install.packages("devtools")
> devtools::install_github("JustinaZ/pcaReduce")
# 手動安裝, 首先下載包的源文件(壓縮版就可),然后在終端運行下面的命令。
ct@ehbio:~$ R CMD INSTALL package.tar.gz
# 移除包
>remove.packages("package_name")
# 查看所有安裝的包
>library()
# 查看特定安裝包的版本
> installed.packages()[c("DESeq2"), c("Package", "Version")]
 Package  Version
"DESeq2" "1.14.1"
>
# 查看默認安裝包的位置
>.libPaths()
# 調用安裝的包
>library(package_name)
#devtools::install_github("hms-dbmi/scde", build_vignettes = FALSE)
#install.packages(c("mvoutlier","ROCR"))
#biocLite(c("RUVSeq","pcaMethods","SC3","TSCAN","monocle","MultiAssayExperiment","SummarizedExperiment"))
#devtools::install_github("satijalab/seurat")