les vecteurs :
## [1] "numeric"
## [1] "character"
les vecteurs, manipulations de base :
## [1] 3
## [1] 1 5
## [1] 1 5
## [1] "a" "t" "c" "g"
## [1] "a" "a" "a" "a" "a"
## [1] "a" "g" "a" "g" "a" "g" "a" "g" "a" "g"
## [1] "a" "a" "a" "g" "g" "g"
## [1] "g" "g" "g"
les vecteurs, manipulations de base :
## [1] "a" "t" "g" "c"
## [1] 1 3 2 2 4 3 3
## attr(,"levels")
## [1] "a" "t" "g" "c"
# Numeric 2 x 2 matrix, filled column by column
a <- matrix(c(1, 5, 10, 10), nrow = 2, ncol = 2)
# Character matrices: rbind() stacks the vectors as rows (4 x 2),
# cbind() places the same vectors side by side as columns (2 x 4)
b <- rbind(c("a", "g"), c("t", "t"), c("c", "g"), c("t", "g"))
c <- cbind(c("a", "g"), c("t", "t"), c("c", "g"), c("t", "g"))
## [1] 4 2
## [,1] [,2] [,3] [,4]
## [1,] "a" "t" "c" "t"
## [2,] "g" "t" "g" "g"
## [1] 2 4
## [1] 1 10
## [1] "g" "t" "g" "g"
## [1] "a" "t" "c" "t"
## [,1] [,2]
## [1,] 0.41951819 0.01465429
## [2,] 0.24720419 0.51544183
## [3,] 0.56106883 0.86548252
## [4,] 0.90346387 0.26716315
## [5,] 0.04786057 0.04909399
## [,1] [,2]
## [1,] 0.04786057 0.04909399
## [2,] 0.12409573 0.67136019
## [3,] 0.02023296 0.06524041
## [4,] 0.02384230 0.70520791
## [5,] 0.26311237 0.50907077
## [1] 0.247204194 0.008334732 0.836343959 0.327485138 0.779676456
## [1] 3
## [,1] [,2]
## [1,] "a" "g"
## [2,] "t" "t"
## [3,] "c" "g"
## [4,] "t" "g"
## [,1] [,2] [,3] [,4]
## [1,] "a" "t" "c" "t"
## [2,] "g" "t" "g" "g"
## [,1] [,2]
## [1,] "a" "g"
## [2,] "t" "t"
## [3,] "c" "g"
## [4,] "t" "g"
# Build a 10-row data frame: a constant column of "a", the sequence 1..10,
# and 10 uniform random draws
d <- data.frame(v1 = rep("a", 10), v2 = 1:10, v3 = runif(10))
dim(d)
# Extract a single column by name
d$v1
# Add a factor column alternating between the levels "a" and "b"
d$v4 <- factor(rep(c("a", "b"), 5), levels = c("a", "b"))
# Keep only the rows whose v4 equals "a"
d[d$v4 == "a", ]
# Column selection: by one name, by positions, by several names
d[, "v2"]
d[, c(3, 1)]
d[, c("v2", "v4")]
names(d)
summary(d)
# Subtract b from a.
#   a: value to subtract from
#   b: value to subtract
# Returns a - b.
f <- function(a, b) {
  a - b
}
# Positional call: a = 5, b = 6
f(5, 6)
# Named arguments may be given in any order: a = 6, b = 5
f(b = 5, a = 6)
# Subtract b from a, with default values for both arguments.
#   a: value to subtract from (default 32)
#   b: value to subtract (default 12)
# Returns a - b (the value of the last evaluated expression).
f <- function(a = 32, b = 12) a - b
# No arguments: both defaults are used
f()
# Positional call: a = 5, b = 6
f(5, 6)
# Named arguments may be given in any order: a = 6, b = 5
f(b = 5, a = 6)
! Éviter les boucles for et while ; préférer les opérations vectorielles
## Time difference of 0.0577395 secs
Version vectorielle
## Time difference of 0.001916409 secs
## [1] 30.12901
somme (sum), somme cumulée (cumsum), différences finies (diff), max, min …
somme (sum), somme cumulée (cumsum), différences finies (diff), max, min …
Appliquer une fonction à chaque élément d’un objet
# Sample data: 5000 rows of uniform, normal, and binomial draws
a <- data.frame(
  v1 = runif(5000),
  v2 = rnorm(5000),
  v3 = rbinom(5000, 5, 0.2)
)
# Apply a function to every row (MARGIN = 1)
r <- apply(a, MARGIN = 1, FUN = sum)
head(r)
class(r)
dim(r)
# Apply a function to every column (MARGIN = 2): the maximum and its position
r <- apply(a, MARGIN = 2, FUN = function(column) {
  c(max(column), which.max(column))
})
r
class(r)
dim(r)
# Apply a function to every element of a list; lapply() returns a list
b <- list(v1 = runif(5000), v2 = rnorm(5000), v3 = rbinom(5000, 5, 0.2))
r <- lapply(b, which.max)
r
class(r)
# sapply() makes the same calls and simplifies the result when it can
r <- sapply(b, which.max)
r
class(r)
à préférer aux boucles…
Sélectionner une partie des données
## v1 v2 v3
## 2045 0.9983715 -1.29662175 3
## 2347 0.9930926 -1.85392911 3
## 4118 0.9995121 0.56747385 3
## 4839 0.9822671 -0.12438954 3
## 4937 0.9916997 1.50546079 3
## 4963 0.9913533 -0.02220117 3
## v1 v2 v3
## 2045 0.9983715 -1.29662175 3
## 2347 0.9930926 -1.85392911 3
## 4118 0.9995121 0.56747385 3
## 4839 0.9822671 -0.12438954 3
## 4937 0.9916997 1.50546079 3
## 4963 0.9913533 -0.02220117 3
Prétraiter les variables pour construire des facteurs // intervalles
## [1] "factor"
## [1] (-2,1] (1,2] (2, Inf] (1,2] (-2,1] (-2,1]
## Levels: (-Inf,-3] (-3,-2] (-2,1] (1,2] (2, Inf]
# Two tables sharing an id column; b holds the ids 1..500 in shuffled order
a <- data.frame(id = 1:500, val1 = runif(500))
b <- data.frame(id = sample(500, 500), val2 = runif(500))
# Join on the columns that share a name (here: id)
c <- merge(a, b)
# For each a$id, the position of its first match in b$id (first 10 shown)
match(a$id, b$id)[1:10]
## [1] 288 124 221 262 102 48 32 74 379 123
## [1] 0
# Multiple matches: ids are drawn with replacement, so the same id can occur
# several times in b. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
b <- data.frame(id = sample(500, 1000, replace = TRUE), val2 = runif(1000))
# match() returns one position per element of a$id (the first match only),
# so the result has as many elements as a$id, not as b$id
length(match(a$id, b$id))
## [1] 500
## [1] 1000
## [1] FALSE FALSE TRUE TRUE TRUE TRUE
## [1] 500
## [1] 1000 3
## [1] 1079 3
# 500 observations: an id, a uniform value, and a binomial draw stored as a
# factor to serve as the grouping variable
a <- data.frame(
  id = 1:500,
  val1 = runif(500),
  val2 = factor(rbinom(500, 5, 0.4))
)
# Sum of val1 within each level of val2
aggregate(a$val1, by = list(a$val2), FUN = sum)
## Group.1 x
## 1 0 17.665480
## 2 1 52.128483
## 3 2 92.693259
## 4 3 58.952624
## 5 4 19.455376
## 6 5 3.375534
## $`0`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.02177 0.36011 0.50975 0.51957 0.73167 0.99934
##
## $`1`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.001724 0.213052 0.491894 0.482671 0.726621 0.990338
##
## $`2`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.003318 0.251669 0.537985 0.512118 0.743248 0.997407
##
## $`3`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.002744 0.222104 0.431882 0.453482 0.687431 0.969718
##
## $`4`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.003815 0.160875 0.442334 0.474521 0.781798 0.978638
##
## $`5`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1373 0.4148 0.6288 0.5626 0.7042 0.9079
## : 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.02177 0.36011 0.50975 0.51957 0.73167 0.99934
## --------------------------------------------------------
## : 1
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.001724 0.213052 0.491894 0.482671 0.726621 0.990338
## --------------------------------------------------------
## : 2
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.003318 0.251669 0.537985 0.512118 0.743248 0.997407
## --------------------------------------------------------
## : 3
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.002744 0.222104 0.431882 0.453482 0.687431 0.969718
## --------------------------------------------------------
## : 4
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.003815 0.160875 0.442334 0.474521 0.781798 0.978638
## --------------------------------------------------------
## : 5
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1373 0.4148 0.6288 0.5626 0.7042 0.9079
##
## 0 1 2 3 4 5
## 34 108 181 130 41 6
## val3
## val2 a c g t
## 0 4 11 8 11
## 1 34 19 30 25
## 2 52 48 42 39
## 3 24 34 35 37
## 4 9 13 7 12
## 5 2 0 3 1