# A variable can hold a single value.
x <- 25
# R works as a calculator: an expression is evaluated and its value shown.
10 + 20
## [1] 30
a <- 4
# A stored value can be used in further calculations:
a * 5
## [1] 20
# c() concatenates its arguments into a single vector.
x <- c(1, 2, 3, 4, 5)
x
## [1] 1 2 3 4 5
y <- c("a", "b", "c", "d")
y
## [1] "a" "b" "c" "d"
# Vectors
# A vector has a single dimension and holds an ordered sequence of values.
# It can be built from literal values or from existing variables.
v1 <- c(1, 2, 3, 4)
mean(v1)
## [1] 2.5
# Elements are selected or replaced with square brackets, e.g. v1[1].
v1[1] <- 48
v1
## [1] 48 2 3 4
# ls() lists every object (variables, vectors, ...) in the workspace:
a <- 10
b <- 20
ls()
## [1] "a" "b" "v1" "x" "y"
# Tip: save.image() writes the workspace to a file and load() restores it:
save.image(file = "workspace.rda")
load(file = "workspace.rda")
# Every R function has a help page; ?name and help(name) both open it.
?mean
## starting httpd help server ... done
help(mean)
# Data types
# mode() reports the storage mode of a value.
x <- 1
mode(x)
## [1] "numeric"
# A comparison yields a logical value.
y <- (3 < 4)
y
## [1] TRUE
a <- "bioinfo"
mode(a)
## [1] "character"
# The main data objects in R:
#   - matrices (a single data type)
#   - data frames (columns may have different data types)
#   - lists (a collection of vectors)
m <- matrix(0, nrow = 2, ncol = 3)
m
## [,1] [,2] [,3]
## [1,] 0 0 0
## [2,] 0 0 0
# A list holds several vectors. Each vector in the list is accessed with
# double square brackets, e.g. L1[[1]].
x <- c(1, 2, 3, 4)
y <- c(2, 3, 4)
L1 <- list(x, y)
L1
## [[1]]
## [1] 1 2 3 4
##
## [[2]]
## [1] 2 3 4
# Data frames resemble matrices, but their columns can have different types.
x <- c(1, 5, 10)
y <- c("A", "B", "C")
z <- data.frame(x = x, y = y)
z
## x y
## 1 1 A
## 2 5 B
## 3 10 C
# duplicated() flags repeated values so they can be removed during analysis.
duplicated(c(1, 2, 1, 3, 1, 4))
## [1] FALSE FALSE TRUE FALSE TRUE FALSE
# table() counts the number of observations in each level of a factor:
data(iris)
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# merge() is one way to combine data from different sources. iris and cars
# have no column names in common, so this call pairs every row of iris with
# every row of cars (a cross join) instead of matching rows on a key.
merged <- merge(x = iris, y = cars)
head(merged)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species speed dist
## 1 5.1 3.5 1.4 0.2 setosa 4 2
## 2 4.9 3.0 1.4 0.2 setosa 4 2
## 3 4.7 3.2 1.3 0.2 setosa 4 2
## 4 4.6 3.1 1.5 0.2 setosa 4 2
## 5 5.0 3.6 1.4 0.2 setosa 4 2
## 6 5.4 3.9 1.7 0.4 setosa 4 2
# Retrieve the first 5 rows and all columns of the built-in iris data set:
data(iris)
iris[seq_len(5), ]
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
# Base R graphics (package graphics) provide several statistical plots.
# Scatter plot of petal length (y axis) against sepal length (x axis):
plot(iris$Sepal.Length,iris$Petal.Length)

# A histogram shows the distribution of a continuous variable.
# Columns 1 to 3 of iris are Sepal.Length, Sepal.Width and Petal.Length.
hist(iris[,1])

hist(iris[,2])

hist(iris[,3])

# Box plot: one box is drawn for each vector passed in.
x <- c(1,5,10)
y <- c(20,30,40)
boxplot(x,y)

# A heatmap encodes values as colour intensity, so the relationship between
# many variables can be shown in a single two-dimensional image.
# Only the four numeric measurement columns are kept; Species (a factor) is
# dropped before the conversion to a matrix.
iris_filtered <- iris[,1:4]
heatmap(as.matrix(iris_filtered))
# Circular plots in R (circlize package)
library("circlize")
## ========================================
## circlize version 0.4.10
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================

# 6 x 6 matrix of standard-normal random values with rows labelled R1..R6
# and columns labelled C1..C6.
# NOTE(review): mat is not used by any visible statement; presumably it was
# meant as input to a circlize plot -- confirm.
mat <- matrix(
  rnorm(36),
  nrow = 6,
  ncol = 6,
  dimnames = list(paste0("R", 1:6), paste0("C", 1:6))
)
# Draw a network with igraph
# Define nodes: one row per vertex, giving its id and its type.
# Data frames are used instead of cbind() matrices: a matrix holds a single
# type, so cbind() would silently turn the numeric weight column below into
# character strings. stringsAsFactors = FALSE keeps the text columns as
# character on every R version.
nodes <- data.frame(
  id = c("Fermenters", "Methanogens", "carbs", "CO2", "H2", "other", "CH4",
         "H2O"),
  type = c(rep("Microbe", 2), rep("nonBio", 6)),
  stringsAsFactors = FALSE
)
nodes
##            id    type
## 1  Fermenters Microbe
## 2 Methanogens Microbe
## 3       carbs  nonBio
## 4         CO2  nonBio
## 5          H2  nonBio
## 6       other  nonBio
## 7         CH4  nonBio
## 8         H2O  nonBio
# Define links: one row per directed edge (from -> to) with its type and a
# numeric weight.
links <- data.frame(
  from = c("carbs", rep("Fermenters", 3), rep("Methanogens", 2), "CO2", "H2"),
  to = c("Fermenters", "other", "CO2", "H2", "CH4", "H2O",
         rep("Methanogens", 2)),
  type = c("uptake", rep("output", 5), rep("uptake", 2)),
  weight = rep(1, 8),
  stringsAsFactors = FALSE
)
#Make the network
library(ggridges)
library(ggplot2)
library(igraph)
##
## Attaching package: 'igraph'
##
## The following object is masked from 'package:circlize':
##
## degree
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
# Build a directed graph from the edge table (links) and the vertex table
# (nodes). TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
net <- graph_from_data_frame(links, vertices = nodes, directed = TRUE)
plot(net)

# Change appearance: colours are looked up by the type attribute of each
# vertex and each edge.
colrs.v <- c(nonBio = "lightblue", Microbe = "gold") # node colours
V(net)$color <- colrs.v[V(net)$type]
colrs.e <- c(output = "grey", uptake = "magenta") # edge colours
E(net)$color <- colrs.e[E(net)$type]
plot(net, edge.curved = 0.2, vertex.size = 30) # bigger nodes, curved arrows

#Density ridgeline plots
library(ggridges)
# One density ridge of Sepal.Length per species.
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
  geom_density_ridges()
## Picking joint bandwidth of 0.181

# Quantiles can also be given as cut points rather than as a number of
# groups. Here lines mark the 2.5% and 97.5% tails.
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
  stat_density_ridges(
    quantile_lines = TRUE,
    quantiles = c(0.025, 0.975),
    alpha = 0.7
  )
## Picking joint bandwidth of 0.181

# Another plot: colour each ridge by quartile.
# after_stat() replaces the deprecated stat() for referring to variables
# computed by the stat (needs ggplot2 >= 3.3.0).
ggplot(
  iris,
  aes(x = Sepal.Length, y = Species, fill = factor(after_stat(quantile)))
) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE,
    quantiles = 4,
    quantile_lines = TRUE
  ) +
  scale_fill_viridis_d(name = "Quartiles")
## Picking joint bandwidth of 0.181

# The same approach can highlight the tails of the distributions.
ggplot(
  iris,
  aes(x = Sepal.Length, y = Species, fill = factor(after_stat(quantile)))
) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE,
    quantiles = c(0.025, 0.975)
  ) +
  scale_fill_manual(
    name = "Probability",
    values = c("#FF0000A0", "#A0A0A0A0", "#0000FFA0"),
    labels = c("(0, 0.025]", "(0.025, 0.975]", "(0.975, 1]")
  )
## Picking joint bandwidth of 0.181

#Data transformation
library("dplyr")
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the rows of the starwars data set whose species is "Droid".
filter(starwars, species == "Droid")
## # A tibble: 6 x 14
## name height mass hair_color skin_color eye_color birth_year sex gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 C-3PO 167 75 <NA> gold yellow 112 none mascu~
## 2 R2-D2 96 32 <NA> white, bl~ red 33 none mascu~
## 3 R5-D4 97 32 <NA> white, red red NA none mascu~
## 4 IG-88 200 140 none metal red 15 none mascu~
## 5 R4-P~ 96 NA none silver, r~ red, blue NA none femin~
## 6 BB8 NA NA none none black NA none mascu~
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>