# R_session_code.R

# Variables: a name bound to a value (here a single number).
x <- 25

# R can be used as a calculator: a bare expression typed at the top level
# is evaluated and its value is printed.
10 + 20

## [1] 30

# `<-` is the conventional assignment operator in R.
a <- 4

# Variables can take part in calculations.
a * 5

## [1] 20

# Concatenation: c() combines its arguments into a single vector.
x <- c(1, 2, 3, 4, 5)
x

## [1] 1 2 3 4 5

y <- c("a", "b", "c", "d")
y

## [1] "a" "b" "c" "d"

# Vectors
# A vector has exactly one dimension and represents an ordered sequence of
# values. Its elements may also come from other variables.
v1 <- c(1, 2, 3, 4)
mean(v1)

## [1] 2.5

# Individual elements are read or replaced with square brackets,
# e.g. v1[1] is the first element.
v1[1] <- 48
v1

## [1] 48  2  3  4

# List every object in the current workspace (variables, vectors, etc.)
# with ls().
a <- 10
b <- 20
ls()

## [1] "a"  "b"  "v1" "x"  "y"

# Tip: save.image() writes the whole workspace to a file and load() restores
# it. The relative path means the file is created in the current working
# directory.
save.image(file = "workspace.rda")
load(file = "workspace.rda")

# Documented R functions have a help page. `?topic` and help("topic") both
# open the page for that topic.
?mean

## starting httpd help server ... done

help("mean")

# Data types: mode() reports the storage mode of an object.
x <- 1
mode(x)

## [1] "numeric"

# A comparison evaluates to a logical value (TRUE or FALSE).
y <- 3 < 4
y

## [1] TRUE

# Text is stored as a character string.
a <- "bioinfo"
mode(a)

## [1] "character"

# The main data objects in R:
#   - matrices    (a single data type)
#   - data frames (columns may have different data types)
#   - lists       (a collection of vectors)

# A matrix with 2 rows and 3 columns, filled with zeros.
m <- matrix(0, nrow = 2, ncol = 3)
m

##      [,1] [,2] [,3]
## [1,]    0    0    0
## [2,]    0    0    0

# A list can hold several vectors. Each vector in the list is accessed with
# double square brackets, e.g. L1[[1]].
x <- c(1, 2, 3, 4)
y <- c(2, 3, 4)
L1 <- list(x, y)
L1

## [[1]]
## [1] 1 2 3 4
## 
## [[2]]
## [1] 2 3 4

# Data frames are similar to matrices, but each column can hold a different
# data type (here one numeric column and one character column).
x <- c(1, 5, 10)
y <- c("A", "B", "C")
z <- data.frame(x = x, y = y)
z

##    x y
## 1  1 A
## 2  5 B
## 3 10 C

# duplicated() flags each element that already appeared earlier in the
# vector, which is the first step towards removing repeated data.
duplicated(c(1, 2, 1, 3, 1, 4))

## [1] FALSE FALSE  TRUE FALSE  TRUE FALSE

# table() counts the number of observations in each level of a factor.
data("iris")

table(iris[["Species"]])

## 
##     setosa versicolor  virginica 
##         50         50         50

# str() prints a compact summary of an object's structure.
str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

# Data from different sources can be combined with merge(). iris and cars
# share no column names, so there is no key to join on and the result is the
# Cartesian product: every iris row paired with every cars row. `by = NULL`
# states that explicitly.
merged <- merge(iris, cars, by = NULL)

head(merged)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species speed dist
## 1          5.1         3.5          1.4         0.2  setosa     4    2
## 2          4.9         3.0          1.4         0.2  setosa     4    2
## 3          4.7         3.2          1.3         0.2  setosa     4    2
## 4          4.6         3.1          1.5         0.2  setosa     4    2
## 5          5.0         3.6          1.4         0.2  setosa     4    2
## 6          5.4         3.9          1.7         0.4  setosa     4    2

# Retrieve the first 5 rows and all columns of the built-in iris dataset.
data("iris")

iris[seq_len(5), ]

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa

# Base R graphics (package graphics) provide several functions for
# statistical plots.
# NOTE(review): the default titles and axis labels of these plots are taken
# from the argument expressions as written, so rewriting the expressions
# would change the rendered labels.

# Scatter plot: sepal length on the x axis, petal length on the y axis.
plot(iris$Sepal.Length,iris$Petal.Length)

# A histogram shows the distribution of a continuous variable. Columns 1-3
# of iris are Sepal.Length, Sepal.Width and Petal.Length.

hist(iris[,1])

hist(iris[,2])

hist(iris[,3])

# Box plot: one box per vector passed in (here x and y).
x <- c(1,5,10)
y <- c(20,30,40)
boxplot(x,y)

# A heatmap encodes values as colour intensity and displays the relationship
# between several variables in a two-dimensional image. Only the four numeric
# columns are kept (Species, column 5, is a factor) and the data frame is
# converted to a matrix before being passed to heatmap().
iris_filtered <- iris[,1:4]

heatmap(as.matrix(iris_filtered))



# Circular plots in R (circlize package)

library("circlize")

## ========================================
## circlize version 0.4.10
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================

# Example data: a 6 x 6 matrix of standard-normal random values with rows
# labelled R1..R6 and columns labelled C1..C6.
# NOTE(review): no seed is set, so the values differ on every run, and `mat`
# is never drawn in this section -- presumably a circlize plotting call
# (e.g. chordDiagram(mat)) was intended here; confirm.
mat <- matrix(rnorm(36), 6, 6)

rownames(mat) <- paste0("R", 1:6)

colnames(mat) <- paste0("C", 1:6)



# Draw a network with igraph

# Define nodes: one row per vertex, giving its id and its type
# (two microbes followed by six non-biological compounds).
nodes <- cbind(
  id = c(
    "Fermenters", "Methanogens", "carbs", "CO2", "H2", "other", "CH4", "H2O"
  ),
  type = c(rep("Microbe", 2), rep("nonBio", 6))
)
nodes

##      id            type     
## [1,] "Fermenters"  "Microbe"
## [2,] "Methanogens" "Microbe"
## [3,] "carbs"       "nonBio" 
## [4,] "CO2"         "nonBio" 
## [5,] "H2"          "nonBio" 
## [6,] "other"       "nonBio" 
## [7,] "CH4"         "nonBio" 
## [8,] "H2O"         "nonBio"

# Define links: one row per directed edge (from -> to) with an edge type
# and a weight of 1. cbind() of mixed inputs yields a character matrix.
links <- cbind(
  from = c("carbs", rep("Fermenters", 3), rep("Methanogens", 2), "CO2", "H2"),
  to = c(
    "Fermenters", "other", "CO2", "H2", "CH4", "H2O", rep("Methanogens", 2)
  ),
  type = c("uptake", rep("output", 5), rep("uptake", 2)),
  weight = rep(1, 8)
)

# Make the network.
# ggridges and ggplot2 are attached here as well; the ridgeline plots later
# in the script call ggplot() and this is the only place ggplot2 is attached.
library(ggridges)
library(ggplot2)
library(igraph)

## 
## Attaching package: 'igraph'
## 
## The following object is masked from 'package:circlize':
## 
##     degree
## 
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## 
## The following object is masked from 'package:base':
## 
##     union

# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
net <- graph_from_data_frame(links, vertices = nodes, directed = TRUE)
plot(net)

# Change appearance: look up a colour for each vertex and edge by its type.
colrs.v <- c(nonBio = "lightblue", Microbe = "gold") # node colours
V(net)$color <- colrs.v[V(net)$type]

colrs.e <- c(output = "grey", uptake = "magenta") # edge colours
E(net)$color <- colrs.e[E(net)$type]

# Bigger nodes and curved arrows.
plot(net, edge.curved = 0.2, vertex.size = 30)

# Density ridgeline plots

library(ggridges)
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
  geom_density_ridges()

## Picking joint bandwidth of 0.181

# Quantile lines can be given as cut points rather than as a number of
# quantiles, e.g. the 2.5% and 97.5% tails.
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
  stat_density_ridges(
    quantile_lines = TRUE,
    quantiles = c(0.025, 0.975),
    alpha = 0.7
  )

## Picking joint bandwidth of 0.181

# Colour each ridge by quartile. The computed `quantile` variable is
# referenced with after_stat(); the older stat() helper is deprecated in
# ggplot2.
ggplot(
  iris,
  aes(x = Sepal.Length, y = Species, fill = factor(after_stat(quantile)))
) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE,
    quantiles = 4,
    quantile_lines = TRUE
  ) +
  scale_fill_viridis_d(name = "Quartiles")

## Picking joint bandwidth of 0.181

# The same approach can highlight the tails of the distributions.
ggplot(
  iris,
  aes(x = Sepal.Length, y = Species, fill = factor(after_stat(quantile)))
) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE,
    quantiles = c(0.025, 0.975)
  ) +
  scale_fill_manual(
    name = "Probability", values = c("#FF0000A0", "#A0A0A0A0", "#0000FFA0"),
    labels = c("(0, 0.025]", "(0.025, 0.975]", "(0.975, 1]")
  )

## Picking joint bandwidth of 0.181

# Data transformation with dplyr

library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Keep only the rows of the starwars tibble whose species is "Droid".
filter(starwars, species == "Droid")

## # A tibble: 6 x 14
##   name  height  mass hair_color skin_color eye_color birth_year sex   gender
##   <chr>  <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
## 1 C-3PO    167    75 <NA>       gold       yellow           112 none  mascu~
## 2 R2-D2     96    32 <NA>       white, bl~ red               33 none  mascu~
## 3 R5-D4     97    32 <NA>       white, red red               NA none  mascu~
## 4 IG-88    200   140 none       metal      red               15 none  mascu~
## 5 R4-P~     96    NA none       silver, r~ red, blue         NA none  femin~
## 6 BB8       NA    NA none       none       black             NA none  mascu~
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>

# R_session_code.R

# archana

# 2020-09-14