Last update: 24-10-16
Last markdown compiled: 24-10-16
# version: 22.03.24
knitr::opts_chunk$set(warning = FALSE, message = FALSE, results = FALSE)
# dirs
## Sebastian @Linux
dir_moa <- "/home/isar/Dropbox/scn_moa/"
## Sebastian @MAC
# data, revised 2024.01.09
# exp
load(file = paste0(dir_moa, "/data/final/", "gnp.pro.RData"))
load(file = paste0(dir_moa, "/data/final/", "gnp.mrn.RData"))
load(file = paste0(dir_moa, "/data/final/", "gnp.mic.RData"))
# results
load(file = paste0(dir_moa, "/data/final/", "dex.all.RData"))
load(file = paste0(dir_moa, "/data/final/", "timeomics.singles.RData"))
load(file = paste0(dir_moa, "/data/final/", "res.wgcna.RData"))
## packages
source(paste0(dir_moa, "/scripts/", "scn_moa_fct.R"))
# load packagea
sapply(packages_general, require, character.only = TRUE)
sapply(packages_enrichment, require, character.only = TRUE)
# additional packages (move at the end to package script)
library("viridis")
library(ggtext)
library(glue)
# extract data
fan <- gnp.mrn$fan
# ENTREZ
dex.all$dex.mrn$dex_stage_mrn_vsn$ENTREZ <- fan$ENTREZ[match(dex.all$dex.mrn$dex_stage_mrn_vsn$fid, fan$ENSG)]
dex.all$dex.pro$dex_stage_pro_vsn$ENTREZ <- fan$ENTREZ[match(dex.all$dex.pro$dex_stage_pro_vsn$fid, fan$UNIPROT)]
# ENSG
dex.all$dex.mrn$dex_stage_mrn_vsn$ENSG <- dex.all$dex.mrn$dex_stage_mrn_vsn$fid
dex.all$dex.pro$dex_stage_pro_vsn$ENSG <- fan$ENSG[match(dex.all$dex.pro$dex_stage_pro_vsn$fid, fan$UNIPROT)]
# SYMBOL
dex.all$dex.mrn$dex_stage_mrn_vsn$SYMBOL <- fan$SYMBOL[match(dex.all$dex.mrn$dex_stage_mrn_vsn$fid, fan$ENSG)]
dex.all$dex.pro$dex_stage_pro_vsn$SYMBOL <- fan$SYMBOL[match(dex.all$dex.pro$dex_stage_pro_vsn$fid, fan$UNIPROT)]
#### backgrounds
universe_mrn.ENSG <- unique(dex.all$dex.mrn$dex_stage_mrn_vsn$ENSG)
universe_pro.ENSG <- unique(dex.all$dex.pro$dex_stage_pro_vsn$ENSG)
universe_combo.ENSG <- unique(c(universe_mrn.ENSG, universe_pro.ENSG))
universe_mrn.ENTREZ <- unique(dex.all$dex.mrn$dex_stage_mrn_vsn$ENTREZ)
universe_pro.ENTREZ <- unique(dex.all$dex.pro$dex_stage_pro_vsn$ENTREZ)
universe_combo.ENTREZ <- unique(c(universe_mrn.ENTREZ, universe_pro.ENTREZ))
universe_mrn.SYMBOL <- fan$SYMBOL[match(universe_mrn.ENSG, fan$ENSG)]
universe_pro.SYMBOL <- fan$SYMBOL[match(universe_pro.ENSG, fan$ENSG)]
# remove invalid universe entries
universe_mrn.SYMBOL <- universe_mrn.SYMBOL[-c(2612)]
# write_clip(universe_mrn.SYMBOL)
# dex results: Fc filtered as ENSG vectors
get.dex.FC_ENSG <- function(dex, COMPoi, DIRECTION, FClevel){
dex_comp <- dex %>% filter(comparison %in% COMPoi)
if(DIRECTION == 'up'){
foi <- dex_comp %>% filter(adj.P.Val < 0.05 & logFC >= FClevel) %>% arrange(desc(logFC)) %>% pull(ENSG)
}
if(DIRECTION == 'down'){
foi <- dex_comp %>% filter(adj.P.Val < 0.05 & logFC <= -FClevel) %>% arrange(logFC) %>% pull(ENSG)
}
#fid <- fid[!is.na(fid)]
return(foi)
}
# dex results: Fc filtered as ENSG vectors
get.dex.FC_miRs <- function(dex, COMPoi, DIRECTION, FClevel){
dex_comp <- dex %>% filter(comparison %in% COMPoi)
if(DIRECTION == 'up'){
foi <- dex_comp %>% filter(adj.P.Val < 0.05 & logFC >= FClevel) %>% arrange(desc(logFC)) %>% pull(fid)
}
if(DIRECTION == 'down'){
foi <- dex_comp %>% filter(adj.P.Val < 0.05 & logFC <= -FClevel) %>% arrange(logFC) %>% pull(fid)
}
#fid <- fid[!is.na(fid)]
return(foi)
}
# fct for ENSG vector lists
apply_enrichment.GO <- function(l.dex, UNIV){
rs <- compareCluster(geneClusters = l.dex,
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
keyType = "ENSEMBL",
OrgDb = "org.Hs.eg.db",
readable = TRUE,
ont = "ALL",
fun = "enrichGO",
universe = UNIV)
rs <- rs@compareClusterResult
return(rs)
}
### for ENSEMBL
apply_enrichment.pathway <- function(l.dex, UNIV){
rs <- compareCluster(geneClusters = l.dex,
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
# keyType = "ENSEMBL",
#OrgDb = "org.Hs.eg.db",
readable = TRUE,
# ont = "ALL",
fun = "enrichPathway",
universe = UNIV)
rs <- rs@compareClusterResult
return(rs)
}
## fct enrich KEGG
apply_enrichment.kegg <- function(l.dex, UNIV){
rs <- compareCluster(geneClusters = l.dex,
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
# keyType = "ENSEMBL",
#OrgDb = "org.Hs.eg.db",
#readable = TRUE,
# ont = "ALL",
fun = "enrichKEGG",
universe = UNIV)
rs <- rs@compareClusterResult
return(rs)
}
## format enriched results
# add_fraction <- function(rs.enriched){ # not needed anymore I think
# rs.CP %<>% separate(col = BgRatio, into = c("inGroup", "background"))
# rs.CP$inGroup <- as.numeric(rs.CP$inGroup)
# rs.CP$Count <- as.numeric(rs.CP$Count)
# rs.CP$frac <- round(rs.CP$Count / rs.CP$inGroup * 100,0)
# return(rs.CP)
# }
add.enrichFactors <- function(rs.enriched){
rs.enriched %<>% mutate(richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio)))
rs.enriched %<>% mutate(FoldEnrichment = parse_ratio(GeneRatio) / parse_ratio(BgRatio))
return(rs.enriched)
}
# inspection functions
find_topPval.GO <- function(rs.all, TOPn){
topPval.MF <- rs.all %>%
filter(ONTOLOGY == 'MF') %>%
group_by(Cluster, ONTOLOGY) %>%
slice_min(p.adjust, n=TOPn) %>% pull(ID)
topPval.BP <- rs.all %>%
filter(ONTOLOGY == 'BP') %>%
group_by(Cluster, ONTOLOGY) %>%
slice_min(p.adjust, n=TOPn) %>% pull(ID)
topPval.CC <- rs.all %>%
filter(ONTOLOGY == 'CC') %>%
group_by(Cluster, ONTOLOGY) %>%
slice_min(p.adjust, n=TOPn) %>% pull(ID)
vec <- unique(c(topPval.MF, topPval.BP, topPval.CC))
return(vec)
}
inspect.top.GO <- function(rs.CP, topP){
dat <- rs.CP %>%
filter(ID %in% topP)
#dat$Description <- factor(dat$Description, levels = orderY)
#dat$DESC <- stringr::str_wrap(dat$Description, termLength)
#dat$DESC <- factor(dat$DESC, levels = rev(LEVELS))
dat %>%
ggplot(aes(x= Cluster, y = Description, color = Cluster)) +
geom_point(aes(size = Count)) +
scale_size(range = c(1, 5)) +
theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1),
axis.title.x = element_blank()) +
scale_color_manual(values = colors_progression.arrow)
#ggtitle(title)
}
inspect.selTerms.GO <- function(rs.enrichment, selTerms){
dat <- rs.enrichment %>%
# filter(ID %in% topP) %>%
filter(Description %in% selTerms)
dat %>%
ggplot(aes(x= Cluster, y = Description, color = Cluster)) +
geom_point(aes(size = FoldEnrichment)) +
scale_size(range = c(1, 5)) +
theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1),
axis.title.x = element_blank()) +
scale_color_manual(values = colors_progression.arrow)
#ggtitle(title)
}
# final plot
plot.GO_final <- function(rs.CP, fondSize, LEVELS, HJUST){
p <- rs.CP %>%
ggplot(aes(x= Cluster, y = factor(DESC, levels = LEVELS), color = Cluster, label = Count)) +
geom_point(aes(size = 3)) +
geom_text(color = 'black', hjust = HJUST, size = 5) +
# scale_size(range = c(5, 15)) +
theme_light(base_size=20) +
theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1),
axis.title.x = element_blank() ,
axis.title.y = element_blank() ,
text = element_text(size=fondSize)) +
scale_color_manual(values = colors_progression.arrow, guide = 'none') +
scale_size(range = c(4, 10)) +
theme(legend.position = "none")
# theme(
# legend.position = c(.95, .95),
# legend.justification = c("right", "top"),
# legend.box.just = "right",
# legend.margin = margin(6, 6, 6, 6)) +
# labs(size="Fold\nEnrichment")
return(p)
}
Goal: lists with threshold filtered ENSG & ENTREZ vectors.
# Fc1
l.dex.mrn_ENSG_up <- list(
'MB->PM' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MBtoPM', 'up', 1),
'PM->MC' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'PMtoMC', 'up', 1),
'MC->MM' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MCtoMM', 'up', 1),
'MM->B' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MMtoB', 'up', 1),
'B->S' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'BtoS', 'up', 1),
'S->PMN' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'StoPMN', 'up', 1)
)
l.dex.mrn_ENSG_dn <- list(
'MB->PM' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MBtoPM', 'down', 1),
'PM->MC' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'PMtoMC', 'down', 1),
'MC->MM' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MCtoMM', 'down', 1),
'MM->B' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MMtoB', 'down', 1),
'B->S' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'BtoS', 'down', 1),
'S->PMN' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'StoPMN', 'down', 1)
)
## pro
l.dex.pro_ENSG_up <- list(
'MB->PM' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MBtoPM', 'up', 1),
'PM->MC' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'PMtoMC', 'up', 1),
'MC->MM' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MCtoMM', 'up', 1),
'MM->B' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MMtoB', 'up', 1),
'B->PMN' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'BtoPMN', 'up', 1)
)
l.dex.pro_ENSG_dn <- list(
'MB->PM' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MBtoPM', 'down', 1),
'PM->MC' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'PMtoMC', 'down', 1),
'MC->MM' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MCtoMM', 'down', 1),
'MM->B' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MMtoB', 'down', 1),
'B->PMN' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'BtoPMN', 'down', 1)
)
# change fid
l.dex.mrn_ENTREZ_up <- purrr::map(l.dex.mrn_ENSG_up, turn.ENSGintoENTREZ)
l.dex.mrn_ENTREZ_dn <- purrr::map(l.dex.mrn_ENSG_dn, turn.ENSGintoENTREZ)
l.dex.pro_ENTREZ_up <- purrr::map(l.dex.pro_ENSG_up, turn.ENSGintoENTREZ)
l.dex.pro_ENTREZ_dn <- purrr::map(l.dex.pro_ENSG_dn, turn.ENSGintoENTREZ)
Goal: apply enrichment functions to the lists of feature vectors
# loading saved data below. takes very long
# # GO TERMS
# rs.GO.dex_mrn.up <- apply_enrichment.GO(l.dex.mrn_ENSG_up, universe_mrn.ENSG)
# rs.GO.dex_mrn.dn <- apply_enrichment.GO(l.dex.mrn_ENSG_dn, universe_mrn.ENSG)
#
# rs.GO.dex_pro.up <- apply_enrichment.GO(l.dex.pro_ENSG_up, universe_pro.ENSG)
# rs.GO.dex_pro.dn <- apply_enrichment.GO(l.dex.pro_ENSG_dn, universe_pro.ENSG)
#
#
# # pathways
# rs.PW.dex_mrn.up <- apply_enrichment.pathway(l.dex.mrn_ENTREZ_up, universe_mrn.ENTREZ)
# rs.PW.dex_mrn.dn <- apply_enrichment.pathway(l.dex.mrn_ENTREZ_dn, universe_mrn.ENTREZ)
#
# rs.PW.dex_pro.up <- apply_enrichment.pathway(l.dex.pro_ENTREZ_up, universe_pro.ENTREZ)
# rs.PW.dex_pro.dn <- apply_enrichment.pathway(l.dex.pro_ENTREZ_dn, universe_pro.ENTREZ)
#
#
# # kegg
# rs.KG.dex_mrn.up <- apply_enrichment.kegg(l.dex.mrn_ENTREZ_up, universe_mrn.ENTREZ)
# rs.KG.dex_mrn.dn <- apply_enrichment.kegg(l.dex.mrn_ENTREZ_dn, universe_mrn.ENTREZ)
#
# rs.KG.dex_pro.up <- apply_enrichment.kegg(l.dex.pro_ENTREZ_up, universe_pro.ENTREZ)
# rs.KG.dex_pro.dn <- apply_enrichment.kegg(l.dex.pro_ENTREZ_dn, universe_pro.ENTREZ)
#
#
# res.enrichment.dex <- list(
# #GO
# rs.GO.dex_mrn.up = rs.GO.dex_mrn.up,
# rs.GO.dex_mrn.dn = rs.GO.dex_mrn.dn,
# rs.GO.dex_pro.up = rs.GO.dex_pro.up,
# rs.GO.dex_pro.dn = rs.GO.dex_pro.dn,
# # pathways
# rs.PW.dex_mrn.up = rs.PW.dex_mrn.up,
# rs.PW.dex_mrn.dn = rs.PW.dex_mrn.dn,
# rs.PW.dex_pro.up = rs.PW.dex_pro.up,
# rs.PW.dex_pro.dn = rs.PW.dex_pro.dn,
# # kegg
# rs.KG.dex_mrn.up = rs.KG.dex_mrn.up,
# rs.KG.dex_mrn.dn = rs.KG.dex_mrn.dn,
# rs.KG.dex_pro.up = rs.KG.dex_pro.up,
# rs.KG.dex_pro.dn = rs.KG.dex_pro.dn
# )
# save(res.enrichment.dex,
# file = paste0(dir_moa, "/data/final/preps/enrichment/", "res.enrichment.dex.RData"))
load(file = paste0(dir_moa, "/data/final/preps/enrichment/", "res.enrichment.dex.RData"))
# copy & add enrichFactor
rs.GO.mrn_UP <- res.enrichment.dex$rs.GO.dex_mrn.up
rs.GO.mrn_UP <- add.enrichFactors(rs.GO.mrn_UP)
## inspect top terms
# find
top.GO_mrn_UP <- find_topPval.GO(rs.GO.mrn_UP, 2)
# plot
inspect.top.GO(rs.GO.mrn_UP, top.GO_mrn_UP)
# check & inspect frequency of terms
terms.mrn.UP <- rs.GO.mrn_UP %>%
count('Description') %>%
arrange(desc(freq))
# select terms to show:
selTerms_mrn.UP <- c(
'azurophil granule',
'specific granule',
'tertiary granule',
'ficolin-1-rich granule',
'cell-cell adhesion',
#'signaling receptor activity',
#'cell surface',
'mitochondrial matrix'
)
# inspect selTerms
inspect.selTerms.GO(rs.GO.mrn_UP, selTerms_mrn.UP)
# define order of visible terms and line breaks
selTerms_freq_mrn.UP <- rs.GO.mrn_UP %>%
#filter(ID %in% topP_mrn_UP) %>%
filter(Description %in% selTerms_mrn.UP) %>%
#select(Description, Cluster) %>%
#arrange(Description) %>%
count('Description') %>%
arrange(desc(freq))
rs.GO.mrn_UP %>%
#filter(ID %in% topP_mrn_UP) %>%
filter(Description %in% selTerms_mrn.UP) %>%
select(Description, Cluster) %>%
left_join(selTerms_freq_mrn.UP, by = "Description") %>%
mutate(DESC_pre = stringr::str_wrap(Description, 16)) %>%
mutate(DESC = paste0("\"", DESC_pre, "\"")) %>%
arrange(desc(freq))
# define levels for plot
rs.GO.mrn_UP.fin <- rs.GO.mrn_UP %>%
filter(Description %in% selTerms_mrn.UP) %>%
mutate(DESC = stringr::str_wrap(Description, 16))
# rename to shorten (optional)
## reformat and define levels
rs.GO.mrn_UP.fin$DESC <- sub("(\\w)", "\\U\\1", rs.GO.mrn_UP.fin$DESC, perl = TRUE)
unique(rs.GO.mrn_UP.fin$DESC)
LEVELS.mrn.UP <- rev(c(
"Azurophil\ngranule",
'Specific granule',
'Tertiary granule',
"Ficolin-1-rich\ngranule",
"Cell-cell\nadhesion",
"Mitochondrial\nmatrix"
))
rs.GO.mrn_UP.fin$DESC <- factor(rs.GO.mrn_UP.fin$DESC, levels = LEVELS.mrn.UP)
# plot
p.GO_mrn.UP <- plot.GO_final(rs.GO.mrn_UP.fin, 20, LEVELS.mrn.UP, -0.6)
p.GO_mrn.UP
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_2/", "p.GO_mrn.UP", ".svg"),
plot = p.GO_mrn.UP,
device = svg,
width = 6.5,
height = 6)
# copy & add enrichFactor
rs.GO.mrn_DN <- res.enrichment.dex$rs.GO.dex_mrn.dn
rs.GO.mrn_DN <- add.enrichFactors(rs.GO.mrn_DN)
## inspect top terms
# find
top.GO_mrn_DN <- find_topPval.GO(rs.GO.mrn_DN, 2)
# plot
inspect.top.GO(rs.GO.mrn_DN, top.GO_mrn_DN)
# check & inspect frequency of terms
terms.mrn.DN <- rs.GO.mrn_DN %>%
count('Description') %>%
arrange(desc(freq))
# select terms to show:
selTerms_mrn.DN <- c(
'ribosome',
'specific granule',
'azurophil granule',
'cell surface',
'DNA-binding transcription factor activity, RNA polymerase II-specific',
'tertiary granule',
'translation',
'mitochondrial matrix'
)
# inspect second plot
inspect.selTerms.GO(rs.GO.mrn_DN, selTerms_mrn.DN)
# define order of visible terms
selTerms_freq_mrn.DN <- rs.GO.mrn_DN %>%
#filter(ID %in% topP_mrn_UP) %>%
filter(Description %in% selTerms_mrn.DN) %>%
#select(Description, Cluster) %>%
#arrange(Description) %>%
count('Description') %>%
arrange(desc(freq))
rs.GO.mrn_DN %>%
#filter(ID %in% topP_mrn_DN) %>%
filter(Description %in% selTerms_mrn.DN) %>%
select(Description, Cluster) %>%
left_join(selTerms_freq_mrn.DN, by = "Description") %>%
mutate(DESC_pre = stringr::str_wrap(Description, 16)) %>%
mutate(DESC = paste0("\"", DESC_pre, "\"")) %>%
arrange(desc(freq))
# define levels for plot
# format final data
rs.GO.mrn_DN.fin <- rs.GO.mrn_DN %>%
filter(Description %in% selTerms_mrn.DN) %>%
#filter(ID %in% topP_mrn_DN) %>%
mutate(DESC = stringr::str_wrap(Description, 16))
# shorten terms
rs.GO.mrn_DN.fin$DESC[17] <- 'TF-activity,\n RNA poly.II spc.'
## reformat and define levels
rs.GO.mrn_DN.fin$DESC <- sub("(\\w)", "\\U\\1", rs.GO.mrn_DN.fin$DESC, perl = TRUE)
unique(rs.GO.mrn_DN.fin$DESC)
LEVELS.mrn.DN <- c(
"Azurophil\ngranule",
"Specific granule",
"Tertiary granule",
"TF-activity,\n RNA poly.II spc.",
"Ribosome" ,
"Mitochondrial\nmatrix",
"Translation",
"Cell surface"
)
rs.GO.mrn_DN.fin$DESC <- factor(rs.GO.mrn_DN.fin$DESC, levels = LEVELS.mrn.DN)
# plot
p.GO_mrn.DN <- plot.GO_final(rs.GO.mrn_DN.fin, 20, rev(LEVELS.mrn.DN), -0.6)
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_2/", "p.GO_mrn.DN", ".svg"),
plot = p.GO_mrn.DN,
device = svg,
width = 6.5,
height = 6)
# copy & add enrichFactor
rs.GO.pro_UP <- res.enrichment.dex$rs.GO.dex_pro.up
rs.GO.pro_UP <- add.enrichFactors(rs.GO.pro_UP)
## inspect top terms
# find
top.GO_pro_UP <- find_topPval.GO(rs.GO.pro_UP, 2)
# plot
inspect.top.GO(rs.GO.pro_UP, top.GO_pro_UP)
# check & inspect frequency of terms
terms.pro.UP <- rs.GO.pro_UP %>%
count('Description') %>%
arrange(desc(freq))
# select terms to show:
selTerms_pro.UP <- c(
'ficolin-1-rich granule',
#'secretory granule',
'tertiary granule',
'specific granule',
'azurophil granule',
#'cell migration',
'cell-cell adhesion',
'cytosolic ribosome',
'respiratory burst'
)
# inspect selTerms
inspect.selTerms.GO(rs.GO.pro_UP, selTerms_pro.UP)
# define order of visible terms and line breaks
selTerms_freq_pro.UP <- rs.GO.pro_UP %>%
#filter(ID %in% topP_pro_UP) %>%
filter(Description %in% selTerms_pro.UP) %>%
#select(Description, Cluster) %>%
#arrange(Description) %>%
count('Description') %>%
arrange(desc(freq))
rs.GO.pro_UP %>%
#filter(ID %in% topP_pro_UP) %>%
filter(Description %in% selTerms_pro.UP) %>%
select(Description, Cluster) %>%
left_join(selTerms_freq_pro.UP, by = "Description") %>%
mutate(DESC_pre = stringr::str_wrap(Description, 16)) %>%
mutate(DESC = paste0("\"", DESC_pre, "\"")) %>%
arrange(desc(freq))
# define levels for plot
rs.GO.pro_UP.fin <- rs.GO.pro_UP %>%
filter(Description %in% selTerms_pro.UP) %>%
mutate(DESC = stringr::str_wrap(Description, 16))
# rename to shorten (optional)
## reformat and define levels
rs.GO.pro_UP.fin$DESC <- sub("(\\w)", "\\U\\1", rs.GO.pro_UP.fin$DESC, perl = TRUE)
unique(rs.GO.pro_UP.fin$DESC)
LEVELS.pro.UP <- rev(c(
"Azurophil\ngranule",
'Specific granule',
'Tertiary granule',
"Ficolin-1-rich\ngranule",
"Cytosolic\nribosome",
"Cell-cell\nadhesion",
#"Cell migration",
"Mitochondrial\nmatrix",
"Respiratory\nburst"
))
rs.GO.pro_UP.fin$DESC <- factor(rs.GO.pro_UP.fin$DESC, levels = LEVELS.pro.UP)
# plot
p.GO_pro.UP <- plot.GO_final(rs.GO.pro_UP.fin, 20, LEVELS.pro.UP, -0.6)
p.GO_pro.UP
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_2/", "p.GO_pro.UP", ".svg"),
plot = p.GO_pro.UP,
device = svg,
width = 6.5,
height = 6)
# copy & add enrichFactor
rs.GO.pro_DN <- res.enrichment.dex$rs.GO.dex_pro.dn
rs.GO.pro_DN <- add.enrichFactors(rs.GO.pro_DN)
## inspect top terms
# find
top.GO_pro_DN <- find_topPval.GO(rs.GO.pro_DN, 2)
# plot
inspect.top.GO(rs.GO.pro_DN, top.GO_pro_DN)
# check & inspect frequency of terms
terms.pro.DN <- rs.GO.pro_DN %>%
count('Description') %>%
arrange(desc(freq))
# select terms to show:
selTerms_pro.DN <- c(
'RNA polymerase complex',
'DNA replication',
'mitochondrial matrix',
'sulfur compound binding',
'glycosaminoglycan binding'
)
# inspect second plot
inspect.selTerms.GO(rs.GO.pro_DN, selTerms_pro.DN)
# define order of visible terms
selTerms_freq_pro.DN <- rs.GO.pro_DN %>%
#filter(ID %in% topP_pro_UP) %>%
filter(Description %in% selTerms_pro.DN) %>%
#select(Description, Cluster) %>%
#arrange(Description) %>%
count('Description') %>%
arrange(desc(freq))
rs.GO.pro_DN %>%
#filter(ID %in% topP_pro_DN) %>%
filter(Description %in% selTerms_pro.DN) %>%
select(Description, Cluster) %>%
left_join(selTerms_freq_pro.DN, by = "Description") %>%
mutate(DESC_pre = stringr::str_wrap(Description, 16)) %>%
mutate(DESC = paste0("\"", DESC_pre, "\"")) %>%
arrange(desc(freq))
# define levels for plot
# format final data
rs.GO.pro_DN.fin <- rs.GO.pro_DN %>%
filter(Description %in% selTerms_pro.DN) %>%
#filter(ID %in% topP_pro_DN) %>%
mutate(DESC = stringr::str_wrap(Description, 16))
# shorten terms
## define levels x axis
rs.GO.pro_DN.fin$Cluster <- factor(rs.GO.pro_DN.fin$Cluster, levels = c('MB->PM', 'PM->MC', 'MC->MM', 'MM->B', '-', 'B->PMN'))
## reformat and define levels
rs.GO.pro_DN.fin$DESC <- sub("(\\w)", "\\U\\1", rs.GO.pro_DN.fin$DESC, perl = TRUE)
unique(rs.GO.pro_DN.fin$DESC)
LEVELS.pro.DN <- c(
"RNA polymerase\ncomplex",
"Sulfur compound\nbinding",
"Glycosaminoglycan\nbinding",
"Mitochondrial\nmatrix",
"DNA replication"
)
rs.GO.pro_DN.fin$DESC <- factor(rs.GO.pro_DN.fin$DESC, levels = LEVELS.pro.DN)
# plot
p.GO_pro.DN <- plot.GO_final(rs.GO.pro_DN.fin, 20, rev(LEVELS.pro.DN), -0.6)
p.GO_pro.DN
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_2/", "p.GO_pro.DN", ".svg"),
plot = p.GO_pro.DN,
device = svg,
width = 6.5,
height = 6)
# data
dex.all$dex.mic$dex_stage_mic_vsn
# for export
univ.mic <- unique(dex.all$dex.mic$dex_stage_mic_vsn$fid)
# write_clip(univ.mic)
# MBtoMC
mi.MBtoMC.dn <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'MBtoMC', 'down', 1)
# write_clip(mi.MBtoMC.dn)
mi.MBtoMC.up <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'MBtoMC', 'up', 1)
# write_clip(mi.MBtoMC.up)
# MCtoMM
mi.MCtoMM.dn <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'MCtoMM', 'down', 1)
# write_clip(mi.MCtoMM.dn)
mi.MCtoMM.up <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'MCtoMM', 'up', 1)
# write_clip(mi.MCtoMM.up)
# MMtoB: all 0! -> discarded
# BtoS
mi.BtoS.dn <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'BtoS', 'down', 1)
# write_clip(mi.BtoS.dn)
mi.BtoS.up <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'BtoS', 'up', 1)
# write_clip(mi.BtoS.up)
# StoPMN
mi.StoPMN.dn <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'StoPMN', 'down', 1)
# write_clip(mi.StoPMN.dn) # only 2 -> no results!
mi.StoPMN.up <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'StoPMN', 'up', 1)
# write_clip(mi.StoPMN.up)
Using miRTargetLink 2.0
https://ccb-compute.cs.uni-saarland.de/mirtargetlink2/unidirectional_search/
miEAA 2.0 with only strong validated targets in homo sapiens
# import downloaded files from miEAA 2.0 over representation analysis
miRT_MBtoMC.dn.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miRTarget/in/dex/", "MBtoMC_dn", ".csv"))
miRT_MBtoMC.up.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miRTarget/in/dex/", "MBtoMC_up", ".csv"))
miRT_MCtoMM.dn.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miRTarget/in/dex/", "MCtoMM_dn", ".csv"))
miRT_MCtoMM.up.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miRTarget/in/dex/", "MCtoMM_up", ".csv"))
miRT_BtoS.dn.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miRTarget/in/dex/", "BtoS_dn", ".csv"))
miRT_BtoS.up.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miRTarget/in/dex/", "BtoS_up", ".csv"))
miRT_StoPMN.up.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miRTarget/in/dex/", "StoPMN_up", ".csv"))
# empty: MMtoB both, StoPMN_dn
## format results
prep_miRT <- function(miRT.raw, ClusterName){
# colnames 1
names(miRT.raw)[4] <- 'pvalue'
names(miRT.raw)[5] <- 'p.adjust'
names(miRT.raw)[6] <- 'qvalue'
# calc
miRT.raw$dif <- miRT.raw$Expected - miRT.raw$Observed
miRT.raw$pminus1 <- 1- miRT.raw$p.adjust
miRT.raw$Count <- str_count(miRT.raw$miRNAs.precursors, ';')
miRT.raw$Count <- miRT.raw$Count +1
# colnames 2
miRT.raw$FoldEnrichment = miRT.raw$dif * -1
miRT.raw$features <- miRT.raw$miRNAs.precursors
miRT.raw$Description <- miRT.raw$Subcategory
miRT.raw$Cluster <- ClusterName
# filter
miRT.pre <- miRT.raw %>% filter(p.adjust < 0.05) # padjust filtered!
return(miRT.pre)
}
miRT_MBtoMC.dn_pre <- prep_miRT(miRT_MBtoMC.dn.raw, "MBtoMC.dn")
miRT_MBtoMC.up_pre <- prep_miRT(miRT_MBtoMC.up.raw, "MBtoMC.up")
miRT_MCtoMM.dn_pre <- prep_miRT(miRT_MCtoMM.dn.raw, "MCtoMM.dn")
miRT_MCtoMM.up_pre <- prep_miRT(miRT_MCtoMM.up.raw, "MCtoMM.up")
miRT_BtoS.dn_pre <- prep_miRT(miRT_BtoS.dn.raw, "BtoS.dn")
miRT_BtoS.up_pre <- prep_miRT(miRT_BtoS.up.raw, "BtoS.up")
miRT_StoPMN.up_pre <- prep_miRT(miRT_StoPMN.up.raw, "StoPMN.up")
miRT_progression_pre <- list(
miRT_MBtoMC.dn = miRT_MBtoMC.dn_pre,
miRT_MBtoMC.up = miRT_MBtoMC.up_pre,
miRT_MCtoMM.dn = miRT_MCtoMM.dn_pre,
miRT_MCtoMM.up = miRT_MCtoMM.up_pre,
miRT_BtoS.dn = miRT_BtoS.dn_pre,
miRT_BtoS.up = miRT_BtoS.up_pre,
miRT_StoPMN.up = miRT_StoPMN.up_pre)
## check
# miRT_MCtoMM.up_pre %>% group_by(Category) %>% slice_min(p.adjust, n=5) %>% view()
# miRT_MCtoMM.up_pre %>% filter(Category == 'Annotation (Gene Ontology)') %>% slice_min(p.adjust, n=5)
# prep for integration with mrn & pro
prep_mic.enrich4mrnNpro <- function(res.miEAA_pre){
res <- res.miEAA_pre %>% select(Category, Cluster, Description, FoldEnrichment, Count, features)
return(res)
}
miRT_progression <- map(miRT_progression_pre, prep_mic.enrich4mrnNpro)
miRT_progression$miRT_MBtoMC.dn
# try different selections
selTerms_mrn.pro_all <- c(
selTerms_mrn.UP,
selTerms_mrn.DN,
selTerms_pro.UP,
selTerms_pro.DN)
select_top8_mic <- function(res.miEAA){
res <- res.miEAA %>%
# filter(str_detect(Description, "pathway")) %>%
# filter(Category == 'GO Biological process (miRPathDB)') %>%
# filter(! Category %in% c('Diseases (MNDR)', 'Localization (RNALocate)')) %>%
# filter(Description %in% selTerms_mrn.pro_all) %>%
filter(str_detect(Description, "signal|cascade|pathway|activation|complex|checkpoint|regulation")) %>%
slice_max(FoldEnrichment, n = 8)
return(res)
}
miRT_progression_top8 <- map(miRT_progression, select_top8_mic)
map(miRT_progression_top8, nrow)
miRT_progression_top8_all <- bind_rows(miRT_progression_top8, .id = "step")
miRT_progression_top8_dn <- miRT_progression_top8_all %>% filter(str_detect(step, '.dn'))
miRT_progression_top8_up <- miRT_progression_top8_all %>% filter(str_detect(step, '.up'))
miRT_progression_top8_dn %>%
ggplot(aes(x= Cluster, y = Description, color = Cluster, label = Count)) +
geom_point(aes(size = FoldEnrichment), stroke = 2)
miRT_progression_top8_up %>%
ggplot(aes(x= Cluster, y = Description, color = Cluster, label = Count)) +
geom_point(aes(size = FoldEnrichment), stroke = 2)
# try with all of them?
miRT_progression_all <- bind_rows(miRT_progression, .id = "step")
# try with terms from mrn & pro
select_SelTermsMRNnPRO_mic <- function(res.miEAA){
res <- res.miEAA %>%
# filter(str_detect(Description, "pathway")) %>%
# filter(Category == 'GO Biological process (miRPathDB)') %>%
# filter(! Category %in% c('Diseases (MNDR)', 'Localization (RNALocate)')) %>%
filter(Description %in% selTerms_mrn.pro_all) %>%
#filter(str_detect(Description, "signal|cascade|pathway|activation|complex|checkpoint|regulation")) %>%
#slice_max(FoldEnrichment, n = 8)
return(res)
}
miRT_progression_stRNAnPRO <- map(miRT_progression, select_SelTermsMRNnPRO_mic)
map(miRT_progression_stRNAnPRO, nrow) # only MB to MC
miRT_progression_selTerms <- map(miRT_progression, select_SelTermsMRNnPRO_mic)
map(miRT_progression_selTerms, nrow) # only MB to MC, one BtoS
miRT_progression_selTerms$miRT_MBtoMC.dn %>%
# filter(Description %in% miRT_terms_final) %>%
ggplot(aes(x= Cluster, y = Description, color = Cluster, label = Count)) +
geom_point(aes(size = FoldEnrichment), stroke = 2)
# combine selTerms with signaling
select_signaling_mic <- function(res.miEAA){
res <- res.miEAA %>%
# filter(str_detect(Description, "pathway")) %>%
# filter(Category == 'GO Biological process (miRPathDB)') %>%
# filter(! Category %in% c('Diseases (MNDR)', 'Localization (RNALocate)')) %>%
# filter(Description %in% selTerms_mrn.pro_all) %>%
filter(str_detect(Description, "signal|cascade|pathway|activation|complex|checkpoint|regulation")) %>%
#slice_max(FoldEnrichment, n = 8)
return(res)
}
miRT_progression_sig <- map(miRT_progression, select_signaling_mic)
map(miRT_progression_sig, nrow)
# combine
miRT_progression_sig <- bind_rows(miRT_progression_sig, .id = "step")
miRT_progression_selTerms <- bind_rows(miRT_progression_selTerms, .id = "step")
miRT_progression_comp <- rbind(miRT_progression_sig, miRT_progression_selTerms)
colnames(miRT_progression_comp)
miRT_terms_final <- miRT_progression_comp %>%
group_by(step) %>%
count('Description') %>%
dplyr::arrange(desc(freq)) %>%
filter(freq == 3)
# head(20) %>%
# pull(Description)
miRT_progression_comp %>%
#filter(Description %in% miRT_terms_final) %>%
ggplot(aes(x= Cluster, y = Description, color = Cluster, label = Count)) +
geom_point(aes(size = FoldEnrichment), stroke = 2)
Nothing actually useful from this kind of analysis
Via: https://ccb-compute2.cs.uni-saarland.de/mieaa/
Setup: - Over-Representation Analysis (ORA) - Homo sapiens - Test set: dex list - no reference set - all categories - BHB with 0.05
-> only useful for localisations
# import
ora.MBtoMC.up <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.MBtoMC.up.csv'))
ora.MBtoMC.dn <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.MBtoMC.dn.csv'))
ora.MCtoMM.up <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.MCtoMM.up.csv'))
ora.MCtoMM.dn <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.MCtoMM.dn.csv'))
ora.BtoS.up <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.BtoS.up.csv'))
ora.BtoS.dn <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.BtoS.dn.csv'))
ora.StoPMN.up <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.StoPMN.up.csv'))
# format
table(ora.MBtoMC.up$Category) # 8 loc, , 3 pw kegg
table(ora.MBtoMC.dn$Category) # loc & pw kegg
table(ora.MCtoMM.up$Category) # loc
table(ora.MCtoMM.dn$Category) # loc
table(ora.BtoS.up$Category) # loc
table(ora.BtoS.dn$Category) # disease
table(ora.StoPMN.up$Category) # disease
# try loc and pw
ora.MBtoMC.up.loc <- ora.MBtoMC.up %>%
filter(Category %in% c("Localization (RNALocate)"))
ora.MBtoMC.up.loc$stage <- 'MB->MC'
ora.MBtoMC.dn.loc <- ora.MBtoMC.dn %>%
filter(Category %in% c("Localization (RNALocate)"))
ora.MBtoMC.dn.loc$stage <- 'MB->MC'
ora.MCtoMM.up.loc <- ora.MCtoMM.up %>%
filter(Category %in% c("Localization (RNALocate)"))
ora.MCtoMM.up.loc$stage <- 'MC->MM'
ora.MCtoMM.dn.loc <- ora.MCtoMM.dn %>%
filter(Category %in% c("Localization (RNALocate)"))
ora.MCtoMM.dn.loc$stage <- 'MC->MM'
ora.BtoS.up.loc <- ora.BtoS.up %>%
filter(Category %in% c("Localization (RNALocate)"))
ora.BtoS.up.loc$stage <- 'B->S'
ora.BtoS.dn.loc <- ora.BtoS.dn %>%
filter(Category %in% c("Localization (RNALocate)"))
ora.BtoS.dn.loc$stage <- 'B->S'
ora.StoPMN.up.loc <- ora.StoPMN.up %>%
filter(Category %in% c("Localization (RNALocate)"))
ora.StoPMN.up.loc$stage <- 'S->PMN'
### up
mic.locs.up <- rbind(ora.MBtoMC.up.loc, ora.MCtoMM.up.loc, ora.BtoS.up.loc, ora.StoPMN.up.loc)
mic.locs.up %<>% add_row(stage = 'MM->B', Subcategory = "Mitochondrion", `P-adjusted`= NA)
mic.locs.up %<>% add_row(stage = 'S->PMN', Subcategory = "Mitochondrion", `P-adjusted`= NA)
mic.locs.up$stage <- factor(mic.locs.up$stage, levels = c('MB->MC', 'MC->MM','MM->B', 'B->S', 'S->PMN'))
unique(mic.locs.up$Subcategory)
levels.mic.locs.up <- rev(c("Nucleus", "Mitochondrion", "Cytoplasm", "Microvesicle", "Nucleolus", "Extracellular vesicle"))
mic.locs.up$count <- str_count(mic.locs.up$`miRNAs/precursors`, ';')
mic.locs.up$count <- mic.locs.up$count +1
p.mic.locs.up_pre <- mic.locs.up %>%
mutate(size = 1- `P-adjusted` ) %>%
filter(! Subcategory %in% c("microvesicle", "exosome", 'Exosome', "Circulating")) %>%
ggplot(aes(x = stage, y = factor(Subcategory, levels = levels.mic.locs.up), color = stage, label = count)) +
geom_point(aes(size = size)) +
scale_color_manual(values = colors_progression.arrow.mic, guide = 'none') +
geom_text(color = 'black', hjust = -1, size = 5)
p.mic.locs.up_pre
p.mic.locs.up <- p.mic.locs.up_pre +
#theme(legend.position="none")+
#ylab("Terms") +
#scale_size(range = c(4, 10)) +
theme(
) +
labs(size="1-FDR") +
theme_light(base_size=20) +
theme(legend.position = "none")
# theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1),
# axis.title.x = element_blank(),
# axis.title.y=element_blank(),
# legend.position = c(.95, .95),
# legend.justification = c("right", "top"),
# legend.box.just = "right",
# legend.margin = margin(6, 6, 6, 6))
p.mic.locs.up
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_2/", "p.mic.locs.up", ".svg"),
plot = p.mic.locs.up,
device = svg,
width = 6.5,
height = 6)
## down
mic.locs.dn <- rbind(ora.MBtoMC.dn.loc, ora.MCtoMM.dn.loc, ora.BtoS.dn.loc)
mic.locs.dn %<>% add_row(stage = 'MM->B', Subcategory = "Mitochondrion", `P-adjusted`= NA)
mic.locs.dn %<>% add_row(stage = 'B->S', Subcategory = "Mitochondrion", `P-adjusted`= NA)
mic.locs.dn %<>% add_row(stage = 'S->PMN', Subcategory = "Mitochondrion", `P-adjusted`= NA)
mic.locs.dn$stage <- factor(mic.locs.dn$stage, levels = c('MB->MC', 'MC->MM', 'MM->B', 'B->S', 'S->PMN'))
unique(mic.locs.dn$Subcategory)
levels.mic.locs.dn <- rev(c("Nucleus", "Mitochondrion", "Cytoplasm", "Microvesicle", "Nucleolus", "Extracellular vesicle"))
# add count
mic.locs.dn$count <- str_count(mic.locs.dn$`miRNAs/precursors`, ';')
mic.locs.dn$count <- mic.locs.dn$count +1
p.mic.locs.dn_pre <- mic.locs.dn %>%
mutate(size = round(1- `P-adjusted`, 2)) %>%
filter(! Subcategory %in% c("microvesicle", "exosome", 'Exosome', "Circulating")) %>%
ggplot(aes(x = stage, y = Subcategory, color = stage, label = count)) +
geom_point(aes(size = size)) +
scale_color_manual(values = colors_progression.arrow.mic, guide = 'none') +
geom_text(color = 'black', hjust = -1, size = 5)
p.mic.locs.dn <- p.mic.locs.dn_pre +
labs(size="1-FDR") +
theme_light(base_size=20) +
theme(legend.position = "none")
# theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1),
# axis.title.x = element_blank(),
# axis.title.y=element_blank(),
# legend.position="none")
p.mic.locs.dn
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_2/", "p.mic.locs.dn", ".scg"),
plot = p.mic.locs.dn,
device = svg,
width = 6.5,
height = 6)
Combined analysis to compare enrichment of trajectories on mrn, pro and mic levels
# centrality selected features
load(file = paste0(dir_moa, "/data/final/preps/timeomics/", "centralities.RData"))
# filter to 90% quantile (50% doesnt result in enough features!)
centrality.mrn <- centrality.mrn %>% select(ENSG, traj, centrality) %>%
group_by(traj) %>%
arrange(traj, desc(centrality)) %>%
filter(centrality > quantile(centrality, .10))
centrality.pro <- centrality.pro %>% select(UNIPROT = ENSG, traj, centrality) %>%
group_by(traj) %>%
arrange(traj, desc(centrality)) %>%
filter(centrality > quantile(centrality, .10))
centrality.pro <- add.fan2UNIPROT(centrality.pro)
# create lists: combined and unique mrn/pro
## 90
l.trj_mrn.pro_ENSG <- list(
mrn.Up = centrality.mrn %>% filter(traj == "up") %>% pull(ENSG),
mrn.Dn = centrality.mrn %>% filter(traj == "dn") %>% pull(ENSG),
mrn.UpDn = centrality.mrn %>% filter(traj == "upDn") %>% pull(ENSG),
mrn.DnUp = centrality.mrn %>% filter(traj == "dnUp") %>% pull(ENSG),
mrn.UpDnUp = centrality.mrn %>% filter(traj == "upDnup") %>% pull(ENSG),
mrn.DnUpDn = centrality.mrn %>% filter(traj == "dnUpDn") %>% pull(ENSG),
mrn.UpDnUpDn = centrality.mrn %>% filter(traj == "upDnUpDn") %>% pull(ENSG),
mrn.DnUpDnUp = centrality.mrn %>% filter(traj == "dnUpDnUp") %>% pull(ENSG),
pro.Up = centrality.pro %>% filter(traj == "up") %>% pull(ENSG),
pro.Dn = centrality.pro %>% filter(traj == "dn") %>% pull(ENSG),
pro.UpDn = centrality.pro %>% filter(traj == "upDn") %>% pull(ENSG),
pro.DnUp = centrality.pro %>% filter(traj == "dnUp") %>% pull(ENSG),
pro.UpDnUp = centrality.pro %>% filter(traj == "upDnup") %>% pull(ENSG),
pro.DnUpDn = centrality.pro %>% filter(traj == "dnUpDn") %>% pull(ENSG)
)
l.trj_mrn_ENSG <- list(
mrn.Up = centrality.mrn %>% filter(traj == "up") %>% pull(ENSG),
mrn.Dn = centrality.mrn %>% filter(traj == "dn") %>% pull(ENSG),
mrn.UpDn = centrality.mrn %>% filter(traj == "upDn") %>% pull(ENSG),
mrn.DnUp = centrality.mrn %>% filter(traj == "dnUp") %>% pull(ENSG),
mrn.UpDnUp = centrality.mrn %>% filter(traj == "upDnup") %>% pull(ENSG),
mrn.DnUpDn = centrality.mrn %>% filter(traj == "dnUpDn") %>% pull(ENSG),
mrn.UpDnUpDn = centrality.mrn %>% filter(traj == "upDnUpDn") %>% pull(ENSG),
mrn.DnUpDnUp = centrality.mrn %>% filter(traj == "dnUpDnUp") %>% pull(ENSG)
)
l.trj_pro_ENSG <- list(
pro.Up = centrality.pro %>% filter(traj == "up") %>% pull(ENSG),
pro.Dn = centrality.pro %>% filter(traj == "dn") %>% pull(ENSG),
pro.UpDn = centrality.pro %>% filter(traj == "upDn") %>% pull(ENSG),
pro.DnUp = centrality.pro %>% filter(traj == "dnUp") %>% pull(ENSG),
pro.UpDnUp = centrality.pro %>% filter(traj == "upDnup") %>% pull(ENSG),
pro.DnUpDn = centrality.pro %>% filter(traj == "dnUpDn") %>% pull(ENSG)
)
# convert to ENTREZ
turn.ENSGintoENTREZ <- function(ENSG.vector){
ENTREZ.vector <- fan$ENTREZ[match(ENSG.vector, fan$ENSG)]
ENTREZ.vector <- na.omit(ENTREZ.vector)
ENTREZ.vector <- as.vector(ENTREZ.vector)
return(ENTREZ.vector)
}
l.trj_mrn.pro_ENTREZ <- purrr::map(l.trj_mrn.pro_ENSG, turn.ENSGintoENTREZ)
l.trj_mrn_ENTREZ <- purrr::map(l.trj_mrn_ENSG, turn.ENSGintoENTREZ)
l.trj_pro_ENTREZ <- purrr::map(l.trj_pro_ENSG, turn.ENSGintoENTREZ)
# # enrich
# rs.PW.trj_mrn.pro <- apply_enrichment.pathway(l.trj_mrn.pro_ENTREZ, universe_combo.ENTREZ) #
# rs.PW.trj_mrn <- apply_enrichment.pathway(l.trj_mrn_ENTREZ, universe_mrn.ENTREZ) #
# rs.PW.trj_pro <- apply_enrichment.pathway(l.trj_pro_ENTREZ, universe_pro.ENTREZ) #
#
# rs.KG.trj_mrn.pro <- apply_enrichment.kegg(l.trj_mrn.pro_ENTREZ, universe_combo.ENTREZ) #
# rs.KG.trj_mrn <- apply_enrichment.kegg(l.trj_mrn_ENTREZ, universe_mrn.ENTREZ) #
# rs.KG.trj_pro <- apply_enrichment.kegg(l.trj_pro_ENTREZ, universe_pro.ENTREZ) #
#
# rs.GO.trj_mrn.pro <- apply_enrichment.GO(l.trj_mrn.pro_ENSG, universe_combo.ENSG) #
# rs.GO.trj_mrn <- apply_enrichment.GO(l.trj_mrn_ENSG, universe_mrn.ENSG) #
# rs.GO.trj_pro <- apply_enrichment.GO(l.trj_pro_ENSG, universe_pro.ENSG) #
#
# # add factors (do for all at some point....)
# rs.PW.trj_mrn.pro <- add.enrichFactors(rs.PW.trj_mrn.pro)
# rs.PW.trj_mrn <- add.enrichFactors(rs.PW.trj_mrn)
# rs.PW.trj_pro <- add.enrichFactors(rs.PW.trj_pro)
#
# rs.KG.trj_mrn.pro <- add.enrichFactors(rs.KG.trj_mrn.pro)
# rs.KG.trj_mrn <- add.enrichFactors(rs.KG.trj_mrn)
# rs.KG.trj_pro <- add.enrichFactors(rs.KG.trj_pro)
#
# rs.GO.trj_mrn.pro <- add.enrichFactors(rs.GO.trj_mrn.pro)
# rs.GO.trj_mrn <- add.enrichFactors(rs.GO.trj_mrn)
# rs.GO.trj_pro <- add.enrichFactors(rs.GO.trj_pro)
#
#
# res.enrichment.trj <- list(
# # PW
# rs.PW.trj_mrn.pro = rs.PW.trj_mrn.pro,
# rs.PW.trj_mrn = rs.PW.trj_mrn,
# rs.PW.trj_pro = rs.PW.trj_pro,
# # kegg
# rs.KG.trj_mrn.pro = rs.KG.trj_mrn.pro,
# rs.KG.trj_mrn = rs.KG.trj_mrn,
# rs.KG.trj_pro = rs.KG.trj_pro,
# # GO
# rs.GO.trj_mrn.pro = rs.GO.trj_mrn.pro,
# rs.GO.trj_mrn = rs.GO.trj_mrn,
# rs.GO.trj_pro = rs.GO.trj_pro
# )
#
# save(res.enrichment.trj,
# file = paste0(dir_moa, "/data/final/preps/enrichment/", "res.enrichment.trj.RData"))
load(file = paste0(dir_moa, "/data/final/preps/enrichment/", "res.enrichment.trj.RData"))
################ CLEAN UP BELOW!!
# final format
prep_enrich.mrnNpro.names <- function(rs.enrich.mrnNpro){
res <- rs.enrich.mrnNpro %>% select(Cluster, Description, FoldEnrichment, Count, features = geneID)
return(res)
}
rs.PW.trj_mrn.pro_fin <- prep_enrich.mrnNpro.names(res.enrichment.trj$rs.PW.trj_mrn.pro)
rs.KG.trj_mrn.pro_fin <- prep_enrich.mrnNpro.names(res.enrichment.trj$rs.KG.trj_mrn.pro)
rs.GO.trj_mrn.pro_fin <- prep_enrich.mrnNpro.names(res.enrichment.trj$rs.GO.trj_mrn.pro)
## check
# combos
table(rs.PW.trj_mrn.pro_fin$Cluster) # only in mRNA: only up and down. Pro also bimodal. intersting...
table(rs.KG.trj_mrn.pro_fin$Cluster) # same (1 mrn.DnUpDnUp)
table(rs.GO.trj_mrn.pro_fin$Cluster) # same!
#
# # # mrn
# # table(rs.PW.trj_mrn$Cluster) # only one UpDnUpDn
# # table(rs.KG.trj_mrn$Cluster) # same
# # table(rs.GO.trj_mrn$Cluster) # same
# # # somehow the multimodal features do not seem to have a unifying biological theme?
# #
# # # pro
# # table(rs.PW.trj_pro$Cluster) # only one UpDnUpDn
# # table(rs.KG.trj_pro$Cluster) # same
# # table(rs.GO.trj_pro$Cluster) # same
#
#
# # which are the unique mrn terms?
# rs.PW.trj_mrn %>% filter(Cluster == 'mrn.UpDnUpDn') # Epigenetic regulation of gene expression
# rs.KG.trj_mrn %>% filter(Cluster == 'mrn.DnUpDnUp') # Terpenoid backbone biosynthesis
# rs.GO.trj_mrn.pro %>% filter(Cluster == 'mrn.UpDnUpDn') # 8: misfolded protein binding, RNA helicase activity
#
#
# # check the combos first
#
#
#
# rs.PW.trj90_mrn.pro
#
#
# names(rs.PW.trj90_mrn.pro)
# rs.PW.trj90_mrn.pro$FoldEnrichment # from 1 - 20. = size of dot -> make all same in mic? Or use pminus1 (= all close to 1)
# # Description = Subcategory
# # Cluster: up, down
# # FoldEnrichment =
# # Count = number of features
#
#
# names(rs.miEAA.trj90_mic.upDn)
# # Subcategory = Description !
# # Cluster: add. = up, dn
# # count: make number of precoursors
# # FoldEnrichment = use the diff * -1
#
Using miRTargetLink 2.0
https://ccb-compute.cs.uni-saarland.de/mirtargetlink2/unidirectional_search/
miEAA 2.0 with only strong validated targets in homo sapiens
centrality.mic <- centrality.mic %>% select(ENSG, traj, centrality) %>%
group_by(traj) %>%
arrange(traj, desc(centrality)) %>%
filter(centrality > quantile(centrality, .10))
table(centrality.mic$traj)
# copy to clip for website
# centrality.mic %>% filter(traj == 'dn') %>% pull(ENSG) %>% write_clip()
# centrality.mic %>% filter(traj == 'up') %>% pull(ENSG) %>% write_clip()
# centrality.mic %>% filter(traj == 'dnUp') %>% pull(ENSG) %>% write_clip()
# centrality.mic %>% filter(traj == 'upDn') %>% pull(ENSG) %>% write_clip()
# centrality.mic %>% filter(traj == 'upDn late') %>% pull(ENSG) %>% write_clip()
# import downloaded files from miEAA 2.0 over representation analysis
rs.miEAA.trj_mic.dn.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miEAA/in/", "dn", ".csv"))
rs.miEAA.trj_mic.up.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miEAA/in/", "up", ".csv"))
rs.miEAA.trj_mic.dnUp.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miEAA/in/", "dnUp", ".csv"))
rs.miEAA.trj_mic.upDn.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miEAA/in/", "upDn", ".csv"))
rs.miEAA.trj_mic.upDnLate.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miEAA/in/", "upDnLate", ".csv"))
## format results
prep_miRT <- function(miRT.raw, ClusterName){
# colnames 1
names(miRT.raw)[4] <- 'pvalue'
names(miRT.raw)[5] <- 'p.adjust'
names(miRT.raw)[6] <- 'qvalue'
# calc
miRT.raw$dif <- miRT.raw$Expected - miRT.raw$Observed
miRT.raw$pminus1 <- 1- miRT.raw$p.adjust
miRT.raw$Count <- str_count(miRT.raw$miRNAs.precursors, ';')
miRT.raw$Count <- miRT.raw$Count +1
# colnames 2
miRT.raw$FoldEnrichment = miRT.raw$dif * -1
miRT.raw$features <- miRT.raw$miRNAs.precursors
miRT.raw$Description <- miRT.raw$Subcategory
miRT.raw$Cluster <- ClusterName
return(miRT.raw)
}
rs.miEAA.trj_mic.dn_pre <- prep_miRT(rs.miEAA.trj_mic.dn.raw, 'mic.Dn')
rs.miEAA.trj_mic.up_pre <- prep_miRT(rs.miEAA.trj_mic.up.raw, 'mic.Up')
rs.miEAA.trj_mic.dnUp_pre <- prep_miRT(rs.miEAA.trj_mic.dnUp.raw, 'mic.DnUp')
rs.miEAA.trj_mic.upDn_pre <- prep_miRT(rs.miEAA.trj_mic.upDn.raw, 'mic.UpDn')
rs.miEAA.trj_mic.upDnLate_pre <- prep_miRT(rs.miEAA.trj_mic.upDnLate.raw, 'mic.UpDnL')
# prep for integration with mrn & pro
prep_mic.enrich4mrnNpro <- function(res.miEAA_pre){
res <- res.miEAA_pre %>% select(Cluster, Description, FoldEnrichment, Count, features)
return(res)
}
rs.miEAA.trj_mic.dn <- prep_mic.enrich4mrnNpro(rs.miEAA.trj_mic.dn_pre)
rs.miEAA.trj_mic.up <- prep_mic.enrich4mrnNpro(rs.miEAA.trj_mic.up_pre)
rs.miEAA.trj_mic.dnUp <- prep_mic.enrich4mrnNpro(rs.miEAA.trj_mic.dnUp_pre)
rs.miEAA.trj_mic.upDn <- prep_mic.enrich4mrnNpro(rs.miEAA.trj_mic.upDn_pre)
rs.miEAA.trj_mic.upDnLate <- prep_mic.enrich4mrnNpro(rs.miEAA.trj_mic.upDnLate_pre)
# prep final mic enricment to combine with mrn & pro results
rs.miEAA.trj_mic <- rbind(rs.miEAA.trj_mic.dn, rs.miEAA.trj_mic.up, rs.miEAA.trj_mic.dnUp, rs.miEAA.trj_mic.upDn, rs.miEAA.trj_mic.upDnLate)
# intersectios of the single enrichment sets
intersect(rs.PW.trj_mrn.pro_fin$Description, rs.miEAA.trj_mic$Description) # 51
intersect(rs.KG.trj_mrn.pro_fin$Description, rs.miEAA.trj_mic$Description) # 148
intersect(rs.GO.trj_mrn.pro_fin$Description, rs.miEAA.trj_mic$Description) # 179
## combine all
# all mrn.pro
rs.ALL.trj_mrn.pro <- rbind(rs.PW.trj_mrn.pro_fin, rs.KG.trj_mrn.pro_fin, rs.GO.trj_mrn.pro_fin)
# with mic results
rs.ALL.trj <- rbind(rs.ALL.trj_mrn.pro, rs.miEAA.trj_mic)
# check overlaps
data <- rs.ALL.trj %>% select(Description, Cluster)
data <- table(data)
counts <- apply(data, 1, function(x) sum(x))
cluster <- apply(data, 1, function(x) names(which(x!=0)))
cluster <- lapply(cluster, function(x) paste(x, collapse = ";")) %>% unlist()
tab <- data.frame(pathway = names(counts), n = counts, cluster = cluster)
# select terms
selTerms.ALL.disease <- c(
'Acute myeloid leukemia',
'Inflammatory bowel disease'
)
selTerms.ALL.important <- c(
'SRP-dependent cotranslational protein targeting to membrane',
# 'Apoptosis',
'Cell Cycle',
'Ribosome',
'Translation',
'leukocyte cell-cell adhesion',
# 'ficolin-1-rich granule lumen',
'mitochondrial membrane',
'Neutrophil extracellular trap formation',
'Nonsense-Mediated Decay (NMD)'
# 'Pentose phosphate pathway',
)
selTerms.ALL.interesting <- c(
'HIF-1 signaling pathway',
'L13a-mediated translational silencing of Ceruloplasmin expression',
#'FoxO signaling pathway',
'mTOR signaling pathway',
#'Signaling by ROBO receptors',
'Regulation of expression of SLITs and ROBOs',
#'ErbB signaling pathway',
'MAPK cascade',
#'negative regulation of transcription by RNA polymerase II',
'Ras protein signal transduction',
'VEGF signaling pathway'
)
inspect.selTerms.GO(rs.ALL.trj, c(selTerms.ALL.disease, selTerms.ALL.important)) #
inspect.selTerms.GO(rs.ALL.trj, selTerms.ALL.interesting) #
inspect.selTerms.GO(rs.ALL.trj, c(selTerms.ALL.interesting, selTerms.ALL.important)) #
# reduce to terms of interest
rs.ALL.trj.fin <- rs.ALL.trj %>%
filter(Description %in% c(selTerms.ALL.interesting, selTerms.ALL.important))
rs.ALL.trj.fin <- rs.ALL.trj.fin[-36,] # SRP is double mentioned in pro
# prep levels to order axis elements. NEEDED???
#rs.ALL.trj90.fin$Cluster <- droplevels(rs.ALL.trj90.fin$Cluster)
# check
sort(unique(rs.ALL.trj.fin$Description))
# define levels Y axis
unique(rs.ALL.trj.fin$Description)
# shorten terms
rs.ALL.trj.fin$desc <- rs.ALL.trj.fin$Description
unique(rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "leukocyte cell-cell adhesion",
"Leukocyte cell-cell adh.", rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "mitochondrial membrane",
"Mito. membrane", rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "L13a-mediated translational silencing of Ceruloplasmin expression",
"L13a silencing Cerulop.", rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "SRP-dependent cotranslational protein targeting to membrane",
"SRP-dep. loc.", rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "Regulation of expression of SLITs and ROBOs",
"SLIT and ROBO reg.", rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "VEGF signaling pathway",
"VEGF pathway", rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "mTOR signaling pathway" ,
"mTOR pathway" , rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "Neutrophil extracellular trap formation",
"NET formation", rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "HIF-1 signaling pathway",
"HIF-1 pathway", rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "Ras protein signal transduction",
"Ras signal", rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "Nonsense-Mediated Decay (NMD)",
"NMD", rs.ALL.trj.fin$desc)
#
# rs.ALL.trj90.fin$desc <- ifelse(rs.ALL.trj90.fin$desc == ,
# , rs.ALL.trj90.fin$desc)
unique(rs.ALL.trj.fin$desc)
levels.LL.trj.fin <- c(
"VEGF pathway",
"Ras signal",
"NET formation",
"mTOR pathway",
"MAPK cascade",
"Leukocyte cell-cell adh.",
"HIF-1 pathway",
"Ribosome",
"Translation",
"Mito. membrane",
"SRP-dep. loc.",
"NMD",
"L13a silencing Cerulop.",
"SLIT and ROBO reg.",
"Cell Cycle"
)
setdiff(rs.ALL.trj.fin$desc, levels.LL.trj.fin) # ok
## shorten terms
# check
#rs.ALL.trj90.fin$desc2 <- stringr::str_wrap(rs.ALL.trj90.fin$desc, 20)
#levels.LL.trj90.fin <- stringr::str_wrap(levels.LL.trj90.fin, 20)
## plot options
# shapes
rs.ALL.trj.fin$shape <- case_when(
str_detect(rs.ALL.trj.fin$Cluster, 'mrn') ~ 'mrn',
str_detect(rs.ALL.trj.fin$Cluster, 'mic') ~ 'mic',
str_detect(rs.ALL.trj.fin$Cluster, 'pro') ~ 'pro'
)
shps <- c(1, 16, 5)
rs.ALL.trj.fin$shape <- factor(rs.ALL.trj.fin$shape, levels = c('mrn', 'pro', 'mic'))
# colors
table(rs.ALL.trj.fin$Cluster)
colors_trj <- c(
'#f89540', # mrn up
"#7e03a8", # mrn down
'#f89540', # pro up
"#7e03a8", # pro down
"#2166AC", # pro upDn
'hotpink', # pro dnUp
'lightblue', # pro DnUpDn
"#7e03a8", # mic down
'#f89540', # mic up
"#2166AC") # mic upDn
names(colors_trj) <- c('mrn.Up', 'mrn.Dn', 'pro.Up', 'pro.Dn', 'pro.UpDn', 'pro.DnUp', 'pro.DnUpDn', 'mic.Dn', 'mic.Up', 'mic.UpDn')
plot.enriched.trj <- function(rs.CP, desc.levels, fondSize, labSize, colorsX){
p <- rs.CP %>%
ggplot(aes(x= Cluster, y = desc, color = Cluster, label = Count, shape = shape)) +
#geom_point(aes(size = FoldEnrichment), stroke = 2) +
geom_point(aes(size = 3), stroke = 2) +
geom_label(color = 'black', label.padding = unit(0.09, "lines"), hjust = -0.5, size = labSize) +
# scale_size(range = c(5, 15)) +
theme_light(base_size = 18) +
theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1, size=fondSize),
axis.title.x = element_blank() ,
axis.text.y = element_text(size=fondSize),
legend.position = "none") +
scale_color_manual(values = colorsX, guide = 'none') +
scale_shape_manual(values=shps) +
#ggtitle(title) +
ylab('Terms') +
scale_y_discrete(limits=rev(desc.levels)) +
guides(shape = "none")
return(p)
}
p.ALL.trj <- plot.enriched.trj(rs.ALL.trj.fin, levels.LL.trj.fin, 20, 5, colors_trj)
p.ALL.trj
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_3/", "p.ALL.trj", ".svg"),
plot = p.ALL.trj,
device = svg,
width = 9,
height = 8)
library(ComplexUpset)
# turn into SYMBOLS
turnENSG2SYMBOL <- function(vectorENSG){
vectorSYMBOL <- fan$SYMBOL[match(vectorENSG, fan$ENSG)]
return(vectorSYMBOL)
}
l.trj_mrn.pro_SYMBOL <- purrr::map(l.trj_mrn.pro_ENSG, turnENSG2SYMBOL)
# create TRUE/FALSE table
names(l.trj_mrn.pro_SYMBOL)
trj4upset <- data.frame(SYMBOL = fan$SYMBOL)
trj4upset$mrn.Up <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.Up"]], T, F)
trj4upset$mrn.Dn <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.Dn"]], T, F)
trj4upset$mrn.UpDn <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.UpDn"]], T, F)
trj4upset$mrn.DnUp <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.DnUp"]], T, F)
trj4upset$mrn.UpDnUp <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.UpDnUp"]], T, F)
trj4upset$mrn.DnUpDn <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.DnUpDn"]], T, F)
trj4upset$pro.Up <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["pro.Up"]], T, F)
trj4upset$pro.Dn <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["pro.Dn"]], T, F)
trj4upset$pro.UpDn <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["pro.UpDn"]], T, F)
trj4upset$pro.DnUp <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["pro.DnUp"]], T, F)
# plot prep
trj.groups <- colnames(trj4upset)[-1]
# plot
p.upset.traj <- upset(trj4upset, trj.groups, name='Traj',
width_ratio=0.1, min_degree=2, min_size= 18,
# mode= 'inclusive_intersection', # does not matter because these are exclusive lists anyways
themes = upset_default_themes(text=element_text(size=20)),
set_sizes=(
upset_set_size() + theme(axis.text.x=element_text(angle=35), text=element_text(size=15))),
base_annotations=list(
'Intersection size'=intersection_size(
text=list(size=6),
#width=0.4))
)))
p.upset.traj
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_3/", "p.upset.traj", ".svg"),
plot = p.upset.traj,
device = svg,
width = 10,
height = 6)
## add disease and unregulated genes
trj4upsetWdis <- trj4upset
SCN.genes <- read_csv(paste0(dir_moa, "data/anno/", "genotypes_of_SCN.csv"))
AML.genes <- read_csv(paste0(dir_moa, "data/anno/", "genes_of_AML.csv"))
total.trj <- unique(Reduce(c,l.trj_mrn.pro_SYMBOL))
total.mrn.noTRJ <- unique(dex.all$dex.mrn$dex_stage_mrn_vsn$SYMBOL)
total.mrn.noTRJ <- total.mrn.noTRJ[!total.mrn.noTRJ %in% total.trj]
total.pro.noTRJ <- unique(dex.all$dex.pro$dex_stage_pro_vsn$SYMBOL)
total.pro.noTRJ <- total.pro.noTRJ[!total.pro.noTRJ %in% total.trj]
# add
trj4upsetWdis$SCN <- ifelse(trj4upset$SYMBOL %in% SCN.genes$SYMBOL, T, F)
trj4upsetWdis$AML <- ifelse(trj4upset$SYMBOL %in% AML.genes$SYMBOL, T, F)
# # remove non-overlapping SYMBOLS: not needed anymore
trj4upsetWdis$RowSum <- rowSums(trj4upsetWdis[,-1])
trj4upsetWdis.fin <- trj4upsetWdis %>% filter(RowSum > 0) %>% select(-RowSum)
# mrn
trj4upsetWdis$unreg.mRNA <- ifelse(trj4upset$SYMBOL %in% total.mrn.noTRJ, T, F)
sum(trj4upsetWdis$unreg.mRNA) # 3510
length(total.mrn.noTRJ) # 3510
# SCN
intersect(total.mrn.noTRJ, SCN.genes$SYMBOL) # "CSF3R" "GFI1" "LAMTOR2" "USB1" "SRP19" "SMARCD2"
intersect(total.mrn.noTRJ, AML.genes$SYMBOL) # "TET2" "CBL" "ASXL1" "CEBPA"
# pro
trj4upsetWdis$unreg.Prot <- ifelse(trj4upset$SYMBOL %in% total.pro.noTRJ, T, F)
sum(trj4upsetWdis$unreg.Prot) # 959
length(total.pro.noTRJ) # 959
intersect(total.pro.noTRJ, SCN.genes$SYMBOL) # "SMARCD2" "SRP19"
intersect(total.pro.noTRJ, AML.genes$SYMBOL) # "CEBPA
#traj.pro.unreg <- trj904upsetWdis %>% filter(unreg.Prot == T) %>% pull(SYMBOL)
#intersect(traj.pro.unreg, SCN.genes$SYMBOL) #
# plot prep
# trj.groupsWdis <- colnames(trj4upsetWdis.fin)[-1]
trj.groupsWdis <- colnames(trj4upsetWdis.fin)[-1]
# first trial for dimensions
upset(trj4upsetWdis.fin, trj.groupsWdis, name='Traj',
set_sizes= FALSE,
mode= 'inclusive_intersection', #
intersections=list(
c('SCN', "mrn.Up"), # 2
c('SCN', "mrn.Dn"), # 4
c('SCN', "mrn.UpDn"), # 2
#c('SCN', "mrn.DnUp"), # 0
#c('SCN', "mrn.UpDnUp"), # 0
#c('SCN', "mrn.DnUpDn"), # 0
c('SCN', "pro.Up"), # 1
#c('SCN', "pro.Dn"), # 0
c('SCN', "pro.UpDn"), # 4
#c('SCN', "pro.DnUp"), # 0
#c('AML', "mrn.Up"), # 0
c('AML', "mrn.Dn"), # 1
c('AML', "pro.Dn") # 2
#c('AML', "mrn.UpDn"), # 0
#c('AML', "mrn.DnUp"), # 0
#c('AML', "mrn.UpDnUp"), # 0
#c('AML', "mrn.DnUpDn"), # 0
#c('AML', "pro.Up"), # 0
#c('AML', "pro.UpDn"), # 0
#c('AML', "pro.DnUp") # 0
#c('SCN', "unreg.mRNA"), # works
#c('SCN', "unreg.Prot"), # why does this show 0 if intersect shows more ???
#c('AML', "unreg.mRNA"), # 4
#c('AML', "unreg.Prot") # 4
)
)
# add gene names
trj.groupsWdis.2 <- c("mrn.Up", "mrn.Dn", "mrn.UpDn", "pro.Up" , "pro.Dn", "pro.UpDn", "SCN", "AML")
p.upset.dis <- upset(trj4upsetWdis.fin, trj.groupsWdis.2, name='Feature sets',
set_sizes= FALSE,
mode= 'inclusive_intersection', #
themes = upset_default_themes(text=element_text(size=20)),
intersections=list(
c('SCN', "mrn.Up"), # 2
c('SCN', "mrn.Dn"), # 4
c('SCN', "mrn.UpDn"), # 2
c('SCN', "pro.Up"), # 1
c('SCN', "pro.UpDn"), # 4
c('AML', "mrn.Dn"), # 1
c('AML', "pro.Dn") # 2
),
,
base_annotations = list(
'Intersection size'=(
intersection_size(color='grey9',
fill='grey80',
mode= 'inclusive_intersection',
text=list(size=6))
+ ylim(c(0, 9))
#+ theme(plot.background=element_rect(fill='#E5D3B3'))
#+ ylab('# observations in intersection')
) +
geom_text(
mapping=aes(label=SYMBOL),
position=position_stack(),
na.rm=TRUE,
vjust=1.3
)
)
)
p.upset.dis
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_3/", "p.upset.dis", ".svg"),
plot = p.upset.dis,
device = svg,
width = 10,
height = 6)
Use this to show disease gene names that overlap https://stackoverflow.com/questions/75990226/add-labels-to-upset-plot-so-the-values-of-intersection-would-be-visible-along-t