Last update: 24-10-16

Last markdown compiled: 24-10-16

# version: 22.03.24
knitr::opts_chunk$set(warning = FALSE, message = FALSE, results = FALSE) 

# dirs
## Sebastian @Linux
dir_moa <- "/home/isar/Dropbox/scn_moa/"
## Sebastian @MAC



# data, revised 2024.01.09
# exp
load(file = paste0(dir_moa, "/data/final/", "gnp.pro.RData"))
load(file = paste0(dir_moa, "/data/final/", "gnp.mrn.RData"))
load(file = paste0(dir_moa, "/data/final/", "gnp.mic.RData"))

# results
load(file = paste0(dir_moa, "/data/final/", "dex.all.RData"))
load(file = paste0(dir_moa, "/data/final/", "timeomics.singles.RData"))
load(file = paste0(dir_moa, "/data/final/", "res.wgcna.RData"))


## packages
source(paste0(dir_moa, "/scripts/", "scn_moa_fct.R"))

# load packagea
sapply(packages_general, require, character.only = TRUE)
sapply(packages_enrichment, require, character.only = TRUE)

# additional packages (move at the end to package script)
library("viridis")
library(ggtext)
library(glue)

Universes

# extract data
fan <- gnp.mrn$fan

# ENTREZ
dex.all$dex.mrn$dex_stage_mrn_vsn$ENTREZ <- fan$ENTREZ[match(dex.all$dex.mrn$dex_stage_mrn_vsn$fid, fan$ENSG)]
dex.all$dex.pro$dex_stage_pro_vsn$ENTREZ <- fan$ENTREZ[match(dex.all$dex.pro$dex_stage_pro_vsn$fid, fan$UNIPROT)]

# ENSG
dex.all$dex.mrn$dex_stage_mrn_vsn$ENSG <- dex.all$dex.mrn$dex_stage_mrn_vsn$fid
dex.all$dex.pro$dex_stage_pro_vsn$ENSG <- fan$ENSG[match(dex.all$dex.pro$dex_stage_pro_vsn$fid, fan$UNIPROT)]

# SYMBOL
dex.all$dex.mrn$dex_stage_mrn_vsn$SYMBOL <- fan$SYMBOL[match(dex.all$dex.mrn$dex_stage_mrn_vsn$fid, fan$ENSG)]
dex.all$dex.pro$dex_stage_pro_vsn$SYMBOL <- fan$SYMBOL[match(dex.all$dex.pro$dex_stage_pro_vsn$fid, fan$UNIPROT)]


#### backgrounds
universe_mrn.ENSG <- unique(dex.all$dex.mrn$dex_stage_mrn_vsn$ENSG)
universe_pro.ENSG <- unique(dex.all$dex.pro$dex_stage_pro_vsn$ENSG)
universe_combo.ENSG <- unique(c(universe_mrn.ENSG, universe_pro.ENSG))

universe_mrn.ENTREZ <- unique(dex.all$dex.mrn$dex_stage_mrn_vsn$ENTREZ)
universe_pro.ENTREZ <- unique(dex.all$dex.pro$dex_stage_pro_vsn$ENTREZ)
universe_combo.ENTREZ <- unique(c(universe_mrn.ENTREZ, universe_pro.ENTREZ))

universe_mrn.SYMBOL <- fan$SYMBOL[match(universe_mrn.ENSG, fan$ENSG)]
universe_pro.SYMBOL <- fan$SYMBOL[match(universe_pro.ENSG, fan$ENSG)]


# remove invalid universe entries
universe_mrn.SYMBOL <- universe_mrn.SYMBOL[-c(2612)]
# write_clip(universe_mrn.SYMBOL)

fct

feature selection

# dex results: Fc filtered as ENSG vectors
get.dex.FC_ENSG <- function(dex, COMPoi, DIRECTION, FClevel){
  dex_comp <- dex %>% filter(comparison %in% COMPoi)
  if(DIRECTION == 'up'){
    foi <- dex_comp %>% filter(adj.P.Val < 0.05 & logFC >= FClevel) %>% arrange(desc(logFC)) %>% pull(ENSG)
  }
  if(DIRECTION == 'down'){
    foi <- dex_comp %>% filter(adj.P.Val < 0.05 & logFC <= -FClevel) %>% arrange(logFC) %>% pull(ENSG)
  }
  #fid <- fid[!is.na(fid)]
  return(foi)
}

# dex results: Fc filtered as ENSG vectors
get.dex.FC_miRs <- function(dex, COMPoi, DIRECTION, FClevel){
  dex_comp <- dex %>% filter(comparison %in% COMPoi)
  if(DIRECTION == 'up'){
    foi <- dex_comp %>% filter(adj.P.Val < 0.05 & logFC >= FClevel) %>% arrange(desc(logFC)) %>% pull(fid)
  }
  if(DIRECTION == 'down'){
    foi <- dex_comp %>% filter(adj.P.Val < 0.05 & logFC <= -FClevel) %>% arrange(logFC) %>% pull(fid)
  }
  #fid <- fid[!is.na(fid)]
  return(foi)
}

enrichment functions

# fct for ENSG vector lists
apply_enrichment.GO <- function(l.dex, UNIV){
  rs <- compareCluster(geneClusters = l.dex,
                          pAdjustMethod = "BH",
                         pvalueCutoff = 0.05,
                         keyType = "ENSEMBL",
                         OrgDb = "org.Hs.eg.db",
                         readable = TRUE,
                          ont = "ALL",
                         fun = "enrichGO",
                         universe = UNIV)
  rs <- rs@compareClusterResult
  return(rs)
}

### for ENSEMBL
apply_enrichment.pathway <- function(l.dex, UNIV){
  rs <- compareCluster(geneClusters = l.dex,
                          pAdjustMethod = "BH",
                         pvalueCutoff = 0.05,
                         # keyType = "ENSEMBL",
                         #OrgDb = "org.Hs.eg.db",
                         readable = TRUE,
                         # ont = "ALL",
                         fun = "enrichPathway",
                         universe = UNIV)
  rs <- rs@compareClusterResult
  return(rs)
}


## fct enrich KEGG
apply_enrichment.kegg <- function(l.dex, UNIV){
  rs <- compareCluster(geneClusters = l.dex,
                          pAdjustMethod = "BH",
                         pvalueCutoff = 0.05,
                         # keyType = "ENSEMBL",
                         #OrgDb = "org.Hs.eg.db",
                         #readable = TRUE,
                         # ont = "ALL",
                         fun = "enrichKEGG",
                         universe = UNIV)
  rs <- rs@compareClusterResult
  return(rs)
}


## format enriched results

# add_fraction <- function(rs.enriched){   # not needed anymore I think
#   rs.CP %<>% separate(col = BgRatio, into = c("inGroup", "background"))
#   rs.CP$inGroup <- as.numeric(rs.CP$inGroup)
#   rs.CP$Count <- as.numeric(rs.CP$Count)
#   rs.CP$frac <- round(rs.CP$Count / rs.CP$inGroup * 100,0)
#   return(rs.CP)
# }

add.enrichFactors <- function(rs.enriched){
  rs.enriched %<>% mutate(richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio)))
  rs.enriched %<>% mutate(FoldEnrichment = parse_ratio(GeneRatio) / parse_ratio(BgRatio))
  return(rs.enriched)
}

plotter

# inspection functions

find_topPval.GO <- function(rs.all, TOPn){
  topPval.MF <- rs.all %>%
    filter(ONTOLOGY == 'MF') %>%
    group_by(Cluster, ONTOLOGY) %>%
    slice_min(p.adjust, n=TOPn) %>% pull(ID)
  topPval.BP <- rs.all %>%
    filter(ONTOLOGY == 'BP') %>%
    group_by(Cluster, ONTOLOGY) %>%
    slice_min(p.adjust, n=TOPn) %>% pull(ID)
  topPval.CC <- rs.all %>%
    filter(ONTOLOGY == 'CC') %>%
    group_by(Cluster, ONTOLOGY) %>%
    slice_min(p.adjust, n=TOPn) %>% pull(ID)
  vec <- unique(c(topPval.MF, topPval.BP, topPval.CC))
  return(vec)
}


inspect.top.GO <- function(rs.CP, topP){
  dat <- rs.CP %>%
    filter(ID %in% topP)
  #dat$Description <- factor(dat$Description, levels = orderY)
  #dat$DESC <- stringr::str_wrap(dat$Description, termLength)
  #dat$DESC <- factor(dat$DESC, levels = rev(LEVELS))
  dat %>%
    ggplot(aes(x= Cluster, y = Description,  color = Cluster)) +
    geom_point(aes(size = Count)) +
    scale_size(range = c(1, 5)) +
    theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1),
          axis.title.x = element_blank()) +
    scale_color_manual(values = colors_progression.arrow)
    #ggtitle(title)
}


inspect.selTerms.GO <- function(rs.enrichment, selTerms){
  dat <- rs.enrichment %>% 
    # filter(ID %in% topP) %>%
    filter(Description %in% selTerms)
  
  dat %>%
    ggplot(aes(x= Cluster, y = Description,  color = Cluster)) +
    geom_point(aes(size = FoldEnrichment)) +
    scale_size(range = c(1, 5)) +
    theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1),
          axis.title.x = element_blank()) +
    scale_color_manual(values = colors_progression.arrow)
  #ggtitle(title)
}

# final plot
plot.GO_final <- function(rs.CP, fondSize, LEVELS, HJUST){
  p <- rs.CP %>%
    ggplot(aes(x= Cluster, y = factor(DESC, levels = LEVELS),  color = Cluster, label = Count)) +
    geom_point(aes(size = 3)) +
    geom_text(color = 'black', hjust = HJUST, size = 5) +
    # scale_size(range = c(5, 15)) +
    theme_light(base_size=20) +
    theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1),
          axis.title.x = element_blank() ,
          axis.title.y = element_blank() ,
          text = element_text(size=fondSize)) +
    scale_color_manual(values = colors_progression.arrow, guide = 'none') +
    scale_size(range = c(4, 10)) +
    theme(legend.position = "none")
    # theme(
    #   legend.position = c(.95, .95),
    #   legend.justification = c("right", "top"),
    #   legend.box.just = "right",
    #   legend.margin = margin(6, 6, 6, 6)) +
    # labs(size="Fold\nEnrichment")
  return(p)
}

DEX: mrn & pro

prep

Goal: lists with threshold filtered ENSG & ENTREZ vectors.

# Fc1
l.dex.mrn_ENSG_up <- list(
  'MB->PM' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MBtoPM', 'up', 1),
  'PM->MC' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'PMtoMC', 'up', 1),
  'MC->MM' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MCtoMM', 'up', 1),
  'MM->B' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MMtoB', 'up', 1),
  'B->S' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'BtoS', 'up', 1),
  'S->PMN' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'StoPMN', 'up', 1)
)

l.dex.mrn_ENSG_dn <- list(
  'MB->PM' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MBtoPM', 'down', 1),
  'PM->MC' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'PMtoMC', 'down', 1),
  'MC->MM' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MCtoMM', 'down', 1),
  'MM->B' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'MMtoB', 'down', 1),
  'B->S' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'BtoS', 'down', 1),
  'S->PMN' = get.dex.FC_ENSG(dex.all$dex.mrn$dex_stage_mrn_vsn, 'StoPMN', 'down', 1)
)

## pro
l.dex.pro_ENSG_up <- list(
  'MB->PM' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MBtoPM', 'up', 1),
  'PM->MC' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'PMtoMC', 'up', 1),
  'MC->MM' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MCtoMM', 'up', 1),
  'MM->B' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MMtoB', 'up', 1),
  'B->PMN' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'BtoPMN', 'up', 1)
)

l.dex.pro_ENSG_dn <- list(
  'MB->PM' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MBtoPM', 'down', 1),
  'PM->MC' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'PMtoMC', 'down', 1),
  'MC->MM' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MCtoMM', 'down', 1),
  'MM->B' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'MMtoB', 'down', 1),
  'B->PMN' = get.dex.FC_ENSG(dex.all$dex.pro$dex_stage_pro_vsn, 'BtoPMN', 'down', 1)
)

# change fid
l.dex.mrn_ENTREZ_up <- purrr::map(l.dex.mrn_ENSG_up, turn.ENSGintoENTREZ)
l.dex.mrn_ENTREZ_dn <- purrr::map(l.dex.mrn_ENSG_dn, turn.ENSGintoENTREZ)

l.dex.pro_ENTREZ_up <- purrr::map(l.dex.pro_ENSG_up, turn.ENSGintoENTREZ)
l.dex.pro_ENTREZ_dn <- purrr::map(l.dex.pro_ENSG_dn, turn.ENSGintoENTREZ)

enrich

Goal: apply enrichment functions to the lists of feature vectors

# loading saved data below. takes very long

# # GO TERMS
# rs.GO.dex_mrn.up <- apply_enrichment.GO(l.dex.mrn_ENSG_up, universe_mrn.ENSG)
# rs.GO.dex_mrn.dn <- apply_enrichment.GO(l.dex.mrn_ENSG_dn, universe_mrn.ENSG)
# 
# rs.GO.dex_pro.up <- apply_enrichment.GO(l.dex.pro_ENSG_up, universe_pro.ENSG)
# rs.GO.dex_pro.dn <- apply_enrichment.GO(l.dex.pro_ENSG_dn, universe_pro.ENSG)
# 
# 
# # pathways
# rs.PW.dex_mrn.up <- apply_enrichment.pathway(l.dex.mrn_ENTREZ_up, universe_mrn.ENTREZ)
# rs.PW.dex_mrn.dn <- apply_enrichment.pathway(l.dex.mrn_ENTREZ_dn, universe_mrn.ENTREZ)
# 
# rs.PW.dex_pro.up <- apply_enrichment.pathway(l.dex.pro_ENTREZ_up, universe_pro.ENTREZ)
# rs.PW.dex_pro.dn <- apply_enrichment.pathway(l.dex.pro_ENTREZ_dn, universe_pro.ENTREZ)
# 
# 
# # kegg
# rs.KG.dex_mrn.up <- apply_enrichment.kegg(l.dex.mrn_ENTREZ_up, universe_mrn.ENTREZ)
# rs.KG.dex_mrn.dn <- apply_enrichment.kegg(l.dex.mrn_ENTREZ_dn, universe_mrn.ENTREZ)
# 
# rs.KG.dex_pro.up <- apply_enrichment.kegg(l.dex.pro_ENTREZ_up, universe_pro.ENTREZ)
# rs.KG.dex_pro.dn <- apply_enrichment.kegg(l.dex.pro_ENTREZ_dn, universe_pro.ENTREZ)
# 
# 
# res.enrichment.dex <- list(
#   #GO
#   rs.GO.dex_mrn.up = rs.GO.dex_mrn.up,
#   rs.GO.dex_mrn.dn = rs.GO.dex_mrn.dn,
#   rs.GO.dex_pro.up = rs.GO.dex_pro.up,
#   rs.GO.dex_pro.dn = rs.GO.dex_pro.dn,
#   # pathways
#   rs.PW.dex_mrn.up = rs.PW.dex_mrn.up,
#   rs.PW.dex_mrn.dn = rs.PW.dex_mrn.dn,
#   rs.PW.dex_pro.up = rs.PW.dex_pro.up,
#   rs.PW.dex_pro.dn = rs.PW.dex_pro.dn,
#   # kegg
#   rs.KG.dex_mrn.up = rs.KG.dex_mrn.up,
#   rs.KG.dex_mrn.dn = rs.KG.dex_mrn.dn,
#   rs.KG.dex_pro.up = rs.KG.dex_pro.up,
#   rs.KG.dex_pro.dn = rs.KG.dex_pro.dn
# )

# save(res.enrichment.dex,
#      file = paste0(dir_moa, "/data/final/preps/enrichment/", "res.enrichment.dex.RData"))

load(file = paste0(dir_moa,  "/data/final/preps/enrichment/", "res.enrichment.dex.RData"))

mrn

UP

# copy & add enrichFactor
rs.GO.mrn_UP <- res.enrichment.dex$rs.GO.dex_mrn.up
rs.GO.mrn_UP <- add.enrichFactors(rs.GO.mrn_UP)

## inspect top terms
# find
top.GO_mrn_UP <- find_topPval.GO(rs.GO.mrn_UP, 2)
# plot
inspect.top.GO(rs.GO.mrn_UP, top.GO_mrn_UP)

# check & inspect frequency of terms
terms.mrn.UP <- rs.GO.mrn_UP %>%
  count('Description') %>%
  arrange(desc(freq))

# select terms to show:
selTerms_mrn.UP <- c(
  'azurophil granule',
  'specific granule',
  'tertiary granule',
  'ficolin-1-rich granule',
    'cell-cell adhesion',
  #'signaling receptor activity',
  #'cell surface',
  'mitochondrial matrix'
)

# inspect selTerms
inspect.selTerms.GO(rs.GO.mrn_UP, selTerms_mrn.UP)

# define order of visible terms and line breaks
selTerms_freq_mrn.UP <- rs.GO.mrn_UP %>% 
  #filter(ID %in% topP_mrn_UP) %>%
  filter(Description %in% selTerms_mrn.UP) %>%
  #select(Description, Cluster) %>%
  #arrange(Description) %>%
  count('Description') %>%
  arrange(desc(freq))

rs.GO.mrn_UP %>% 
  #filter(ID %in% topP_mrn_UP) %>%
  filter(Description %in% selTerms_mrn.UP) %>%
  select(Description, Cluster) %>%
  left_join(selTerms_freq_mrn.UP, by = "Description") %>%
  mutate(DESC_pre = stringr::str_wrap(Description, 16)) %>%
  mutate(DESC = paste0("\"", DESC_pre, "\"")) %>%
  arrange(desc(freq))

# define levels for plot
rs.GO.mrn_UP.fin <- rs.GO.mrn_UP %>% 
  filter(Description %in% selTerms_mrn.UP) %>%
  mutate(DESC = stringr::str_wrap(Description, 16))

# rename to shorten (optional)


## reformat and define levels
rs.GO.mrn_UP.fin$DESC <- sub("(\\w)", "\\U\\1", rs.GO.mrn_UP.fin$DESC, perl = TRUE)

unique(rs.GO.mrn_UP.fin$DESC)

LEVELS.mrn.UP <- rev(c(
  "Azurophil\ngranule",
  'Specific granule',
  'Tertiary granule',
  "Ficolin-1-rich\ngranule",
  "Cell-cell\nadhesion",
  "Mitochondrial\nmatrix"
))

rs.GO.mrn_UP.fin$DESC <- factor(rs.GO.mrn_UP.fin$DESC, levels = LEVELS.mrn.UP)

# plot
p.GO_mrn.UP <- plot.GO_final(rs.GO.mrn_UP.fin, 20, LEVELS.mrn.UP, -0.6)
p.GO_mrn.UP

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_2/", "p.GO_mrn.UP", ".svg"),
  plot = p.GO_mrn.UP,
  device = svg,
  width = 6.5,
  height = 6)

DN

# copy & add enrichFactor
rs.GO.mrn_DN <- res.enrichment.dex$rs.GO.dex_mrn.dn
rs.GO.mrn_DN <- add.enrichFactors(rs.GO.mrn_DN)

## inspect top terms
# find
top.GO_mrn_DN <- find_topPval.GO(rs.GO.mrn_DN, 2)
# plot
inspect.top.GO(rs.GO.mrn_DN, top.GO_mrn_DN)

# check & inspect frequency of terms
terms.mrn.DN <- rs.GO.mrn_DN %>%
  count('Description') %>%
  arrange(desc(freq))

# select terms to show:
selTerms_mrn.DN <- c(
  'ribosome',
  'specific granule',
  'azurophil granule',
  'cell surface',
  'DNA-binding transcription factor activity, RNA polymerase II-specific',
  'tertiary granule',
  'translation',
  'mitochondrial matrix'
)
  
# inspect second plot
inspect.selTerms.GO(rs.GO.mrn_DN, selTerms_mrn.DN)

# define order of visible terms
selTerms_freq_mrn.DN <- rs.GO.mrn_DN %>% 
  #filter(ID %in% topP_mrn_UP) %>%
  filter(Description %in% selTerms_mrn.DN) %>%
  #select(Description, Cluster) %>%
  #arrange(Description) %>%
  count('Description') %>%
  arrange(desc(freq))

rs.GO.mrn_DN %>% 
  #filter(ID %in% topP_mrn_DN) %>%
  filter(Description %in% selTerms_mrn.DN) %>%
  select(Description, Cluster) %>%
  left_join(selTerms_freq_mrn.DN, by = "Description") %>%
  mutate(DESC_pre = stringr::str_wrap(Description, 16)) %>%
  mutate(DESC = paste0("\"", DESC_pre, "\"")) %>%
  arrange(desc(freq))

# define levels for plot

# format final data
rs.GO.mrn_DN.fin <- rs.GO.mrn_DN %>% 
  filter(Description %in% selTerms_mrn.DN) %>%
  #filter(ID %in% topP_mrn_DN) %>%
  mutate(DESC = stringr::str_wrap(Description, 16))

# shorten terms
rs.GO.mrn_DN.fin$DESC[17] <- 'TF-activity,\n RNA poly.II spc.'

## reformat and define levels
rs.GO.mrn_DN.fin$DESC <- sub("(\\w)", "\\U\\1", rs.GO.mrn_DN.fin$DESC, perl = TRUE)

unique(rs.GO.mrn_DN.fin$DESC)

LEVELS.mrn.DN <- c(
  "Azurophil\ngranule",
  "Specific granule",
  "Tertiary granule",
  "TF-activity,\n RNA poly.II spc.",
  "Ribosome" ,
  "Mitochondrial\nmatrix",
  "Translation",
  "Cell surface"
)

rs.GO.mrn_DN.fin$DESC <- factor(rs.GO.mrn_DN.fin$DESC, levels = LEVELS.mrn.DN)


# plot
p.GO_mrn.DN <- plot.GO_final(rs.GO.mrn_DN.fin, 20, rev(LEVELS.mrn.DN), -0.6)

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_2/", "p.GO_mrn.DN", ".svg"),
  plot = p.GO_mrn.DN,
  device = svg,
  width = 6.5,
  height = 6)

pro

UP

# copy & add enrichFactor
rs.GO.pro_UP <- res.enrichment.dex$rs.GO.dex_pro.up
rs.GO.pro_UP <- add.enrichFactors(rs.GO.pro_UP)

## inspect top terms
# find
top.GO_pro_UP <- find_topPval.GO(rs.GO.pro_UP, 2)
# plot
inspect.top.GO(rs.GO.pro_UP, top.GO_pro_UP)

# check & inspect frequency of terms
terms.pro.UP <- rs.GO.pro_UP %>%
  count('Description') %>%
  arrange(desc(freq))

# select terms to show:
selTerms_pro.UP <- c(
  'ficolin-1-rich granule',
  #'secretory granule',
  'tertiary granule',
  'specific granule',
  'azurophil granule',
  #'cell migration',
  'cell-cell adhesion',
  'cytosolic ribosome',
  'respiratory burst'
)

# inspect selTerms
inspect.selTerms.GO(rs.GO.pro_UP, selTerms_pro.UP)

# define order of visible terms and line breaks
selTerms_freq_pro.UP <- rs.GO.pro_UP %>% 
  #filter(ID %in% topP_pro_UP) %>%
  filter(Description %in% selTerms_pro.UP) %>%
  #select(Description, Cluster) %>%
  #arrange(Description) %>%
  count('Description') %>%
  arrange(desc(freq))

rs.GO.pro_UP %>% 
  #filter(ID %in% topP_pro_UP) %>%
  filter(Description %in% selTerms_pro.UP) %>%
  select(Description, Cluster) %>%
  left_join(selTerms_freq_pro.UP, by = "Description") %>%
  mutate(DESC_pre = stringr::str_wrap(Description, 16)) %>%
  mutate(DESC = paste0("\"", DESC_pre, "\"")) %>%
  arrange(desc(freq))

# define levels for plot
rs.GO.pro_UP.fin <- rs.GO.pro_UP %>% 
  filter(Description %in% selTerms_pro.UP) %>%
  mutate(DESC = stringr::str_wrap(Description, 16))

# rename to shorten (optional)


## reformat and define levels
rs.GO.pro_UP.fin$DESC <- sub("(\\w)", "\\U\\1", rs.GO.pro_UP.fin$DESC, perl = TRUE)

unique(rs.GO.pro_UP.fin$DESC)

LEVELS.pro.UP <- rev(c(
  "Azurophil\ngranule",
  'Specific granule',
  'Tertiary granule',
  "Ficolin-1-rich\ngranule",
  "Cytosolic\nribosome",
  "Cell-cell\nadhesion",
  #"Cell migration",
  "Mitochondrial\nmatrix",
  "Respiratory\nburst"
))

rs.GO.pro_UP.fin$DESC <- factor(rs.GO.pro_UP.fin$DESC, levels = LEVELS.pro.UP)

# plot
p.GO_pro.UP <- plot.GO_final(rs.GO.pro_UP.fin, 20, LEVELS.pro.UP, -0.6)
p.GO_pro.UP

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_2/", "p.GO_pro.UP", ".svg"),
  plot = p.GO_pro.UP,
  device = svg,
  width = 6.5,
  height = 6)

DN

# copy & add enrichFactor
rs.GO.pro_DN <- res.enrichment.dex$rs.GO.dex_pro.dn
rs.GO.pro_DN <- add.enrichFactors(rs.GO.pro_DN)

## inspect top terms
# find
top.GO_pro_DN <- find_topPval.GO(rs.GO.pro_DN, 2)
# plot
inspect.top.GO(rs.GO.pro_DN, top.GO_pro_DN)

# check & inspect frequency of terms
terms.pro.DN <- rs.GO.pro_DN %>%
  count('Description') %>%
  arrange(desc(freq))

# select terms to show:
selTerms_pro.DN <- c(
  'RNA polymerase complex',
  'DNA replication',
  'mitochondrial matrix',
  'sulfur compound binding',
  'glycosaminoglycan binding'
)
  
# inspect second plot
inspect.selTerms.GO(rs.GO.pro_DN, selTerms_pro.DN)

# define order of visible terms
selTerms_freq_pro.DN <- rs.GO.pro_DN %>% 
  #filter(ID %in% topP_pro_UP) %>%
  filter(Description %in% selTerms_pro.DN) %>%
  #select(Description, Cluster) %>%
  #arrange(Description) %>%
  count('Description') %>%
  arrange(desc(freq))

rs.GO.pro_DN %>% 
  #filter(ID %in% topP_pro_DN) %>%
  filter(Description %in% selTerms_pro.DN) %>%
  select(Description, Cluster) %>%
  left_join(selTerms_freq_pro.DN, by = "Description") %>%
  mutate(DESC_pre = stringr::str_wrap(Description, 16)) %>%
  mutate(DESC = paste0("\"", DESC_pre, "\"")) %>%
  arrange(desc(freq))

# define levels for plot

# format final data
rs.GO.pro_DN.fin <- rs.GO.pro_DN %>% 
  filter(Description %in% selTerms_pro.DN) %>%
  #filter(ID %in% topP_pro_DN) %>%
  mutate(DESC = stringr::str_wrap(Description, 16))

# shorten terms


## define levels x axis
rs.GO.pro_DN.fin$Cluster <- factor(rs.GO.pro_DN.fin$Cluster, levels = c('MB->PM', 'PM->MC', 'MC->MM',  'MM->B', '-', 'B->PMN'))


## reformat and define levels
rs.GO.pro_DN.fin$DESC <- sub("(\\w)", "\\U\\1", rs.GO.pro_DN.fin$DESC, perl = TRUE)

unique(rs.GO.pro_DN.fin$DESC)

LEVELS.pro.DN <- c(
  "RNA polymerase\ncomplex",
  "Sulfur compound\nbinding",
  "Glycosaminoglycan\nbinding",
  "Mitochondrial\nmatrix",
  "DNA replication"
)

rs.GO.pro_DN.fin$DESC <- factor(rs.GO.pro_DN.fin$DESC, levels = LEVELS.pro.DN)


# plot
p.GO_pro.DN <- plot.GO_final(rs.GO.pro_DN.fin, 20, rev(LEVELS.pro.DN), -0.6)
p.GO_pro.DN

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_2/", "p.GO_pro.DN", ".svg"),
  plot = p.GO_pro.DN,
  device = svg,
  width = 6.5,
  height = 6)

DEX: mic

prep

# data
dex.all$dex.mic$dex_stage_mic_vsn

# for export
univ.mic <- unique(dex.all$dex.mic$dex_stage_mic_vsn$fid)
# write_clip(univ.mic)

# MBtoMC
mi.MBtoMC.dn <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'MBtoMC', 'down', 1) 
# write_clip(mi.MBtoMC.dn)

mi.MBtoMC.up <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'MBtoMC', 'up', 1) 
# write_clip(mi.MBtoMC.up)

# MCtoMM
mi.MCtoMM.dn <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'MCtoMM', 'down', 1) 
# write_clip(mi.MCtoMM.dn)

mi.MCtoMM.up <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'MCtoMM', 'up', 1) 
# write_clip(mi.MCtoMM.up)

# MMtoB: all 0! -> discarded

# BtoS
mi.BtoS.dn <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'BtoS', 'down', 1) 
# write_clip(mi.BtoS.dn)

mi.BtoS.up <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'BtoS', 'up', 1) 
# write_clip(mi.BtoS.up)


# StoPMN
mi.StoPMN.dn <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'StoPMN', 'down', 1) 
# write_clip(mi.StoPMN.dn) # only 2 -> no results!

mi.StoPMN.up <- get.dex.FC_miRs(dex.all$dex.mic$dex_stage_mic_vsn, 'StoPMN', 'up', 1) 
# write_clip(mi.StoPMN.up)

miEAA

Via: https://ccb-compute2.cs.uni-saarland.de/mieaa/

Setup: - Over-Representation Analysis (ORA) - Homo sapiens - Test set: dex list - no reference set - all categories - BHB with 0.05

-> only useful for localisations

# import
ora.MBtoMC.up <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.MBtoMC.up.csv'))
ora.MBtoMC.dn <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.MBtoMC.dn.csv'))
ora.MCtoMM.up <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.MCtoMM.up.csv'))
ora.MCtoMM.dn <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.MCtoMM.dn.csv'))
ora.BtoS.up <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.BtoS.up.csv'))
ora.BtoS.dn <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.BtoS.dn.csv'))
ora.StoPMN.up <- read_csv(paste0(dir_moa, '/data/final/preps/enrichment/miEAA/', 'ora.StoPMN.up.csv'))


# format
table(ora.MBtoMC.up$Category) # 8 loc, , 3 pw kegg
table(ora.MBtoMC.dn$Category) #  loc & pw kegg
table(ora.MCtoMM.up$Category) # loc
table(ora.MCtoMM.dn$Category) # loc
table(ora.BtoS.up$Category) # loc
table(ora.BtoS.dn$Category) # disease
table(ora.StoPMN.up$Category) # disease


# try loc and pw
ora.MBtoMC.up.loc <- ora.MBtoMC.up %>%
  filter(Category %in% c("Localization (RNALocate)"))
ora.MBtoMC.up.loc$stage <- 'MB->MC'

ora.MBtoMC.dn.loc <- ora.MBtoMC.dn %>%
  filter(Category %in% c("Localization (RNALocate)"))
ora.MBtoMC.dn.loc$stage <- 'MB->MC'

ora.MCtoMM.up.loc <- ora.MCtoMM.up %>%
  filter(Category %in% c("Localization (RNALocate)"))
ora.MCtoMM.up.loc$stage <- 'MC->MM'

ora.MCtoMM.dn.loc <- ora.MCtoMM.dn %>%
  filter(Category %in% c("Localization (RNALocate)"))
ora.MCtoMM.dn.loc$stage <- 'MC->MM'

ora.BtoS.up.loc <- ora.BtoS.up %>%
  filter(Category %in% c("Localization (RNALocate)"))
ora.BtoS.up.loc$stage <- 'B->S'

ora.BtoS.dn.loc <- ora.BtoS.dn %>%
  filter(Category %in% c("Localization (RNALocate)"))
ora.BtoS.dn.loc$stage <- 'B->S'

ora.StoPMN.up.loc <- ora.StoPMN.up %>%
  filter(Category %in% c("Localization (RNALocate)"))
ora.StoPMN.up.loc$stage <- 'S->PMN'


### up
mic.locs.up <- rbind(ora.MBtoMC.up.loc, ora.MCtoMM.up.loc, ora.BtoS.up.loc, ora.StoPMN.up.loc)
mic.locs.up %<>% add_row(stage = 'MM->B', Subcategory = "Mitochondrion", `P-adjusted`= NA)
mic.locs.up %<>% add_row(stage = 'S->PMN', Subcategory = "Mitochondrion", `P-adjusted`= NA)

mic.locs.up$stage <- factor(mic.locs.up$stage, levels = c('MB->MC', 'MC->MM','MM->B', 'B->S', 'S->PMN'))

unique(mic.locs.up$Subcategory)
levels.mic.locs.up <- rev(c("Nucleus", "Mitochondrion", "Cytoplasm", "Microvesicle", "Nucleolus", "Extracellular vesicle"))


mic.locs.up$count <- str_count(mic.locs.up$`miRNAs/precursors`, ';')
mic.locs.up$count <- mic.locs.up$count +1

p.mic.locs.up_pre <- mic.locs.up %>% 
  mutate(size = 1- `P-adjusted` ) %>%
  filter(! Subcategory %in% c("microvesicle", "exosome", 'Exosome', "Circulating")) %>%
  ggplot(aes(x = stage, y = factor(Subcategory, levels = levels.mic.locs.up), color = stage, label = count)) +
  geom_point(aes(size = size)) +
  scale_color_manual(values = colors_progression.arrow.mic, guide = 'none') +
  geom_text(color = 'black', hjust = -1, size = 5)
p.mic.locs.up_pre

p.mic.locs.up <- p.mic.locs.up_pre  +
    #theme(legend.position="none")+
    #ylab("Terms") +
    #scale_size(range = c(4, 10)) +
    theme(
      ) +
  labs(size="1-FDR") +
  theme_light(base_size=20) +
  theme(legend.position = "none")
  # theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1),
  #         axis.title.x = element_blank(),
  #         axis.title.y=element_blank(),
  #         legend.position = c(.95, .95),
  #         legend.justification = c("right", "top"),
  #         legend.box.just = "right",
  #         legend.margin = margin(6, 6, 6, 6))
  
p.mic.locs.up

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_2/", "p.mic.locs.up", ".svg"),
  plot = p.mic.locs.up,
  device = svg,
  width = 6.5,
  height = 6)


## down
mic.locs.dn <- rbind(ora.MBtoMC.dn.loc, ora.MCtoMM.dn.loc, ora.BtoS.dn.loc)
mic.locs.dn %<>% add_row(stage = 'MM->B', Subcategory = "Mitochondrion", `P-adjusted`= NA)
mic.locs.dn %<>% add_row(stage = 'B->S', Subcategory = "Mitochondrion", `P-adjusted`= NA)
mic.locs.dn %<>% add_row(stage = 'S->PMN', Subcategory = "Mitochondrion", `P-adjusted`= NA)

mic.locs.dn$stage <- factor(mic.locs.dn$stage, levels = c('MB->MC', 'MC->MM', 'MM->B', 'B->S', 'S->PMN'))

unique(mic.locs.dn$Subcategory)
levels.mic.locs.dn <- rev(c("Nucleus", "Mitochondrion", "Cytoplasm", "Microvesicle", "Nucleolus", "Extracellular vesicle"))

# add count
mic.locs.dn$count <- str_count(mic.locs.dn$`miRNAs/precursors`, ';')
mic.locs.dn$count <- mic.locs.dn$count +1

p.mic.locs.dn_pre <- mic.locs.dn %>% 
  mutate(size = round(1- `P-adjusted`, 2)) %>%
  filter(! Subcategory %in% c("microvesicle", "exosome", 'Exosome', "Circulating")) %>%
  ggplot(aes(x = stage, y = Subcategory, color = stage, label = count)) +
  geom_point(aes(size = size)) +
  scale_color_manual(values = colors_progression.arrow.mic, guide = 'none') +
  geom_text(color = 'black', hjust = -1, size = 5)


p.mic.locs.dn <- p.mic.locs.dn_pre  +
  labs(size="1-FDR") +
  theme_light(base_size=20) +
  theme(legend.position = "none")
  # theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1),
  #         axis.title.x = element_blank(),
  #         axis.title.y=element_blank(),
  #         legend.position="none")
  
p.mic.locs.dn

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_2/", "p.mic.locs.dn", ".scg"),
  plot = p.mic.locs.dn,
  device = svg,
  width = 6.5,
  height = 6)

Trajectories

Combined analysis to compare enrichment of trajectories on mrn, pro and mic levels

dotplot

prep mrn & pro

# centrality selected features 
load(file = paste0(dir_moa, "/data/final/preps/timeomics/", "centralities.RData"))

# filter to 90% quantile (50% doesnt result in enough features!)
centrality.mrn <- centrality.mrn %>% select(ENSG, traj, centrality) %>% 
  group_by(traj) %>% 
  arrange(traj, desc(centrality)) %>% 
  filter(centrality > quantile(centrality, .10))

centrality.pro <- centrality.pro %>% select(UNIPROT = ENSG, traj, centrality) %>% 
  group_by(traj) %>% 
  arrange(traj, desc(centrality)) %>% 
  filter(centrality > quantile(centrality, .10))

centrality.pro <- add.fan2UNIPROT(centrality.pro)


# create lists: combined and unique mrn/pro

## 90
l.trj_mrn.pro_ENSG <- list(
  mrn.Up = centrality.mrn %>% filter(traj == "up") %>% pull(ENSG),
  mrn.Dn = centrality.mrn %>% filter(traj == "dn") %>% pull(ENSG),
  mrn.UpDn = centrality.mrn %>% filter(traj == "upDn") %>% pull(ENSG),
  mrn.DnUp = centrality.mrn %>% filter(traj == "dnUp") %>% pull(ENSG),
  mrn.UpDnUp = centrality.mrn %>% filter(traj == "upDnup") %>% pull(ENSG),
  mrn.DnUpDn = centrality.mrn %>% filter(traj == "dnUpDn") %>% pull(ENSG),
  mrn.UpDnUpDn = centrality.mrn %>% filter(traj == "upDnUpDn") %>% pull(ENSG),
  mrn.DnUpDnUp = centrality.mrn %>% filter(traj == "dnUpDnUp") %>% pull(ENSG),
  pro.Up = centrality.pro %>% filter(traj == "up") %>% pull(ENSG),
  pro.Dn = centrality.pro %>% filter(traj == "dn") %>% pull(ENSG),
  pro.UpDn = centrality.pro %>% filter(traj == "upDn") %>% pull(ENSG),
  pro.DnUp = centrality.pro %>% filter(traj == "dnUp") %>% pull(ENSG),
  pro.UpDnUp = centrality.pro %>% filter(traj == "upDnup") %>% pull(ENSG),
  pro.DnUpDn = centrality.pro %>% filter(traj == "dnUpDn") %>% pull(ENSG)
)

l.trj_mrn_ENSG <- list(
  mrn.Up = centrality.mrn %>% filter(traj == "up") %>% pull(ENSG),
  mrn.Dn = centrality.mrn %>% filter(traj == "dn") %>% pull(ENSG),
  mrn.UpDn = centrality.mrn %>% filter(traj == "upDn") %>% pull(ENSG),
  mrn.DnUp = centrality.mrn %>% filter(traj == "dnUp") %>% pull(ENSG),
  mrn.UpDnUp = centrality.mrn %>% filter(traj == "upDnup") %>% pull(ENSG),
  mrn.DnUpDn = centrality.mrn %>% filter(traj == "dnUpDn") %>% pull(ENSG),
  mrn.UpDnUpDn = centrality.mrn %>% filter(traj == "upDnUpDn") %>% pull(ENSG),
  mrn.DnUpDnUp = centrality.mrn %>% filter(traj == "dnUpDnUp") %>% pull(ENSG)
)

l.trj_pro_ENSG <- list(
  pro.Up = centrality.pro %>% filter(traj == "up") %>% pull(ENSG),
  pro.Dn = centrality.pro %>% filter(traj == "dn") %>% pull(ENSG),
  pro.UpDn = centrality.pro %>% filter(traj == "upDn") %>% pull(ENSG),
  pro.DnUp = centrality.pro %>% filter(traj == "dnUp") %>% pull(ENSG),
  pro.UpDnUp = centrality.pro %>% filter(traj == "upDnup") %>% pull(ENSG),
  pro.DnUpDn = centrality.pro %>% filter(traj == "dnUpDn") %>% pull(ENSG)
)


# convert to ENTREZ
turn.ENSGintoENTREZ <- function(ENSG.vector){
  ENTREZ.vector <- fan$ENTREZ[match(ENSG.vector, fan$ENSG)]
  ENTREZ.vector <- na.omit(ENTREZ.vector)
  ENTREZ.vector <- as.vector(ENTREZ.vector)
  return(ENTREZ.vector)
}

l.trj_mrn.pro_ENTREZ <- purrr::map(l.trj_mrn.pro_ENSG, turn.ENSGintoENTREZ)
l.trj_mrn_ENTREZ <- purrr::map(l.trj_mrn_ENSG, turn.ENSGintoENTREZ)
l.trj_pro_ENTREZ <- purrr::map(l.trj_pro_ENSG, turn.ENSGintoENTREZ)

# # enrich
# rs.PW.trj_mrn.pro <- apply_enrichment.pathway(l.trj_mrn.pro_ENTREZ, universe_combo.ENTREZ) #
# rs.PW.trj_mrn <- apply_enrichment.pathway(l.trj_mrn_ENTREZ, universe_mrn.ENTREZ) #
# rs.PW.trj_pro <- apply_enrichment.pathway(l.trj_pro_ENTREZ, universe_pro.ENTREZ) #
# 
# rs.KG.trj_mrn.pro <- apply_enrichment.kegg(l.trj_mrn.pro_ENTREZ, universe_combo.ENTREZ) #
# rs.KG.trj_mrn <- apply_enrichment.kegg(l.trj_mrn_ENTREZ, universe_mrn.ENTREZ) #
# rs.KG.trj_pro <- apply_enrichment.kegg(l.trj_pro_ENTREZ, universe_pro.ENTREZ) #
# 
# rs.GO.trj_mrn.pro <- apply_enrichment.GO(l.trj_mrn.pro_ENSG, universe_combo.ENSG) #
# rs.GO.trj_mrn <- apply_enrichment.GO(l.trj_mrn_ENSG, universe_mrn.ENSG) #
# rs.GO.trj_pro <- apply_enrichment.GO(l.trj_pro_ENSG, universe_pro.ENSG) #
# 
# # add factors (do for all at some point....)
# rs.PW.trj_mrn.pro <- add.enrichFactors(rs.PW.trj_mrn.pro)
# rs.PW.trj_mrn <- add.enrichFactors(rs.PW.trj_mrn)
# rs.PW.trj_pro <- add.enrichFactors(rs.PW.trj_pro)
# 
# rs.KG.trj_mrn.pro <- add.enrichFactors(rs.KG.trj_mrn.pro)
# rs.KG.trj_mrn <- add.enrichFactors(rs.KG.trj_mrn)
# rs.KG.trj_pro <- add.enrichFactors(rs.KG.trj_pro)
# 
# rs.GO.trj_mrn.pro <- add.enrichFactors(rs.GO.trj_mrn.pro)
# rs.GO.trj_mrn <- add.enrichFactors(rs.GO.trj_mrn)
# rs.GO.trj_pro <- add.enrichFactors(rs.GO.trj_pro)
# 
# 
# res.enrichment.trj <- list(
#   # PW
#   rs.PW.trj_mrn.pro = rs.PW.trj_mrn.pro,
#   rs.PW.trj_mrn = rs.PW.trj_mrn,
#   rs.PW.trj_pro = rs.PW.trj_pro,
#   # kegg
#   rs.KG.trj_mrn.pro = rs.KG.trj_mrn.pro,
#   rs.KG.trj_mrn = rs.KG.trj_mrn,
#   rs.KG.trj_pro = rs.KG.trj_pro,
#   # GO
#   rs.GO.trj_mrn.pro = rs.GO.trj_mrn.pro,
#   rs.GO.trj_mrn = rs.GO.trj_mrn,
#   rs.GO.trj_pro = rs.GO.trj_pro
# )
# 
# save(res.enrichment.trj,
#      file = paste0(dir_moa, "/data/final/preps/enrichment/", "res.enrichment.trj.RData"))

load(file = paste0(dir_moa,  "/data/final/preps/enrichment/", "res.enrichment.trj.RData"))


################ CLEAN UP BELOW!!

# final format
prep_enrich.mrnNpro.names <- function(rs.enrich.mrnNpro){
  res <- rs.enrich.mrnNpro %>% select(Cluster, Description, FoldEnrichment, Count, features = geneID)
  return(res)
}

rs.PW.trj_mrn.pro_fin <- prep_enrich.mrnNpro.names(res.enrichment.trj$rs.PW.trj_mrn.pro)
rs.KG.trj_mrn.pro_fin <- prep_enrich.mrnNpro.names(res.enrichment.trj$rs.KG.trj_mrn.pro)
rs.GO.trj_mrn.pro_fin <- prep_enrich.mrnNpro.names(res.enrichment.trj$rs.GO.trj_mrn.pro)



## check

# combos
table(rs.PW.trj_mrn.pro_fin$Cluster) # only in mRNA: only up and down. Pro also bimodal. intersting...
table(rs.KG.trj_mrn.pro_fin$Cluster) # same (1 mrn.DnUpDnUp)
table(rs.GO.trj_mrn.pro_fin$Cluster) # same!

# 
# # # mrn
# # table(rs.PW.trj_mrn$Cluster) # only one UpDnUpDn
# # table(rs.KG.trj_mrn$Cluster) # same
# # table(rs.GO.trj_mrn$Cluster) # same
# # # somehow the multimodal features do not seem to have a unifying biological theme?
# # 
# # # pro
# # table(rs.PW.trj_pro$Cluster) # only one UpDnUpDn
# # table(rs.KG.trj_pro$Cluster) # same
# # table(rs.GO.trj_pro$Cluster) # same
# 
# 
# # which are the unique mrn terms?
# rs.PW.trj_mrn %>% filter(Cluster == 'mrn.UpDnUpDn') # Epigenetic regulation of gene expression
# rs.KG.trj_mrn %>% filter(Cluster == 'mrn.DnUpDnUp') # Terpenoid backbone biosynthesis
# rs.GO.trj_mrn.pro %>% filter(Cluster == 'mrn.UpDnUpDn') # 8: misfolded protein binding, RNA helicase activity
# 
# 
# # check the combos first
# 
# 
# 
# rs.PW.trj90_mrn.pro
# 
# 
# names(rs.PW.trj90_mrn.pro) 
# rs.PW.trj90_mrn.pro$FoldEnrichment # from 1 - 20. = size of dot -> make all same in mic? Or use pminus1 (= all close to 1)
# # Description = Subcategory
# # Cluster: up, down 
# # FoldEnrichment = 
# # Count = number of features
# 
# 
# names(rs.miEAA.trj90_mic.upDn)
# # Subcategory = Description !
# # Cluster: add. = up, dn 
# # count: make number of precoursors
# # FoldEnrichment = use the diff * -1
# 

mic

Using miRTargetLink 2.0

https://ccb-compute.cs.uni-saarland.de/mirtargetlink2/unidirectional_search/

miEAA 2.0 with only strong validated targets in homo sapiens

centrality.mic <- centrality.mic %>% select(ENSG, traj, centrality) %>% 
  group_by(traj) %>% 
  arrange(traj, desc(centrality)) %>% 
  filter(centrality > quantile(centrality, .10))

table(centrality.mic$traj)

# copy to clip for website 
# centrality.mic %>% filter(traj == 'dn') %>% pull(ENSG) %>% write_clip()
# centrality.mic %>% filter(traj == 'up') %>% pull(ENSG) %>% write_clip()
# centrality.mic %>% filter(traj == 'dnUp') %>% pull(ENSG) %>% write_clip()
# centrality.mic %>% filter(traj == 'upDn') %>% pull(ENSG) %>% write_clip()
# centrality.mic %>% filter(traj == 'upDn late') %>% pull(ENSG) %>% write_clip()

# import downloaded files from miEAA 2.0 over representation analysis 
rs.miEAA.trj_mic.dn.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miEAA/in/", "dn", ".csv"))
rs.miEAA.trj_mic.up.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miEAA/in/", "up", ".csv"))
rs.miEAA.trj_mic.dnUp.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miEAA/in/", "dnUp", ".csv"))
rs.miEAA.trj_mic.upDn.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miEAA/in/", "upDn", ".csv"))
rs.miEAA.trj_mic.upDnLate.raw <- read.csv(paste0(dir_moa, "/data/final/ext/miEAA/in/", "upDnLate", ".csv"))

## format results

prep_miRT <- function(miRT.raw, ClusterName){
  # colnames 1
  names(miRT.raw)[4] <- 'pvalue'
  names(miRT.raw)[5] <- 'p.adjust'
  names(miRT.raw)[6] <- 'qvalue'
  # calc
  miRT.raw$dif <- miRT.raw$Expected - miRT.raw$Observed
  miRT.raw$pminus1 <- 1- miRT.raw$p.adjust
  miRT.raw$Count <- str_count(miRT.raw$miRNAs.precursors, ';')
  miRT.raw$Count <- miRT.raw$Count +1
  # colnames 2
  miRT.raw$FoldEnrichment = miRT.raw$dif * -1
  miRT.raw$features <- miRT.raw$miRNAs.precursors
  miRT.raw$Description <- miRT.raw$Subcategory
  miRT.raw$Cluster <- ClusterName
 return(miRT.raw)
}

rs.miEAA.trj_mic.dn_pre <- prep_miRT(rs.miEAA.trj_mic.dn.raw, 'mic.Dn')
rs.miEAA.trj_mic.up_pre <- prep_miRT(rs.miEAA.trj_mic.up.raw, 'mic.Up')
rs.miEAA.trj_mic.dnUp_pre <- prep_miRT(rs.miEAA.trj_mic.dnUp.raw, 'mic.DnUp')
rs.miEAA.trj_mic.upDn_pre <- prep_miRT(rs.miEAA.trj_mic.upDn.raw, 'mic.UpDn')
rs.miEAA.trj_mic.upDnLate_pre <- prep_miRT(rs.miEAA.trj_mic.upDnLate.raw, 'mic.UpDnL')


# prep for integration with mrn & pro
prep_mic.enrich4mrnNpro <- function(res.miEAA_pre){
  res <- res.miEAA_pre %>% select(Cluster, Description, FoldEnrichment, Count, features)
  return(res)
}
  
rs.miEAA.trj_mic.dn <- prep_mic.enrich4mrnNpro(rs.miEAA.trj_mic.dn_pre)
rs.miEAA.trj_mic.up <- prep_mic.enrich4mrnNpro(rs.miEAA.trj_mic.up_pre)
rs.miEAA.trj_mic.dnUp <- prep_mic.enrich4mrnNpro(rs.miEAA.trj_mic.dnUp_pre)
rs.miEAA.trj_mic.upDn <- prep_mic.enrich4mrnNpro(rs.miEAA.trj_mic.upDn_pre)
rs.miEAA.trj_mic.upDnLate <- prep_mic.enrich4mrnNpro(rs.miEAA.trj_mic.upDnLate_pre)

# prep final mic enricment to combine with mrn & pro results 
rs.miEAA.trj_mic <- rbind(rs.miEAA.trj_mic.dn, rs.miEAA.trj_mic.up, rs.miEAA.trj_mic.dnUp, rs.miEAA.trj_mic.upDn, rs.miEAA.trj_mic.upDnLate)

combine

# intersectios of the single enrichment sets
intersect(rs.PW.trj_mrn.pro_fin$Description, rs.miEAA.trj_mic$Description) # 51
intersect(rs.KG.trj_mrn.pro_fin$Description, rs.miEAA.trj_mic$Description) # 148
intersect(rs.GO.trj_mrn.pro_fin$Description, rs.miEAA.trj_mic$Description) # 179

## combine all

# all mrn.pro
rs.ALL.trj_mrn.pro <- rbind(rs.PW.trj_mrn.pro_fin, rs.KG.trj_mrn.pro_fin, rs.GO.trj_mrn.pro_fin)

# with mic results
rs.ALL.trj <- rbind(rs.ALL.trj_mrn.pro, rs.miEAA.trj_mic)

investigate

# check overlaps
data <- rs.ALL.trj %>% select(Description, Cluster)
data <- table(data)
counts <- apply(data, 1, function(x) sum(x))
cluster <- apply(data, 1, function(x) names(which(x!=0)))
cluster <- lapply(cluster, function(x) paste(x, collapse = ";")) %>% unlist()
tab <- data.frame(pathway = names(counts), n = counts, cluster = cluster)


# select terms

selTerms.ALL.disease <- c(
  'Acute myeloid leukemia',
  'Inflammatory bowel disease'
)

selTerms.ALL.important <- c(
'SRP-dependent cotranslational protein targeting to membrane',
# 'Apoptosis',
'Cell Cycle',
'Ribosome',
'Translation',
'leukocyte cell-cell adhesion',
# 'ficolin-1-rich granule lumen',
'mitochondrial membrane',
'Neutrophil extracellular trap formation',
'Nonsense-Mediated Decay (NMD)'
# 'Pentose phosphate pathway',
)


selTerms.ALL.interesting <- c(
'HIF-1 signaling pathway',
'L13a-mediated translational silencing of Ceruloplasmin expression',
#'FoxO signaling pathway',
'mTOR signaling pathway',
#'Signaling by ROBO receptors',
'Regulation of expression of SLITs and ROBOs',
#'ErbB signaling pathway',
'MAPK cascade',
#'negative regulation of transcription by RNA polymerase II',
'Ras protein signal transduction',
'VEGF signaling pathway'
)


inspect.selTerms.GO(rs.ALL.trj, c(selTerms.ALL.disease, selTerms.ALL.important)) # 

inspect.selTerms.GO(rs.ALL.trj, selTerms.ALL.interesting) # 

inspect.selTerms.GO(rs.ALL.trj, c(selTerms.ALL.interesting, selTerms.ALL.important)) # 

finalize

# reduce to terms of interest
rs.ALL.trj.fin <- rs.ALL.trj %>% 
  filter(Description %in% c(selTerms.ALL.interesting, selTerms.ALL.important))

rs.ALL.trj.fin <- rs.ALL.trj.fin[-36,] # SRP is double mentioned in pro

# prep levels to order axis elements. NEEDED???
#rs.ALL.trj90.fin$Cluster <- droplevels(rs.ALL.trj90.fin$Cluster)

# check
sort(unique(rs.ALL.trj.fin$Description))

# define levels Y axis
unique(rs.ALL.trj.fin$Description)


# shorten terms
rs.ALL.trj.fin$desc <- rs.ALL.trj.fin$Description
unique(rs.ALL.trj.fin$desc)

rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "leukocyte cell-cell adhesion", 
                                  "Leukocyte cell-cell adh.", rs.ALL.trj.fin$desc)
rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "mitochondrial membrane", 
                                  "Mito. membrane", rs.ALL.trj.fin$desc)

rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "L13a-mediated translational silencing of Ceruloplasmin expression", 
                                  "L13a silencing Cerulop.", rs.ALL.trj.fin$desc)

rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "SRP-dependent cotranslational protein targeting to membrane", 
                                  "SRP-dep. loc.", rs.ALL.trj.fin$desc)

rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "Regulation of expression of SLITs and ROBOs", 
                                  "SLIT and ROBO reg.", rs.ALL.trj.fin$desc)

rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "VEGF signaling pathway", 
                                  "VEGF pathway", rs.ALL.trj.fin$desc)

rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "mTOR signaling pathway" , 
                                  "mTOR pathway" , rs.ALL.trj.fin$desc)

rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "Neutrophil extracellular trap formation", 
                                  "NET formation", rs.ALL.trj.fin$desc)

rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "HIF-1 signaling pathway", 
                                  "HIF-1 pathway", rs.ALL.trj.fin$desc)

rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "Ras protein signal transduction",
                                  "Ras signal", rs.ALL.trj.fin$desc)

rs.ALL.trj.fin$desc <- ifelse(rs.ALL.trj.fin$desc == "Nonsense-Mediated Decay (NMD)",
                                  "NMD", rs.ALL.trj.fin$desc)

# 
# rs.ALL.trj90.fin$desc <- ifelse(rs.ALL.trj90.fin$desc == , 
#                                   , rs.ALL.trj90.fin$desc)


unique(rs.ALL.trj.fin$desc)

levels.LL.trj.fin <- c(
  "VEGF pathway",
  "Ras signal",
  "NET formation",
  "mTOR pathway",
  "MAPK cascade",
  "Leukocyte cell-cell adh.",
  "HIF-1 pathway",
  "Ribosome",
  "Translation",
  "Mito. membrane",
  "SRP-dep. loc.",
  "NMD",
  "L13a silencing Cerulop.",
  "SLIT and ROBO reg.",
  "Cell Cycle"
)
 
setdiff(rs.ALL.trj.fin$desc, levels.LL.trj.fin) # ok


## shorten terms
# check
#rs.ALL.trj90.fin$desc2 <- stringr::str_wrap(rs.ALL.trj90.fin$desc, 20)
#levels.LL.trj90.fin <- stringr::str_wrap(levels.LL.trj90.fin, 20)

## plot options

# shapes
rs.ALL.trj.fin$shape <- case_when(
  str_detect(rs.ALL.trj.fin$Cluster, 'mrn') ~ 'mrn',
  str_detect(rs.ALL.trj.fin$Cluster, 'mic') ~ 'mic',
  str_detect(rs.ALL.trj.fin$Cluster, 'pro') ~ 'pro'
)

shps <- c(1, 16, 5)

rs.ALL.trj.fin$shape <- factor(rs.ALL.trj.fin$shape, levels = c('mrn', 'pro', 'mic'))

# colors
table(rs.ALL.trj.fin$Cluster)

colors_trj <- c(
'#f89540', # mrn up
"#7e03a8", # mrn down
'#f89540', # pro up
"#7e03a8", # pro down
"#2166AC", # pro upDn
'hotpink', # pro dnUp
'lightblue', # pro DnUpDn
"#7e03a8", # mic down
'#f89540', # mic up
"#2166AC") # mic upDn

names(colors_trj) <- c('mrn.Up', 'mrn.Dn', 'pro.Up', 'pro.Dn', 'pro.UpDn', 'pro.DnUp', 'pro.DnUpDn', 'mic.Dn', 'mic.Up', 'mic.UpDn')




plot.enriched.trj <- function(rs.CP, desc.levels, fondSize, labSize, colorsX){
  p <- rs.CP %>%
    ggplot(aes(x= Cluster, y = desc, color = Cluster, label = Count, shape = shape)) +
    #geom_point(aes(size = FoldEnrichment), stroke = 2) +
    geom_point(aes(size = 3), stroke = 2) +
    geom_label(color = 'black', label.padding = unit(0.09, "lines"), hjust = -0.5, size = labSize) +
    # scale_size(range = c(5, 15)) +
    theme_light(base_size = 18) +
    theme(axis.text.x = element_text(angle = 30, vjust = 0.9, hjust= 1, size=fondSize),
          axis.title.x = element_blank() ,
          axis.text.y = element_text(size=fondSize),
          legend.position = "none") +
    scale_color_manual(values = colorsX, guide = 'none') +
    scale_shape_manual(values=shps) +
    #ggtitle(title) +
    ylab('Terms') +
    scale_y_discrete(limits=rev(desc.levels)) +
    guides(shape = "none")
  
  return(p)
}


p.ALL.trj <- plot.enriched.trj(rs.ALL.trj.fin, levels.LL.trj.fin, 20, 5, colors_trj)
p.ALL.trj

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_3/", "p.ALL.trj", ".svg"),
  plot = p.ALL.trj,
  device = svg,
  width = 9,
  height = 8)

upsets

trjs

library(ComplexUpset)

# turn into SYMBOLS
turnENSG2SYMBOL <- function(vectorENSG){
  vectorSYMBOL <- fan$SYMBOL[match(vectorENSG, fan$ENSG)]
  return(vectorSYMBOL)
}
  
l.trj_mrn.pro_SYMBOL <- purrr::map(l.trj_mrn.pro_ENSG, turnENSG2SYMBOL)

# create TRUE/FALSE table
names(l.trj_mrn.pro_SYMBOL)

trj4upset <- data.frame(SYMBOL = fan$SYMBOL)
trj4upset$mrn.Up <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.Up"]], T, F)
trj4upset$mrn.Dn <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.Dn"]], T, F)
trj4upset$mrn.UpDn <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.UpDn"]], T, F)
trj4upset$mrn.DnUp <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.DnUp"]], T, F)
trj4upset$mrn.UpDnUp <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.UpDnUp"]], T, F)
trj4upset$mrn.DnUpDn <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["mrn.DnUpDn"]], T, F)
trj4upset$pro.Up <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["pro.Up"]], T, F)
trj4upset$pro.Dn <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["pro.Dn"]], T, F)
trj4upset$pro.UpDn <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["pro.UpDn"]], T, F)
trj4upset$pro.DnUp <- ifelse(trj4upset$SYMBOL %in% l.trj_mrn.pro_SYMBOL[["pro.DnUp"]], T, F)


# plot prep
trj.groups <- colnames(trj4upset)[-1]


# plot
p.upset.traj <- upset(trj4upset, trj.groups, name='Traj', 
                      width_ratio=0.1, min_degree=2, min_size= 18,
                      # mode= 'inclusive_intersection', # does not matter because these are exclusive lists anyways
                      themes = upset_default_themes(text=element_text(size=20)),
                      set_sizes=(
                        upset_set_size() + theme(axis.text.x=element_text(angle=35), text=element_text(size=15))),
                      base_annotations=list(
                          'Intersection size'=intersection_size(
                              text=list(size=6),
                              #width=0.4))
                      )))


p.upset.traj

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_3/", "p.upset.traj", ".svg"),
  plot = p.upset.traj,
  device = svg,
  width = 10,
  height = 6)

disease

## add disease and unregulated genes
trj4upsetWdis <- trj4upset

SCN.genes <- read_csv(paste0(dir_moa, "data/anno/", "genotypes_of_SCN.csv"))
AML.genes <- read_csv(paste0(dir_moa, "data/anno/", "genes_of_AML.csv"))

total.trj <- unique(Reduce(c,l.trj_mrn.pro_SYMBOL))
total.mrn.noTRJ <- unique(dex.all$dex.mrn$dex_stage_mrn_vsn$SYMBOL)
total.mrn.noTRJ <- total.mrn.noTRJ[!total.mrn.noTRJ %in% total.trj]

total.pro.noTRJ <- unique(dex.all$dex.pro$dex_stage_pro_vsn$SYMBOL)
total.pro.noTRJ <- total.pro.noTRJ[!total.pro.noTRJ %in% total.trj]

# add
trj4upsetWdis$SCN <- ifelse(trj4upset$SYMBOL %in% SCN.genes$SYMBOL, T, F)
trj4upsetWdis$AML <- ifelse(trj4upset$SYMBOL %in% AML.genes$SYMBOL, T, F)

# # remove non-overlapping SYMBOLS: not needed anymore
trj4upsetWdis$RowSum <- rowSums(trj4upsetWdis[,-1])
trj4upsetWdis.fin <- trj4upsetWdis %>% filter(RowSum > 0) %>% select(-RowSum)


# mrn
trj4upsetWdis$unreg.mRNA <- ifelse(trj4upset$SYMBOL %in% total.mrn.noTRJ, T, F)
sum(trj4upsetWdis$unreg.mRNA) # 3510
length(total.mrn.noTRJ) # 3510

# SCN
intersect(total.mrn.noTRJ, SCN.genes$SYMBOL) # "CSF3R"   "GFI1"    "LAMTOR2" "USB1"    "SRP19"   "SMARCD2"
intersect(total.mrn.noTRJ, AML.genes$SYMBOL) # "TET2"  "CBL"   "ASXL1" "CEBPA"

# pro
trj4upsetWdis$unreg.Prot <- ifelse(trj4upset$SYMBOL %in% total.pro.noTRJ, T, F)
sum(trj4upsetWdis$unreg.Prot) # 959
length(total.pro.noTRJ) # 959

intersect(total.pro.noTRJ, SCN.genes$SYMBOL) # "SMARCD2" "SRP19"
intersect(total.pro.noTRJ, AML.genes$SYMBOL) # "CEBPA

#traj.pro.unreg <- trj904upsetWdis %>% filter(unreg.Prot == T) %>% pull(SYMBOL)
#intersect(traj.pro.unreg, SCN.genes$SYMBOL) #

# plot prep
# trj.groupsWdis <- colnames(trj4upsetWdis.fin)[-1]
trj.groupsWdis <- colnames(trj4upsetWdis.fin)[-1]


# first trial for dimensions
upset(trj4upsetWdis.fin, trj.groupsWdis, name='Traj',
      set_sizes= FALSE,
      mode= 'inclusive_intersection', # 
      intersections=list(
        c('SCN', "mrn.Up"),  # 2
        c('SCN', "mrn.Dn"), # 4  
        c('SCN', "mrn.UpDn"), # 2
        #c('SCN', "mrn.DnUp"), # 0
        #c('SCN', "mrn.UpDnUp"), # 0
        #c('SCN', "mrn.DnUpDn"), # 0
        c('SCN', "pro.Up"), # 1
        #c('SCN', "pro.Dn"), # 0
        c('SCN', "pro.UpDn"), # 4
        #c('SCN', "pro.DnUp"), # 0
        #c('AML', "mrn.Up"),  # 0
        c('AML', "mrn.Dn"), # 1  
        c('AML', "pro.Dn") # 2
        #c('AML', "mrn.UpDn"), # 0
        #c('AML', "mrn.DnUp"), # 0
        #c('AML', "mrn.UpDnUp"), # 0
        #c('AML', "mrn.DnUpDn"), # 0
        #c('AML', "pro.Up"), # 0
        #c('AML', "pro.UpDn"), # 0
        #c('AML', "pro.DnUp") # 0
        #c('SCN', "unreg.mRNA"), # works
        #c('SCN', "unreg.Prot"), # why does this show 0 if intersect shows more ???
        #c('AML', "unreg.mRNA"), # 4
        #c('AML', "unreg.Prot") # 4
        )
    )

# add gene names
trj.groupsWdis.2 <- c("mrn.Up", "mrn.Dn", "mrn.UpDn", "pro.Up" , "pro.Dn", "pro.UpDn", "SCN", "AML")

p.upset.dis <- upset(trj4upsetWdis.fin, trj.groupsWdis.2, name='Feature sets',
      set_sizes= FALSE,
      mode= 'inclusive_intersection', # 
      themes = upset_default_themes(text=element_text(size=20)),
      intersections=list(
        c('SCN', "mrn.Up"),  # 2
        c('SCN', "mrn.Dn"), # 4  
        c('SCN', "mrn.UpDn"), # 2
        c('SCN', "pro.Up"), # 1
        c('SCN', "pro.UpDn"), # 4
        c('AML', "mrn.Dn"), # 1  
        c('AML', "pro.Dn") # 2
        ),
      ,
      base_annotations = list(
        'Intersection size'=(
            intersection_size(color='grey9',
                fill='grey80',
                mode= 'inclusive_intersection',
                text=list(size=6))
            + ylim(c(0, 9))
            #+ theme(plot.background=element_rect(fill='#E5D3B3'))
            #+ ylab('# observations in intersection')
        ) + 
          geom_text(
                mapping=aes(label=SYMBOL),
                position=position_stack(),
                na.rm=TRUE,
                vjust=1.3
            )
    )
    )

p.upset.dis

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_3/", "p.upset.dis", ".svg"),
  plot = p.upset.dis,
  device = svg,
  width = 10,
  height = 6)

Use this to show disease gene names that overlap https://stackoverflow.com/questions/75990226/add-labels-to-upset-plot-so-the-values-of-intersection-would-be-visible-along-t