Last update: 24-04-09

Last markdown compiled:

ToDo: - format

Version 15.03.24

knitr::opts_chunk$set(warning = FALSE, message = FALSE, results = FALSE) 

# dirs
dir_moa <- "/home/isar/Dropbox/scn_moa/" #  Sebastian @Linux
# dir_moa <- "/Users/home_sh/Library/CloudStorage/Dropbox/scn_moa"  # Sebastian @Mac


# data, revised 2024.01.09
# exp
load(file = paste0(dir_moa, "/data/final/", "gnp.pro.RData"))
load(file = paste0(dir_moa, "/data/final/", "gnp.mrn.RData"))
load(file = paste0(dir_moa, "/data/final/", "gnp.mic.RData"))
#load(file = paste0(dir_moa, "/data/final/", "scn.pro.RData")) # discarded prom project
# results
load(file = paste0(dir_moa, "/data/final/", "dex.all.RData"))
load(file = paste0(dir_moa, "/data/final/", "timeomics.singles.RData"))
load(file = paste0(dir_moa, "/data/final/", "res.wgcna.RData"))
load(file = paste0(dir_moa, "/data/final/", "anno.RData"))

## packages
source(paste0(dir_moa, "/scripts/", "scn_moa_fct.R"))
# load packagea
sapply(packages_general, require, character.only = TRUE)
# sapply(packages_WGCNA, require, character.only = TRUE)

library("viridis")
library(ggpmisc)
library(gt)
library(ggalluvial)

base data

# extract data
fan <- gnp.mrn$fan

# pro: single VSN normalized intensities
exp_proL <- as.data.frame(t(gnp.pro$exp_pro_vsn))
exp_proL$UNIPROT <- rownames(exp_proL)
exp_proL <- exp_proL %>% pivot_longer(!UNIPROT, names_to = "id", values_to = "value")
exp_proL$stage <- gnp.pro$san_pro$stage[match(exp_proL$id, gnp.pro$san_pro$id)]
exp_proL$SYMBOL <- fan$SYMBOL[match(exp_proL$UNIPROT, fan$UNIPROT)]
exp_proL$ENSG <- fan$ENSG[match(exp_proL$UNIPROT, fan$UNIPROT)]

# pro: means
exp_proLc <- exp_proL %>%
  select(ENSG, stage, value) %>%
  group_by(ENSG, stage) %>%
  dplyr::summarise(medianLg2 = median(value))
exp_proLc$SYMBOL <- fan$SYMBOL[match(exp_proLc$ENSG, fan$ENSG)]
exp_proLc$stage <- factor(exp_proLc$stage, levels = c('MB', 'PM', 'MC', 'MM', 'B', 'PMN'))

exp_proLc$median <- 2**exp_proLc$medianLg2

# exp_proWc <- exp_proLc %>% select(SYMBOL, stage, median) %>%
#   pivot_wider(names_from = stage, values_from = median)



# mrn: single values
exp_mrnL <- as.data.frame(t(gnp.mrn$exp_mrn_tpm_vsn))
exp_mrnL$ENSG <- rownames(exp_mrnL)
exp_mrnL <- exp_mrnL %>% pivot_longer(!ENSG, names_to = "id", values_to = "value")
exp_mrnL$stage <- gnp.mrn$san_mrn$stage[match(exp_mrnL$id, gnp.mrn$san_mrn$id)]
exp_mrnL$SYMBOL <- fan$SYMBOL[match(exp_mrnL$ENSG, fan$ENSG)]

# mrn: means
exp_mrnLc <- exp_mrnL %>%
  select(ENSG, stage, value) %>%
  group_by(ENSG, stage) %>%
  dplyr::summarise(medianLg2= median(value, na.rm = T))
exp_mrnLc$SYMBOL <- fan$SYMBOL[match(exp_mrnLc$ENSG, fan$ENSG)]
exp_mrnLc$stage <- factor(exp_mrnLc$stage, levels = c('MB', 'PM', 'MC', 'MM', 'B', 'S', 'PMN'))

exp_mrnLc$median <- 2**exp_mrnLc$medianLg2




# mic: means

# single values
exp_micL <- as.data.frame(t(gnp.mic$exp_mic_vsn))
exp_micL$SYMBOL <- rownames(exp_micL)
exp_micL <- exp_micL %>% pivot_longer(!SYMBOL, names_to = "id", values_to = "value")
exp_micL$stage <- gnp.mic$san_mic$stage[match(exp_micL$id, gnp.mic$san_mic$id)]

exp_micLc <- exp_micL %>%
  select(SYMBOL, stage, value) %>%
  group_by(SYMBOL, stage) %>%
  dplyr::summarise(medianLg2= median(value, na.rm = T))
exp_micLc$stage <- factor(exp_micLc$stage, levels = c('MB', 'MC', 'MM', 'B', 'S', 'PMN'))

exp_micLc$median <- 2**exp_micLc$medianLg2

# # turn into wide to get direction 
# exp_micWc <- exp_micLc %>% select(SYMBOL, stage, median) %>%
#   pivot_wider(names_from = stage, values_from = median)


#exp_micWc$direction.MBtoMC <- ifelse(exp_micWc$MB > exp_micWc$MC, 'down', 'up')


#exp_micWc$direction <- ifelse(exp_micWc$MB > exp_micWc$PMN, 'down', 'up')

# transfer
#exp_micLc$direction.MBtoMC <- exp_micWc$direction.MBtoMC[match(exp_micLc$SYMBOL, exp_micWc$SYMBOL)]

Feature ranks

To understand composition of the total omics space, we plot feature ranks per omics plane

fct

## proteome
calc_CumSum_per_stage.pro <- function(dat, STAGE){
  # isolate
  dat_stage <- dat %>% filter(stage == STAGE)
  # total_Mol_stage = sum(dat_stage$value) # this seems unneccesary
  dat_stage <- dat_stage %>% arrange(desc(value))
  # calc cumMol
  dat_stage$cumSum <- cumsum(dat_stage$value)/sum(dat_stage$value)
  # rank
  dat_stage %<>% mutate(rank = c(1:3156))
  return(dat_stage)
}

## apply stage wise
apply_CumSum.allStages.pro <- function(moNr.l){
  dat.pro_MB <- calc_CumSum_per_stage.pro(moNr.l, 'MB')
  dat.pro_PM <- calc_CumSum_per_stage.pro(moNr.l, 'PM')
  dat.pro_MC <- calc_CumSum_per_stage.pro(moNr.l, 'MC')
  dat.pro_MM <- calc_CumSum_per_stage.pro(moNr.l, 'MM')
  dat.pro_B <- calc_CumSum_per_stage.pro(moNr.l, 'B')
  #dat.pro_S <- calc_CumSum_per_stage(moNr.l, 'S')
  dat.pro_PMN <- calc_CumSum_per_stage.pro(moNr.l, 'PMN')
  # combine and format
  rs <- rbind(dat.pro_MB, dat.pro_PM, dat.pro_MC, dat.pro_MM, dat.pro_B, dat.pro_PMN)
  rs %<>% ungroup()
  return(rs)
}

## transcriptome
calc_CumSum_per_stage.mrn <- function(dat, STAGE){
  # isolate
  dat_stage <- dat %>% filter(stage == STAGE)
  # total_Mol_stage = sum(dat_stage$value) # this seems unneccesary
  dat_stage <- dat_stage %>% arrange(desc(value))
  # calc cumMol
  dat_stage$cumSum <- cumsum(dat_stage$value)/sum(dat_stage$value)
  # rank
  dat_stage %<>% mutate(rank = c(1:8432))
  return(dat_stage)
}

## apply stage wise
apply_CumSum.allStages.mrn <- function(moNr.l){
  dat.pro_MB <- calc_CumSum_per_stage.mrn(moNr.l, 'MB')
  dat.pro_PM <- calc_CumSum_per_stage.mrn(moNr.l, 'PM')
  dat.pro_MC <- calc_CumSum_per_stage.mrn(moNr.l, 'MC')
  dat.pro_MM <- calc_CumSum_per_stage.mrn(moNr.l, 'MM')
  dat.pro_B <- calc_CumSum_per_stage.mrn(moNr.l, 'B')
  dat.pro_S <- calc_CumSum_per_stage.mrn(moNr.l, 'S')
  dat.pro_PMN <- calc_CumSum_per_stage.mrn(moNr.l, 'PMN')
  # combine and format
  rs <- rbind(dat.pro_MB, dat.pro_PM, dat.pro_MC, dat.pro_MM, dat.pro_B, dat.pro_S, dat.pro_PMN)
  rs %<>% ungroup()
  return(rs)
}


## miRNAnome
calc_CumSum_per_stage.mic <- function(dat, STAGE){
  # isolate
  dat_stage <- dat %>% filter(stage == STAGE)
  # total_Mol_stage = sum(dat_stage$value) # this seems unneccesary
  dat_stage <- dat_stage %>% arrange(desc(value))
  # calc cumMol
  dat_stage$cumSum <- cumsum(dat_stage$value)/sum(dat_stage$value)
  # rank
  dat_stage %<>% mutate(rank = c(1:283))
  return(dat_stage)
}

## apply stage wise
apply_CumSum.allStages.mic <- function(moNr.l){
  dat.pro_MB <- calc_CumSum_per_stage.mic(moNr.l, 'MB')
  #dat.pro_PM <- calc_CumSum_per_stage(moNr.l, 'PM')
  dat.pro_MC <- calc_CumSum_per_stage.mic(moNr.l, 'MC')
  dat.pro_MM <- calc_CumSum_per_stage.mic(moNr.l, 'MM')
  dat.pro_B <- calc_CumSum_per_stage.mic(moNr.l, 'B')
  dat.pro_S <- calc_CumSum_per_stage.mic(moNr.l, 'S')
  dat.pro_PMN <- calc_CumSum_per_stage.mic(moNr.l, 'PMN')
  # combine and format
  rs <- rbind(dat.pro_MB, dat.pro_MC, dat.pro_MM, dat.pro_B, dat.pro_S, dat.pro_PMN)
  rs %<>% ungroup()
  return(rs)
}



plot.ranks <- function(ranksDF, xLIM, yLAB, targets){
  ranksDF$LAB <- ifelse(ranksDF$SYMBOL %in% targets, ranksDF$SYMBOL, NA)
  p <- ranksDF %>% ggplot(aes(x= rank, y= cumSum, color = stage, label = LAB)) +
    geom_point() +
    geom_hline(yintercept = 0.5, linetype="dashed", alpha=0.7) +
    geom_text_repel(color= 'black') +
    xlab("Rank by abundance") + 
    ylab(yLAB) +
    #theme(legend.position = "none") +
    xlim(xLIM) +
    scale_color_manual(values = colors_stage, name = 'Stage') +
    theme_light(base_size = 20) +
    guides(colour = guide_legend(override.aes = list(size=10))) +
    theme(legend.title = element_text(size = 12), legend.text = element_text(size = 9),
                                               legend.margin=margin(-5,-5,-5,-15))
    #labs(title = paste0("Cumulative abundance:", type4title),
     #    caption = CAPTION)
  return(p)
}

plot

# you need to reverse the log2!

# ungroup
exp_proLc %<>% ungroup()
exp_mrnLc %<>% ungroup()
exp_micLc %<>% ungroup()

## medians as value (due to functions above)
exp_proLc$value <- exp_proLc$median
exp_mrnLc$value <- exp_mrnLc$median
exp_micLc$value <- exp_micLc$median

ranks.pro <- apply_CumSum.allStages.pro(exp_proLc)
ranks.mrn <- apply_CumSum.allStages.mrn(exp_mrnLc)
ranks.mic <- apply_CumSum.allStages.mic(exp_micLc)

p.ranks.pro <- plot.ranks(ranks.pro, c(0, 100), 'Cumulative protein abundance', NA)
p.ranks.pro

p.ranks.mrn <- plot.ranks(ranks.mrn, c(0, 150), 'Cumulative mRNA abundance', NA)
p.ranks.mrn

p.ranks.mic <- plot.ranks(ranks.mic, c(0, 100), 'Cumulative miRNA abundance', NA)
p.ranks.mic

## add numbers

# pro
rnk.pro <- ranks.pro %>% group_by(stage) %>% filter(cumSum < 0.51)
num.rnk.pro <- rnk.pro %>% group_by(stage) %>% summarise("Proteins at 50%" = n())
num.rnk.pro %<>% rename(Stage = stage)


p.ranks.pro_fin <- p.ranks.pro +
  annotate(geom = "table", x =55, y = 0.4, label = list(num.rnk.pro), 
           vjust = 1, hjust = 0, size = 4)

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_5/", "p.ranks.pro_fin", ".svg"),
  plot = p.ranks.pro_fin,
  device = svg,
  width = 6.5,
  height = 6)


# mrn
rnk.mrn <- ranks.mrn %>% group_by(stage) %>% filter(cumSum < 0.51)
num.rnk.mrn <- rnk.mrn %>% group_by(stage) %>% summarise("Transcripts at 50%" = n())
num.rnk.mrn %<>% rename(Stage = stage)


p.ranks.mrn_fin <- p.ranks.mrn +
  annotate(geom = "table", x = 75, y = 0.4, label = list(num.rnk.mrn), 
           vjust = 1, hjust = 0, size = 4)
p.ranks.mrn_fin

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_5/", "p.ranks.mrn_fin", ".svg"),
  plot = p.ranks.mrn_fin,
  device = svg,
  width = 6.5,
  height = 6)

tables

check consistency

# ELANE expression
exp_proLc %>% filter(SYMBOL == 'ELANE') %>%
  ggplot(aes(x = stage, y = median)) +
  geom_bar(stat = 'identity')

exp_proLc %>% filter(SYMBOL == 'S100A8') %>%
  ggplot(aes(x = stage, y = median)) +
  geom_bar(stat = 'identity')

location of features in development

prep

# cumulated locatiosn
anno$fan.locs %>% dplyr::count(loc.combined) # 11 locations

# add granule 
anno$fan.locs$loc.combined %<>% recode(
  'Primary' = 'Primary gr.',
  'Secondary' = 'Secondary gr.',
  'Tertiary' = 'Tertiary gr.',
  'Ficolin-1' = 'Ficolin-1 gr.',
  'Secretory' = 'Secretory gr.'
)

# define colors
loc.levels = c(
  'Cytosol',
  'ER u. Golgi',
  'Mitochondrion',
  'Plasma membrane',
  'Nucleus',
  'Primary gr.',
  'Secondary gr.',
  'Tertiary gr.',
  'Ficolin-1 gr.',
  'Secretory gr.',
  'Unknown')
length(loc.levels)

# colors from: https://sashamaps.net/docs/resources/20-colors/
colors.locs <- c(
  '#9A6324',  # Cytosol
  '#f58231',  # 'ER u. Golgi', 
  '#e6194B',  # Mitochondrion
  '#ffe119',  # Plasma membrane
  '#3cb44b',  # Nucleus
  '#1E90FF',  # Primary
  '#87CEEB',  # Secondary
  '#00FFFF',  # Tertiary
  '#42d4f4',  # Ficolin
  '#7FFFD4',  # Secretory
  '#636363'   # Unknown
)

names(colors.locs) <- loc.levels


## check the intensity per location in the maturation stages, as total and as fractions

# add loc
add.loc2ENSG <- function(expL){
  expL$loc <- anno$fan.locs$loc.combined[match(expL$ENSG, anno$fan.locs$ENSG)]
  return(expL)
}

exp_proLc <- add.loc2ENSG(exp_proLc)
exp_mrnLc <- add.loc2ENSG(exp_mrnLc)

exp_proL <- add.loc2ENSG(exp_proL)
exp_mrnL <- add.loc2ENSG(exp_mrnL)

pro

# get sums per loc in stages
pro.locSums <- exp_proLc %>% ungroup %>% group_by(stage, loc) %>% summarise(sum.medians = sum(median))

# calc fractions
pro.fracLocs <- pro.locSums %>% group_by(stage) %>% mutate(tot = sum(sum.medians)) %>% mutate(frac = sum.medians / tot * 100)
pro.fracLocs$loc <- factor(pro.fracLocs$loc, levels = loc.levels)
# pro.fracLocs$frac <- pro.fracLocs %>% reorder(frac, loc)

# as prc
pro.fracLocs$prc <- paste0(round(pro.fracLocs$frac, 0), "%")


# plot
p.frac.loc_pro <- pro.fracLocs %>%
  filter(loc != 'Tertiary gr.') %>%
  droplevels('loc') %>%
  #group_by(Sample) %>%
  #mutate(perc = Percentage* 100/sum(Percentage)) %>%
  ggplot(aes(y = frac, x = stage, fill = loc, label = prc)) +
  geom_flow(aes(alluvium = loc), alpha= .5, color = "white",
            curve_type = "linear", 
            width = .5) +
  geom_col(width = .5, color = "white") +
  scale_fill_manual(values = colors.locs) +
  scale_y_continuous(NULL, expand = c(0,0)) +
  cowplot::theme_minimal_hgrid() +
  theme(panel.grid.major = element_blank(), 
        axis.text.y = element_blank(), 
        axis.ticks.y = element_blank(),
        axis.text=element_text(size=12, face = 'bold'),
        axis.title.x = element_blank()) +
  guides(fill=guide_legend(title="Subcellular\nlocation")) +
  geom_text(size = 4, 
            position = position_stack(vjust = .5),
            fontface = "bold")
p.frac.loc_pro

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_5/", "p.frac.loc_pro", ".svg"),
  plot = p.frac.loc_pro,
  device = svg,
  width = 8,
  height = 6)

mrn

# get sums per loc in stages
mrn.locSums <- exp_mrnLc %>% ungroup %>% group_by(stage, loc) %>% summarise(sum.medians = sum(median))

# calc fractions
mrn.fracLocs <- mrn.locSums %>% ungroup %>% group_by(stage) %>% mutate(tot = sum(sum.medians)) %>% mutate(frac = sum.medians / tot * 100)
mrn.fracLocs$loc <- factor(mrn.fracLocs$loc, levels = loc.levels)
# mrn.fracLocs$frac <- mrn.fracLocs %>% reorder(frac, loc)

# as prc
mrn.fracLocs$prc <- paste0(round(mrn.fracLocs$frac, 0), "%")


# plot
p.frac.loc_mrn <- mrn.fracLocs %>%
  filter(prc != "0%") %>%
  #group_by(Sample) %>%
  #mutate(perc = Percentage* 100/sum(Percentage)) %>%
  ggplot(aes(y = frac, x = stage, fill = loc, label = prc)) +
  geom_flow(aes(alluvium = loc), alpha= .5, color = "white",
            curve_type = "linear", 
            width = .5) +
  geom_col(width = .5, color = "white") +
  scale_fill_manual(values = colors.locs) +
  scale_y_continuous(NULL, expand = c(0,0)) +
  cowplot::theme_minimal_hgrid() +
  theme(panel.grid.major = element_blank(), 
        axis.text.y = element_blank(), 
        axis.ticks.y = element_blank(),
        axis.text=element_text(size=12, face = 'bold'),
        axis.title.x = element_blank()) +
  guides(fill=guide_legend(title="Subcellular\nlocation")) +
  geom_text(size = 4, 
            position = position_stack(vjust = .5),
            fontface = "bold")
p.frac.loc_mrn

ggsave(
  filename = paste0(dir_moa, "/figures/final/fig_5/", "p.frac.loc_mrn", ".svg"),
  plot = p.frac.loc_mrn,
  device = svg,
  width = 8,
  height = 6)

marker features of each omics level

transcriptome

# MB <-  
# PM <- 
# MC <-
# MM <- 
# B <- 
# S <- 
# PMN <- c()