Last update: 24-04-09
Last markdown compiled:
ToDo: - format
Version 15.03.24
knitr::opts_chunk$set(warning = FALSE, message = FALSE, results = FALSE)
# dirs
dir_moa <- "/home/isar/Dropbox/scn_moa/" # Sebastian @Linux
# dir_moa <- "/Users/home_sh/Library/CloudStorage/Dropbox/scn_moa" # Sebastian @Mac
# data, revised 2024.01.09
# exp
load(file = paste0(dir_moa, "/data/final/", "gnp.pro.RData"))
load(file = paste0(dir_moa, "/data/final/", "gnp.mrn.RData"))
load(file = paste0(dir_moa, "/data/final/", "gnp.mic.RData"))
#load(file = paste0(dir_moa, "/data/final/", "scn.pro.RData")) # discarded prom project
# results
load(file = paste0(dir_moa, "/data/final/", "dex.all.RData"))
load(file = paste0(dir_moa, "/data/final/", "timeomics.singles.RData"))
load(file = paste0(dir_moa, "/data/final/", "res.wgcna.RData"))
load(file = paste0(dir_moa, "/data/final/", "anno.RData"))
## packages
source(paste0(dir_moa, "/scripts/", "scn_moa_fct.R"))
# load packagea
sapply(packages_general, require, character.only = TRUE)
# sapply(packages_WGCNA, require, character.only = TRUE)
library("viridis")
library(ggpmisc)
library(gt)
library(ggalluvial)
# extract data
fan <- gnp.mrn$fan
# pro: single VSN normalized intensities
exp_proL <- as.data.frame(t(gnp.pro$exp_pro_vsn))
exp_proL$UNIPROT <- rownames(exp_proL)
exp_proL <- exp_proL %>% pivot_longer(!UNIPROT, names_to = "id", values_to = "value")
exp_proL$stage <- gnp.pro$san_pro$stage[match(exp_proL$id, gnp.pro$san_pro$id)]
exp_proL$SYMBOL <- fan$SYMBOL[match(exp_proL$UNIPROT, fan$UNIPROT)]
exp_proL$ENSG <- fan$ENSG[match(exp_proL$UNIPROT, fan$UNIPROT)]
# pro: means
exp_proLc <- exp_proL %>%
select(ENSG, stage, value) %>%
group_by(ENSG, stage) %>%
dplyr::summarise(medianLg2 = median(value))
exp_proLc$SYMBOL <- fan$SYMBOL[match(exp_proLc$ENSG, fan$ENSG)]
exp_proLc$stage <- factor(exp_proLc$stage, levels = c('MB', 'PM', 'MC', 'MM', 'B', 'PMN'))
exp_proLc$median <- 2**exp_proLc$medianLg2
# exp_proWc <- exp_proLc %>% select(SYMBOL, stage, median) %>%
# pivot_wider(names_from = stage, values_from = median)
# mrn: single values
exp_mrnL <- as.data.frame(t(gnp.mrn$exp_mrn_tpm_vsn))
exp_mrnL$ENSG <- rownames(exp_mrnL)
exp_mrnL <- exp_mrnL %>% pivot_longer(!ENSG, names_to = "id", values_to = "value")
exp_mrnL$stage <- gnp.mrn$san_mrn$stage[match(exp_mrnL$id, gnp.mrn$san_mrn$id)]
exp_mrnL$SYMBOL <- fan$SYMBOL[match(exp_mrnL$ENSG, fan$ENSG)]
# mrn: means
exp_mrnLc <- exp_mrnL %>%
select(ENSG, stage, value) %>%
group_by(ENSG, stage) %>%
dplyr::summarise(medianLg2= median(value, na.rm = T))
exp_mrnLc$SYMBOL <- fan$SYMBOL[match(exp_mrnLc$ENSG, fan$ENSG)]
exp_mrnLc$stage <- factor(exp_mrnLc$stage, levels = c('MB', 'PM', 'MC', 'MM', 'B', 'S', 'PMN'))
exp_mrnLc$median <- 2**exp_mrnLc$medianLg2
# mic: means
# single values
exp_micL <- as.data.frame(t(gnp.mic$exp_mic_vsn))
exp_micL$SYMBOL <- rownames(exp_micL)
exp_micL <- exp_micL %>% pivot_longer(!SYMBOL, names_to = "id", values_to = "value")
exp_micL$stage <- gnp.mic$san_mic$stage[match(exp_micL$id, gnp.mic$san_mic$id)]
exp_micLc <- exp_micL %>%
select(SYMBOL, stage, value) %>%
group_by(SYMBOL, stage) %>%
dplyr::summarise(medianLg2= median(value, na.rm = T))
exp_micLc$stage <- factor(exp_micLc$stage, levels = c('MB', 'MC', 'MM', 'B', 'S', 'PMN'))
exp_micLc$median <- 2**exp_micLc$medianLg2
# # turn into wide to get direction
# exp_micWc <- exp_micLc %>% select(SYMBOL, stage, median) %>%
# pivot_wider(names_from = stage, values_from = median)
#exp_micWc$direction.MBtoMC <- ifelse(exp_micWc$MB > exp_micWc$MC, 'down', 'up')
#exp_micWc$direction <- ifelse(exp_micWc$MB > exp_micWc$PMN, 'down', 'up')
# transfer
#exp_micLc$direction.MBtoMC <- exp_micWc$direction.MBtoMC[match(exp_micLc$SYMBOL, exp_micWc$SYMBOL)]
To understand composition of the total omics space, we plot feature ranks per omics plane
## proteome
calc_CumSum_per_stage.pro <- function(dat, STAGE){
# isolate
dat_stage <- dat %>% filter(stage == STAGE)
# total_Mol_stage = sum(dat_stage$value) # this seems unneccesary
dat_stage <- dat_stage %>% arrange(desc(value))
# calc cumMol
dat_stage$cumSum <- cumsum(dat_stage$value)/sum(dat_stage$value)
# rank
dat_stage %<>% mutate(rank = c(1:3156))
return(dat_stage)
}
## apply stage wise
apply_CumSum.allStages.pro <- function(moNr.l){
dat.pro_MB <- calc_CumSum_per_stage.pro(moNr.l, 'MB')
dat.pro_PM <- calc_CumSum_per_stage.pro(moNr.l, 'PM')
dat.pro_MC <- calc_CumSum_per_stage.pro(moNr.l, 'MC')
dat.pro_MM <- calc_CumSum_per_stage.pro(moNr.l, 'MM')
dat.pro_B <- calc_CumSum_per_stage.pro(moNr.l, 'B')
#dat.pro_S <- calc_CumSum_per_stage(moNr.l, 'S')
dat.pro_PMN <- calc_CumSum_per_stage.pro(moNr.l, 'PMN')
# combine and format
rs <- rbind(dat.pro_MB, dat.pro_PM, dat.pro_MC, dat.pro_MM, dat.pro_B, dat.pro_PMN)
rs %<>% ungroup()
return(rs)
}
## transcriptome
calc_CumSum_per_stage.mrn <- function(dat, STAGE){
# isolate
dat_stage <- dat %>% filter(stage == STAGE)
# total_Mol_stage = sum(dat_stage$value) # this seems unneccesary
dat_stage <- dat_stage %>% arrange(desc(value))
# calc cumMol
dat_stage$cumSum <- cumsum(dat_stage$value)/sum(dat_stage$value)
# rank
dat_stage %<>% mutate(rank = c(1:8432))
return(dat_stage)
}
## apply stage wise
apply_CumSum.allStages.mrn <- function(moNr.l){
dat.pro_MB <- calc_CumSum_per_stage.mrn(moNr.l, 'MB')
dat.pro_PM <- calc_CumSum_per_stage.mrn(moNr.l, 'PM')
dat.pro_MC <- calc_CumSum_per_stage.mrn(moNr.l, 'MC')
dat.pro_MM <- calc_CumSum_per_stage.mrn(moNr.l, 'MM')
dat.pro_B <- calc_CumSum_per_stage.mrn(moNr.l, 'B')
dat.pro_S <- calc_CumSum_per_stage.mrn(moNr.l, 'S')
dat.pro_PMN <- calc_CumSum_per_stage.mrn(moNr.l, 'PMN')
# combine and format
rs <- rbind(dat.pro_MB, dat.pro_PM, dat.pro_MC, dat.pro_MM, dat.pro_B, dat.pro_S, dat.pro_PMN)
rs %<>% ungroup()
return(rs)
}
## miRNAnome
calc_CumSum_per_stage.mic <- function(dat, STAGE){
# isolate
dat_stage <- dat %>% filter(stage == STAGE)
# total_Mol_stage = sum(dat_stage$value) # this seems unneccesary
dat_stage <- dat_stage %>% arrange(desc(value))
# calc cumMol
dat_stage$cumSum <- cumsum(dat_stage$value)/sum(dat_stage$value)
# rank
dat_stage %<>% mutate(rank = c(1:283))
return(dat_stage)
}
## apply stage wise
apply_CumSum.allStages.mic <- function(moNr.l){
dat.pro_MB <- calc_CumSum_per_stage.mic(moNr.l, 'MB')
#dat.pro_PM <- calc_CumSum_per_stage(moNr.l, 'PM')
dat.pro_MC <- calc_CumSum_per_stage.mic(moNr.l, 'MC')
dat.pro_MM <- calc_CumSum_per_stage.mic(moNr.l, 'MM')
dat.pro_B <- calc_CumSum_per_stage.mic(moNr.l, 'B')
dat.pro_S <- calc_CumSum_per_stage.mic(moNr.l, 'S')
dat.pro_PMN <- calc_CumSum_per_stage.mic(moNr.l, 'PMN')
# combine and format
rs <- rbind(dat.pro_MB, dat.pro_MC, dat.pro_MM, dat.pro_B, dat.pro_S, dat.pro_PMN)
rs %<>% ungroup()
return(rs)
}
plot.ranks <- function(ranksDF, xLIM, yLAB, targets){
ranksDF$LAB <- ifelse(ranksDF$SYMBOL %in% targets, ranksDF$SYMBOL, NA)
p <- ranksDF %>% ggplot(aes(x= rank, y= cumSum, color = stage, label = LAB)) +
geom_point() +
geom_hline(yintercept = 0.5, linetype="dashed", alpha=0.7) +
geom_text_repel(color= 'black') +
xlab("Rank by abundance") +
ylab(yLAB) +
#theme(legend.position = "none") +
xlim(xLIM) +
scale_color_manual(values = colors_stage, name = 'Stage') +
theme_light(base_size = 20) +
guides(colour = guide_legend(override.aes = list(size=10))) +
theme(legend.title = element_text(size = 12), legend.text = element_text(size = 9),
legend.margin=margin(-5,-5,-5,-15))
#labs(title = paste0("Cumulative abundance:", type4title),
# caption = CAPTION)
return(p)
}
# you need to reverse the log2!
# ungroup
exp_proLc %<>% ungroup()
exp_mrnLc %<>% ungroup()
exp_micLc %<>% ungroup()
## medians as value (due to functions above)
exp_proLc$value <- exp_proLc$median
exp_mrnLc$value <- exp_mrnLc$median
exp_micLc$value <- exp_micLc$median
ranks.pro <- apply_CumSum.allStages.pro(exp_proLc)
ranks.mrn <- apply_CumSum.allStages.mrn(exp_mrnLc)
ranks.mic <- apply_CumSum.allStages.mic(exp_micLc)
p.ranks.pro <- plot.ranks(ranks.pro, c(0, 100), 'Cumulative protein abundance', NA)
p.ranks.pro
p.ranks.mrn <- plot.ranks(ranks.mrn, c(0, 150), 'Cumulative mRNA abundance', NA)
p.ranks.mrn
p.ranks.mic <- plot.ranks(ranks.mic, c(0, 100), 'Cumulative miRNA abundance', NA)
p.ranks.mic
## add numbers
# pro
rnk.pro <- ranks.pro %>% group_by(stage) %>% filter(cumSum < 0.51)
num.rnk.pro <- rnk.pro %>% group_by(stage) %>% summarise("Proteins at 50%" = n())
num.rnk.pro %<>% rename(Stage = stage)
p.ranks.pro_fin <- p.ranks.pro +
annotate(geom = "table", x =55, y = 0.4, label = list(num.rnk.pro),
vjust = 1, hjust = 0, size = 4)
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_5/", "p.ranks.pro_fin", ".svg"),
plot = p.ranks.pro_fin,
device = svg,
width = 6.5,
height = 6)
# mrn
rnk.mrn <- ranks.mrn %>% group_by(stage) %>% filter(cumSum < 0.51)
num.rnk.mrn <- rnk.mrn %>% group_by(stage) %>% summarise("Transcripts at 50%" = n())
num.rnk.mrn %<>% rename(Stage = stage)
p.ranks.mrn_fin <- p.ranks.mrn +
annotate(geom = "table", x = 75, y = 0.4, label = list(num.rnk.mrn),
vjust = 1, hjust = 0, size = 4)
p.ranks.mrn_fin
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_5/", "p.ranks.mrn_fin", ".svg"),
plot = p.ranks.mrn_fin,
device = svg,
width = 6.5,
height = 6)
# ELANE expression
exp_proLc %>% filter(SYMBOL == 'ELANE') %>%
ggplot(aes(x = stage, y = median)) +
geom_bar(stat = 'identity')
exp_proLc %>% filter(SYMBOL == 'S100A8') %>%
ggplot(aes(x = stage, y = median)) +
geom_bar(stat = 'identity')
# cumulated locatiosn
anno$fan.locs %>% dplyr::count(loc.combined) # 11 locations
# add granule
anno$fan.locs$loc.combined %<>% recode(
'Primary' = 'Primary gr.',
'Secondary' = 'Secondary gr.',
'Tertiary' = 'Tertiary gr.',
'Ficolin-1' = 'Ficolin-1 gr.',
'Secretory' = 'Secretory gr.'
)
# define colors
loc.levels = c(
'Cytosol',
'ER u. Golgi',
'Mitochondrion',
'Plasma membrane',
'Nucleus',
'Primary gr.',
'Secondary gr.',
'Tertiary gr.',
'Ficolin-1 gr.',
'Secretory gr.',
'Unknown')
length(loc.levels)
# colors from: https://sashamaps.net/docs/resources/20-colors/
colors.locs <- c(
'#9A6324', # Cytosol
'#f58231', # 'ER u. Golgi',
'#e6194B', # Mitochondrion
'#ffe119', # Plasma membrane
'#3cb44b', # Nucleus
'#1E90FF', # Primary
'#87CEEB', # Secondary
'#00FFFF', # Tertiary
'#42d4f4', # Ficolin
'#7FFFD4', # Secretory
'#636363' # Unknown
)
names(colors.locs) <- loc.levels
## check the intensity per location in the maturation stages, as total and as fractions
# add loc
add.loc2ENSG <- function(expL){
expL$loc <- anno$fan.locs$loc.combined[match(expL$ENSG, anno$fan.locs$ENSG)]
return(expL)
}
exp_proLc <- add.loc2ENSG(exp_proLc)
exp_mrnLc <- add.loc2ENSG(exp_mrnLc)
exp_proL <- add.loc2ENSG(exp_proL)
exp_mrnL <- add.loc2ENSG(exp_mrnL)
# get sums per loc in stages
pro.locSums <- exp_proLc %>% ungroup %>% group_by(stage, loc) %>% summarise(sum.medians = sum(median))
# calc fractions
pro.fracLocs <- pro.locSums %>% group_by(stage) %>% mutate(tot = sum(sum.medians)) %>% mutate(frac = sum.medians / tot * 100)
pro.fracLocs$loc <- factor(pro.fracLocs$loc, levels = loc.levels)
# pro.fracLocs$frac <- pro.fracLocs %>% reorder(frac, loc)
# as prc
pro.fracLocs$prc <- paste0(round(pro.fracLocs$frac, 0), "%")
# plot
p.frac.loc_pro <- pro.fracLocs %>%
filter(loc != 'Tertiary gr.') %>%
droplevels('loc') %>%
#group_by(Sample) %>%
#mutate(perc = Percentage* 100/sum(Percentage)) %>%
ggplot(aes(y = frac, x = stage, fill = loc, label = prc)) +
geom_flow(aes(alluvium = loc), alpha= .5, color = "white",
curve_type = "linear",
width = .5) +
geom_col(width = .5, color = "white") +
scale_fill_manual(values = colors.locs) +
scale_y_continuous(NULL, expand = c(0,0)) +
cowplot::theme_minimal_hgrid() +
theme(panel.grid.major = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text=element_text(size=12, face = 'bold'),
axis.title.x = element_blank()) +
guides(fill=guide_legend(title="Subcellular\nlocation")) +
geom_text(size = 4,
position = position_stack(vjust = .5),
fontface = "bold")
p.frac.loc_pro
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_5/", "p.frac.loc_pro", ".svg"),
plot = p.frac.loc_pro,
device = svg,
width = 8,
height = 6)
# get sums per loc in stages
mrn.locSums <- exp_mrnLc %>% ungroup %>% group_by(stage, loc) %>% summarise(sum.medians = sum(median))
# calc fractions
mrn.fracLocs <- mrn.locSums %>% ungroup %>% group_by(stage) %>% mutate(tot = sum(sum.medians)) %>% mutate(frac = sum.medians / tot * 100)
mrn.fracLocs$loc <- factor(mrn.fracLocs$loc, levels = loc.levels)
# mrn.fracLocs$frac <- mrn.fracLocs %>% reorder(frac, loc)
# as prc
mrn.fracLocs$prc <- paste0(round(mrn.fracLocs$frac, 0), "%")
# plot
p.frac.loc_mrn <- mrn.fracLocs %>%
filter(prc != "0%") %>%
#group_by(Sample) %>%
#mutate(perc = Percentage* 100/sum(Percentage)) %>%
ggplot(aes(y = frac, x = stage, fill = loc, label = prc)) +
geom_flow(aes(alluvium = loc), alpha= .5, color = "white",
curve_type = "linear",
width = .5) +
geom_col(width = .5, color = "white") +
scale_fill_manual(values = colors.locs) +
scale_y_continuous(NULL, expand = c(0,0)) +
cowplot::theme_minimal_hgrid() +
theme(panel.grid.major = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text=element_text(size=12, face = 'bold'),
axis.title.x = element_blank()) +
guides(fill=guide_legend(title="Subcellular\nlocation")) +
geom_text(size = 4,
position = position_stack(vjust = .5),
fontface = "bold")
p.frac.loc_mrn
ggsave(
filename = paste0(dir_moa, "/figures/final/fig_5/", "p.frac.loc_mrn", ".svg"),
plot = p.frac.loc_mrn,
device = svg,
width = 8,
height = 6)
# MB <-
# PM <-
# MC <-
# MM <-
# B <-
# S <-
# PMN <- c()