Last update: 23-10-14

Last markdown compiled: 23-10-14

ToDo:

include SCN

knitr::opts_chunk$set(warning = FALSE, message = FALSE, results = FALSE) 

# dirs
## Sebastian @Linux
dir_moa <- "/home/isar/Dropbox/scn_moa/"

# data
# revised 2023.10.13
load(file = paste0(dir_moa, "/data/final/", "gnp.pro.RData"))
load(file = paste0(dir_moa, "/data/final/", "gnp.mrn.RData"))
load(file = paste0(dir_moa, "/data/final/", "gnp.mic.RData"))
# load(paste0(dir_moa, "/data/final/", "scn.pro.temp.RData"))

## packages
source(paste0(dir_moa, "/scripts/", "scn_moa_fct.R"))

# load packagea
sapply(packages_general, require, character.only = TRUE)
sapply(packages_dex, require, character.only = TRUE)

fct

limma_getRes <- function(df, design, contrast) {
  fit1 <- lmFit(df, design)
  fit2 <- contrasts.fit(fit1, contrasts = contrast)
  fit3 <- eBayes(fit2, trend = T, robust = T)
  summary <- decideTests(fit3, p.value= 0.05, lfc=1, method = "separate", adjust.method = "BH")
  results <- list(
    fit1 = fit1,
    fit2 = fit2,
    fit3 = fit3,
    summary = summary)
  return(results)
}

Pro

Main stages

# get data
san_pro <- gnp.pro$san_pro
exp_pro_vsn <- gnp.pro$exp_pro_vsn # try with both
exp_pro_lg2 <- gnp.pro$exp_pro_lg2 # try with both

# format: fid2rownames
exp_pro_vsn <- as.data.frame(t(exp_pro_vsn))
exp_pro_lg2 <- as.data.frame(t(exp_pro_lg2))

# match san order
identical(colnames(exp_pro_vsn), colnames(exp_pro_lg2)) # samples
san_pro <- san_pro[match(colnames(exp_pro_vsn), san_pro$id),]
identical(san_pro$id, colnames(exp_pro_lg2))

# dex design
design_stage_pro <- model.matrix(
  ~0 + stage, 
  data = san_pro)

# contrast
contrast_stage_pro <- makeContrasts(
  ## progression
  MBtoPM = stagePM - stageMB, # progression main 1
  PMtoMC = stageMC - stagePM, # progression main 2
  MCtoMM = stageMM - stageMC, # progression main 3
  MMtoB = stageB - stageMM, # progression main 4
  BtoPMN = stagePMN - stageB, # progression main 5
  # stage jumps: MB
  MBtoMC = stageMC - stageMB, # jump MB
  MBtoMM = stageMM - stageMB, # jump MB
  MBtoB = stageB - stageMB, # jump MB
  MBtoPMN = stagePMN - stageMB, # jump MB
  # stage jumps: PM
  PMtoMM = stageMM - stagePM, # jump PM
  PMtoB = stageB - stagePM, # jump PM
  PMtoPMN = stagePMN - stagePM, # jump PM
  # stage jumps MC
  MCtoB = stageB - stageMC, # jump MC
  MCtoPMN = stagePMN - stageMC, # jump MC
  # stage jumps MM
  MMtoPMN = stagePMN - stageMM, # jump MM
  ## vs PMN (to match vs Healthy in SRP. Do this both ways so no need for x(-1) later on)
  MBvsPMN = stageMB - stagePMN, #vsPMN 1.1
  PMvsPMN = stagePM - stagePMN, #vsPMN 1.2
  MCvsPMN = stageMC - stagePMN, #vsPMN 1.3
  MMvsPMN = stageMM - stagePMN, #vsPMN 1.4
  BvsPMN = stageB - stagePMN, #vsPMN 1.5
  PMNvsMB = stagePMN - stageMB, #vsPMN 2.1
  PMNvsPM = stagePMN - stagePM, #vsPMN 2.2
  PMNvsMC = stagePMN - stageMC, #vsPMN 2.3
  PMNvsMM = stagePMN - stageMM, #vsPMN 2.4
  PMNvsB = stagePMN - stageB, #vsPMN 2.5
  # progression 
  levels = design_stage_pro)

# execute limma_getRes fct
FITs_stage_pro_vsn <- limma_getRes(exp_pro_vsn, design_stage_pro, contrast_stage_pro)
FITs_stage_pro_lg2 <- limma_getRes(exp_pro_lg2, design_stage_pro, contrast_stage_pro)

#export diffEx tables from limmas FITs objects (use fit3)
extract_stage_pro <- function(fit3){
  # progression
  MBtoPM <- topTable(fit3, number = nrow(fit3), coef = 'MBtoPM') #
  PMtoMC <- topTable(fit3, number = nrow(fit3), coef = 'PMtoMC') #
  MCtoMM <- topTable(fit3, number = nrow(fit3), coef = 'MCtoMM') #
  MMtoB <- topTable(fit3, number = nrow(fit3), coef = 'MMtoB') #
  BtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'BtoPMN') #
  # stage jumps: MB
  MBtoMC <- topTable(fit3, number = nrow(fit3), coef = 'MBtoMC') #
  MBtoMM <- topTable(fit3, number = nrow(fit3), coef = 'MBtoMM') #
  MBtoB <- topTable(fit3, number = nrow(fit3), coef = 'MBtoB') #
  MBtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'MBtoPMN') #
  # stage jumps: PM
  PMtoMM <- topTable(fit3, number = nrow(fit3), coef = 'PMtoMM') #
  PMtoB <- topTable(fit3, number = nrow(fit3), coef = 'PMtoB') #
  PMtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'PMtoPMN') #
  # stage jumps: MC
  MCtoB <- topTable(fit3, number = nrow(fit3), coef = 'MCtoB') #
  MCtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'MCtoPMN') #
  # stage jumps: MM
  MMtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'MMtoPMN') #
  # vs PMN and viceVersa
  MBvsPMN <- topTable(fit3, number = nrow(fit3), coef = "MBvsPMN") #
  PMvsPMN <- topTable(fit3, number = nrow(fit3), coef = "PMvsPMN") #
  MCvsPMN <- topTable(fit3, number = nrow(fit3), coef = "MCvsPMN") #
  MMvsPMN <- topTable(fit3, number = nrow(fit3), coef = "MMvsPMN") #
  BvsPMN <- topTable(fit3, number = nrow(fit3), coef = "BvsPMN") #
  PMNvsMB <- topTable(fit3, number = nrow(fit3), coef = "PMNvsMB") #
  PMNvsPM <- topTable(fit3, number = nrow(fit3), coef = "PMNvsPM") #
  PMNvsMC <- topTable(fit3, number = nrow(fit3), coef = "PMNvsMC") #
  PMNvsMM <- topTable(fit3, number = nrow(fit3), coef = "PMNvsMM") #
  PMNvsB <- topTable(fit3, number = nrow(fit3), coef = "PMNvsB") #
  # result list
  result <- list(
    MBtoPM = MBtoPM ,
    PMtoMC = PMtoMC,
    MCtoMM =MCtoMM,
    MMtoB = MMtoB,
    BtoPMN= BtoPMN,
    MBtoMC= MBtoMC,
    MBtoMM= MBtoMM,
    MBtoB=MBtoB,
    MBtoPMN=MBtoPMN,
    PMtoMM=PMtoMM,
    PMtoB =PMtoB ,
    PMtoPMN =PMtoPMN ,
    MCtoB =MCtoB ,
    MCtoPMN= MCtoPMN ,
    MMtoPMN= MMtoPMN ,
    MBvsPMN =MBvsPMN,
    PMvsPMN =PMvsPMN ,
    MCvsPMN =MCvsPMN,
    MMvsPMN= MMvsPMN,
    BvsPMN =BvsPMN,
    PMNvsMB= PMNvsMB,
    PMNvsPM= PMNvsPM,
    PMNvsMC =PMNvsMC ,
    PMNvsMM= PMNvsMM,
    PMNvsB =PMNvsB
  )
  return(result)
}

## extract results
dex_stage_pro_vsn <- extract_stage_pro(FITs_stage_pro_vsn$fit3)
dex_stage_pro_lg2 <- extract_stage_pro(FITs_stage_pro_lg2$fit3)

# make rownames fid
dex_stage_pro_vsn <- map(dex_stage_pro_vsn, rownames_to_column, var = "fid")
dex_stage_pro_lg2 <- map(dex_stage_pro_lg2, rownames_to_column, var = "fid")

# as df
dex_stage_pro_vsn <- bind_rows(dex_stage_pro_vsn, .id = "comparison")
dex_stage_pro_lg2 <- bind_rows(dex_stage_pro_lg2, .id = "comparison")

# unique(dex_stage_pro_vsn$comparison) # OK



### get ANOVA

# vsn
anova_pro_vsn <- topTable(FITs_stage_pro_vsn$fit3, coef=1:5, number = nrow(FITs_stage_pro_vsn$fit3)) 
anova_pro_vsn %>% filter(adj.P.Val < 0.05) %>% nrow() # 1566
anova_pro_vsn$fid <- rownames(anova_pro_vsn)

# non
anova_pro_lg2 <- topTable(FITs_stage_pro_lg2$fit3, coef=1:5, number = nrow(FITs_stage_pro_lg2$fit3)) 
anova_pro_lg2 %>% filter(adj.P.Val < 0.05) %>% nrow() # 1804
anova_pro_lg2$fid <- rownames(anova_pro_lg2)

check

# pop_gen
comp_progression_pro <- c('MBtoPM', 'PMtoMC', 'MCtoMM', 'MMtoB', 'BtoPMN')
comp_vsPMN_pro <- c('MBvsPMN', 'PMvPMN', 'MCvsPMN', 'MMvsPMN', 'BvsPMN')

# prep
res_pro_vsn <- dex_stage_pro_vsn %>% 
  filter(comparison %in% comp_progression_pro) %>%
  group_by(comparison) %>%
  dplyr::summarize(
    n_up = sum(adj.P.Val < 0.05 & logFC >= 1),
    n_down = -sum(adj.P.Val < 0.05 & logFC <= -1) # checked
  ) %>%
  pivot_longer(-comparison, names_to = "direction", values_to = "n")
res_pro_vsn$type = "vsn"

res_pro_lg2 <- dex_stage_pro_lg2 %>% 
  filter(comparison %in% comp_progression_pro) %>%
  group_by(comparison) %>%
  dplyr::summarize(
    n_up = sum(adj.P.Val < 0.05 & logFC >= 1),
    n_down = -sum(adj.P.Val < 0.05 & logFC <= -1) # checked
  ) %>%
  pivot_longer(-comparison, names_to = "direction", values_to = "n")
res_pro_lg2$type = "lg2"

res_dex_pro <- rbind(res_pro_vsn, res_pro_lg2)


# plot
res_dex_pro %>%
  ggplot(aes(x = factor(comparison, level = comp_progression_pro), y = n, fill = type)) +
   geom_bar(stat="identity", position = "dodge") +
   geom_text(aes(y = n + 40 * sign(n), label=n), position = position_dodge(width = .9)) +
  ggtitle("Dex in proteome: normalisation dependecy \n [abs(FC)>=1, adj.p<0.05]") +
  ylab("Number of dex") +
  geom_hline(yintercept = 0)

Sub stages

# design
design_substage_pro <- model.matrix(
  ~0 + substage, 
  data = san_pro)

colSums(design_substage_pro)

# contrast: (try if single sampe works... prob not though)
contrast_substage_pro <- makeContrasts(
  eMCtolMC =  substagelMC - substageeMC,
  eBtolB = substagelB - substageeB,
  levels = design_substage_pro)

# execute limma_getRes fct
FIT_substage_vsn <- limma_getRes(exp_pro_vsn, design_substage_pro, contrast_substage_pro)

#export diffEx tables from limmas fit3 objects
extract_substage_pro <- function(fit3){
  eMCtolMC <- topTable(fit3, number = nrow(fit3), coef = "eMCtolMC")
  eBtolB <- topTable(fit3, number = nrow(fit3), coef = "eBtolB")
  result <- list(
    eMCtolMC = eMCtolMC,
    eBtolB = eBtolB
  )
  return(result)
}

#extract results
dex_substage_pro_vsn <- extract_substage_pro(FIT_substage_vsn$fit3)
#push rownames to UNIPROT col
dex_substage_pro_vsn <- map(dex_substage_pro_vsn, rownames_to_column, var = "fid")
# as df
dex_substage_pro_vsn <- bind_rows(dex_substage_pro_vsn, .id = "comparison")


res_pro_vsn_substage <- dex_substage_pro_vsn %>% 
  group_by(comparison) %>%
  dplyr::summarize(
    n_up = sum(adj.P.Val < 0.05 & logFC >= 1),
    n_down = -sum(adj.P.Val < 0.05 & logFC <= -1) # checked
  ) %>%
  pivot_longer(-comparison, names_to = "direction", values_to = "n")


res_pro_vsn_substage %>%
  ggplot(aes(x = factor(comparison), y = n)) +
   geom_bar(stat="identity", position = "dodge") +
   geom_text(aes(y = n + 2 * sign(n), label=n), position = position_dodge(width = .9)) +
  ggtitle("Dex in proteome: substages") +
  ylab("Number of dex") +
  geom_hline(yintercept = 0)

mRNA

Main stages

# get data: counts 
san_mrn <- gnp.mrn$san_mrn
exp_mrn_vsn <- gnp.mrn$exp_mrn_cou_vsn # contains NA
exp_mrn_lg2 <- gnp.mrn$exp_mrn_cou_vsn # contains NA

# format: fid2rownames
exp_mrn_vsn <- as.data.frame(t(exp_mrn_vsn))
exp_mrn_lg2 <- as.data.frame(t(exp_mrn_lg2))

# match san order
identical(colnames(exp_mrn_vsn), colnames(exp_mrn_lg2)) # samples
san_mrn <- san_mrn[match(colnames(exp_mrn_vsn), san_mrn$id),]
identical(san_mrn$id, colnames(exp_mrn_lg2))

# dex design
design_stage_mrn <- model.matrix(
  ~0 + stage, 
  data = san_mrn)

# contrast
contrast_stage_mrn <- makeContrasts(
  ## progression
  MBtoPM = stagePM - stageMB, # progression main 1
  PMtoMC = stageMC - stagePM, # progression main 2
  MCtoMM = stageMM - stageMC, # progression main 3
  MMtoB = stageB - stageMM, # progression main 4
  BtoS = stageS - stageB, # progression main 5
  StoPMN = stagePMN - stageS, # progression main 6
  BtoPMN = stagePMN - stageB, # progression to match pro
  # stage jumps: MB
  MBtoMC = stageMC - stageMB, # jump MB
  MBtoMM = stageMM - stageMB, # jump MB
  MBtoB = stageB - stageMB, # jump MB
  MBtoS = stageS - stageMB,
  MBtoPMN = stagePMN - stageMB, # jump MB
  # stage jumps: PM
  PMtoMM = stageMM - stagePM, # jump PM
  PMtoB = stageB - stagePM, # jump PM
  PMtoS = stageS - stagePM, 
  PMtoPMN = stagePMN - stagePM, # jump PM
  # stage jumps MC
  MCtoB = stageB - stageMC, # jump MC
  MCtoS = stageS - stageMC,
  MCtoPMN = stagePMN - stageMC, # jump MC
  # stage jumps MM
  MMtoS = stageS - stageMM,
  MMtoPMN = stagePMN - stageMM, # jump MM
  ## vs PMN (to match vs Healthy in SRP. Do this both ways so no need for x(-1) later on)
  MBvsPMN = stageMB - stagePMN, #vsPMN 1.1
  PMvsPMN = stagePM - stagePMN, #vsPMN 1.2
  MCvsPMN = stageMC - stagePMN, #vsPMN 1.3
  MMvsPMN = stageMM - stagePMN, #vsPMN 1.4
  SvsPMN = stageS - stagePMN,
  BvsPMN = stageB - stagePMN, #vsPMN 1.5
  PMNvsMB = stagePMN - stageMB, #vsPMN 2.1
  PMNvsPM = stagePMN - stagePM, #vsPMN 2.2
  PMNvsMC = stagePMN - stageMC, #vsPMN 2.3
  PMNvsMM = stagePMN - stageMM, #vsPMN 2.4
  PMNvsB = stagePMN - stageB, #vsPMN 2.5
  PMNvsS = stagePMN - stageS,
  # progression 
  levels = design_stage_mrn)

# execute limma_getRes fct
FITs_stage_mrn_vsn <- limma_getRes(exp_mrn_vsn, design_stage_mrn, contrast_stage_mrn)
FITs_stage_mrn_lg2 <- limma_getResn(exp_mrn_lg2, design_stage_mrn, contrast_stage_mrn)

# extract
extract_stage_mrn <- function(fit3){
  # progression
  MBtoPM <- topTable(fit3, number = nrow(fit3), coef = 'MBtoPM') #
  PMtoMC <- topTable(fit3, number = nrow(fit3), coef = 'PMtoMC') #
  MCtoMM <- topTable(fit3, number = nrow(fit3), coef = 'MCtoMM') #
  MMtoB <- topTable(fit3, number = nrow(fit3), coef = 'MMtoB') #
  BtoS <- topTable(fit3, number = nrow(fit3), coef = 'BtoS') #
  BtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'BtoPMN') #
  StoPMN <- topTable(fit3, number = nrow(fit3), coef = 'StoPMN') #
  # stage jumps: MB
  MBtoMC <- topTable(fit3, number = nrow(fit3), coef = 'MBtoMC') #
  MBtoMM <- topTable(fit3, number = nrow(fit3), coef = 'MBtoMM') #
  MBtoB <- topTable(fit3, number = nrow(fit3), coef = 'MBtoB') #
  MBtoS <- topTable(fit3, number = nrow(fit3), coef = 'MBtoS') #
  MBtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'MBtoPMN') #
  # stage jumps: PM
  PMtoMM <- topTable(fit3, number = nrow(fit3), coef = 'PMtoMM') #
  PMtoB <- topTable(fit3, number = nrow(fit3), coef = 'PMtoB') #
  PMtoS <- topTable(fit3, number = nrow(fit3), coef = 'PMtoS') #
  PMtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'PMtoPMN') #
  # stage jumps: MC
  MCtoB <- topTable(fit3, number = nrow(fit3), coef = 'MCtoB') #
  MCtoS <- topTable(fit3, number = nrow(fit3), coef = 'MCtoS') #
  MCtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'MCtoPMN') #
  # stage jumps: MM
  MMtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'MMtoPMN') #
  MMtoS <- topTable(fit3, number = nrow(fit3), coef = 'MMtoS') #
  # vs PMN and viceVersa
  MBvsPMN <- topTable(fit3, number = nrow(fit3), coef = "MBvsPMN") #
  PMvsPMN <- topTable(fit3, number = nrow(fit3), coef = "PMvsPMN") #
  MCvsPMN <- topTable(fit3, number = nrow(fit3), coef = "MCvsPMN") #
  MMvsPMN <- topTable(fit3, number = nrow(fit3), coef = "MMvsPMN") #
  BvsPMN <- topTable(fit3, number = nrow(fit3), coef = "BvsPMN") #
  SvsPMN <- topTable(fit3, number = nrow(fit3), coef = "SvsPMN") #
  PMNvsMB <- topTable(fit3, number = nrow(fit3), coef = "PMNvsMB") #
  PMNvsPM <- topTable(fit3, number = nrow(fit3), coef = "PMNvsPM") #
  PMNvsMC <- topTable(fit3, number = nrow(fit3), coef = "PMNvsMC") #
  PMNvsMM <- topTable(fit3, number = nrow(fit3), coef = "PMNvsMM") #
  PMNvsB <- topTable(fit3, number = nrow(fit3), coef = "PMNvsB") #
  PMNvsS <- topTable(fit3, number = nrow(fit3), coef = "PMNvsS") #
  # result list
  result <- list(
    MBtoPM = MBtoPM ,
    PMtoMC = PMtoMC,
    MCtoMM =MCtoMM,
    MMtoB = MMtoB,
    BtoS = BtoS,
    BtoPMN= BtoPMN,
    StoPMN = StoPMN,
    MBtoMC= MBtoMC,
    MBtoMM= MBtoMM,
    MBtoB=MBtoB,
    MBtoS=MBtoS,
    MBtoPMN=MBtoPMN,
    PMtoMM=PMtoMM,
    PMtoB =PMtoB ,
    PMtoS = PMtoS,
    PMtoPMN =PMtoPMN ,
    MCtoB =MCtoB ,
    MCtoS =MCtoS ,
    MCtoPMN= MCtoPMN ,
    MMtoS=MMtoS,
    MMtoPMN= MMtoPMN ,
    MBvsPMN =MBvsPMN,
    PMvsPMN =PMvsPMN ,
    MCvsPMN =MCvsPMN,
    MMvsPMN= MMvsPMN,
    BvsPMN =BvsPMN,
    SvsPMN=SvsPMN,
    PMNvsMB= PMNvsMB,
    PMNvsPM= PMNvsPM,
    PMNvsMC =PMNvsMC ,
    PMNvsMM= PMNvsMM,
    PMNvsB =PMNvsB,
    PMNvsS=PMNvsS
  )
  return(result)
}

## extract results
dex_stage_mrn_vsn <- extract_stage_mrn(FITs_stage_mrn_vsn$fit3)
dex_stage_mrn_lg2 <- extract_stage_mrn(FITs_stage_mrn_lg2$fit3)

# make rownames fid
dex_stage_mrn_vsn <- map(dex_stage_mrn_vsn, rownames_to_column, var = "fid")
dex_stage_mrn_lg2 <- map(dex_stage_mrn_lg2, rownames_to_column, var = "fid")

# as df
dex_stage_mrn_vsn <- bind_rows(dex_stage_mrn_vsn, .id = "comparison")
dex_stage_mrn_lg2 <- bind_rows(dex_stage_mrn_lg2, .id = "comparison")


### get ANOVA

# vsn
anova_mrn_vsn <- topTable(FITs_stage_mrn_vsn$fit3, coef=1:6, number = nrow(FITs_stage_mrn_vsn$fit3)) 
anova_mrn_vsn %>% filter(adj.P.Val < 0.05) %>% nrow() # 7892
anova_mrn_vsn$fid <- rownames(anova_mrn_vsn)

# non
anova_mrn_lg2 <- topTable(FITs_stage_mrn_lg2$fit3, coef=1:6, number = nrow(FITs_stage_mrn_lg2$fit3)) 
anova_mrn_lg2 %>% filter(adj.P.Val < 0.05) %>% nrow() # 7820
anova_mrn_lg2$fid <- rownames(anova_mrn_lg2)

check

comp_progression_mrn <- c('MBtoPM', 'PMtoMC', 'MCtoMM', 'MMtoB', 'BtoS', 'StoPMN')

# prep
res_mrn_vsn <- dex_stage_mrn_vsn %>% 
  filter(comparison %in% comp_progression_mrn) %>%
  group_by(comparison) %>%
  dplyr::summarize(
    n_up = sum(adj.P.Val < 0.05 & logFC >= 1),
    n_down = -sum(adj.P.Val < 0.05 & logFC <= -1) # checked
  ) %>%
  pivot_longer(-comparison, names_to = "direction", values_to = "n")
res_mrn_vsn$type = "vsn"

res_mrn_lg2 <- dex_stage_mrn_lg2 %>% 
  filter(comparison %in% comp_progression_mrn) %>%
  group_by(comparison) %>%
  dplyr::summarize(
    n_up = sum(adj.P.Val < 0.05 & logFC >= 1),
    n_down = -sum(adj.P.Val < 0.05 & logFC <= -1) # checked
  ) %>%
  pivot_longer(-comparison, names_to = "direction", values_to = "n")
res_mrn_lg2$type = "lg2"

res_dex_mrn <- rbind(res_mrn_vsn, res_mrn_lg2)


# plot
res_dex_mrn %>%
  ggplot(aes(x = factor(comparison, level = comp_progression_mrn), y = n, fill = type)) +
   geom_bar(stat="identity", position = "dodge") +
   geom_text(aes(y = n + 40 * sign(n), label=n), position = position_dodge(width = .9)) +
  ggtitle("Dex in mRNA: normalisation dependecy \n [abs(FC)>=1, adj.p<0.05]") +
  ylab("Number of dex") +
  geom_hline(yintercept = 0)

Sub stages

# design
design_substage_mrn <- model.matrix(
  ~0 + substage, 
  data = san_mrn)

colSums(design_substage_mrn)

# contrast: (try if single sampe works... mrnb not though)
contrast_substage_mrn <- makeContrasts(
  eMCtolMC =  substagelMC - substageeMC,
  eBtolB = substagelB - substageeB,
  levels = design_substage_mrn)

# execute limma_getRes fct
FIT_substage_vsn <- limma_getRes(exp_mrn_vsn, design_substage_mrn, contrast_substage_mrn)

#export diffEx tables from limmas fit3 objects
extract_substage_mrn <- function(fit3){
  eMCtolMC <- topTable(fit3, number = nrow(fit3), coef = "eMCtolMC")
  eBtolB <- topTable(fit3, number = nrow(fit3), coef = "eBtolB")
  result <- list(
    eMCtolMC = eMCtolMC,
    eBtolB = eBtolB
  )
  return(result)
}

#extract results
dex_substage_mrn_vsn <- extract_substage_mrn(FIT_substage_vsn$fit3)
#push rownames to UNImrnT col
dex_substage_mrn_vsn <- map(dex_substage_mrn_vsn, rownames_to_column, var = "fid")
# as df
dex_substage_mrn_vsn <- bind_rows(dex_substage_mrn_vsn, .id = "comparison")

dex_substage_mrn_vsn %>%
  filter(comparison == 'eMCtolMC') %>%
  filter(is.na(adj.P.Val)) # a few have NAs! 

res_mrn_vsn_substage <- dex_substage_mrn_vsn %>% 
  dplyr::group_by(comparison) %>%
  dplyr::summarize(
    n_up = sum(adj.P.Val < 0.05 & logFC >= 1, na.rm = T),
    n_down = -sum(adj.P.Val < 0.05 & logFC <= -1, na.rm = T)) %>%
  pivot_longer(-comparison, names_to = "direction", values_to = "n")


res_mrn_vsn_substage %>%
  ggplot(aes(x = factor(comparison), y = n)) +
   geom_bar(stat="identity", position = "dodge") +
   geom_text(aes(y = n + 2 * sign(n), label=n), position = position_dodge(width = .9)) +
  ggtitle("Dex in mRNA: substages") +
  ylab("Number of dex") +
  geom_hline(yintercept = 0)

microRNA

Main stages

# get data
san_mic <- gnp.mic$san_mic
exp_mic_vsn <- gnp.mic$exp_mic_vsn # non imputed counts
exp_mic_lg2 <- gnp.mic$exp_mic_lg2 # non imputed counts

# format: fid2rownames
exp_mic_vsn <- as.data.frame(t(exp_mic_vsn))
exp_mic_lg2 <- as.data.frame(t(exp_mic_lg2))

# match san order
identical(colnames(exp_mic_vsn), colnames(exp_mic_lg2)) # samples
san_mic <- san_mic[match(colnames(exp_mic_vsn), san_mic$id),]
identical(san_mic$id, colnames(exp_mic_lg2))

# dex design
design_stage_mic <- model.matrix(
  ~0 + stage, 
  data = san_mic)

contrast_mic_stages <- makeContrasts(
  ## progression
  MBtoMC = stageMC - stageMB, # progression main 1
  #PMtoMC = stageMC - stagePM, # progression main 2
  MCtoMM = stageMM - stageMC, # progression main 3
  MMtoB = stageB - stageMM, # progression main 4
  BtoS = stageS - stageB, # progression main 5
  StoPMN = stagePMN - stageS, # progression main 6
  BtoPMN = stagePMN - stageB, # progression to match pro
  # stage jumps: MB
  #MBtoMC = stageMC - stageMB, # jump MB
  MBtoMM = stageMM - stageMB, # jump MB
  MBtoB = stageB - stageMB, # jump MB
  MBtoS = stageS - stageMB,
  MBtoPMN = stagePMN - stageMB, # jump MB
  # stage jumps: PM
  #PMtoMM = stageMM - stagePM, # jump PM
  #PMtoB = stageB - stagePM, # jump PM
  #PMtoS = stageS - stagePM, 
  #PMtoPMN = stagePMN - stagePM, # jump PM
  # stage jumps MC
  MCtoB = stageB - stageMC, # jump MC
  MCtoS = stageS - stageMC,
  MCtoPMN = stagePMN - stageMC, # jump MC
  # stage jumps MM
  MMtoS = stageS - stageMM,
  MMtoPMN = stagePMN - stageMM, # jump MM
  ## vs PMN (to match vs Healthy in SCN. Do this both ways so no need for x(-1) later on)
  MBvsPMN = stageMB - stagePMN, #vsPMN 1.1
  #PMvsPMN = stagePM - stagePMN, #vsPMN 1.2
  MCvsPMN = stageMC - stagePMN, #vsPMN 1.3
  MMvsPMN = stageMM - stagePMN, #vsPMN 1.4
  SvsPMN = stageS - stagePMN,
  BvsPMN = stageB - stagePMN, #vsPMN 1.5
  PMNvsMB = stagePMN - stageMB, #vsPMN 2.1
  #PMNvsPM = stagePMN - stagePM, #vsPMN 2.2
  PMNvsMC = stagePMN - stageMC, #vsPMN 2.3
  PMNvsMM = stagePMN - stageMM, #vsPMN 2.4
  PMNvsB = stagePMN - stageB, #vsPMN 2.5
  PMNvsS = stagePMN - stageS,
  # vs B (because PMN localizes a bit strange in PCA so B may represent the most mature stage)
  MBvsB = stageMB - stageB, #vsB 1.1
  #PMvsB = stagePM - stageB, #vsB 1.2
  MCvsB = stageMC - stageB, #vsB 1.3
  MMvsB = stageMM - stageB, #vsB 1.4
  SvsB = stageS - stageB,
  PMNvsB = stagePMN - stageB, #vsB 1.5
  BvsMB = stageB - stageMB, #vsB 2.1
  #BvsPM = stageB - stagePM, #vsB 2.2
  BvsMC = stageB - stageMC, #vsB 2.3
  BvsMM = stageB - stageMM, #vsB 2.4
  BvsS = stageB - stageS,
  BvsPMN = stageB - stagePMN, #vsB 2.5
  # progression 
  levels = design_stage_mic)

# execute limma_getRes fct
FITs_stages_mic_vsn <- limma_getRes(exp_mic_vsn, design_stage_mic, contrast_mic_stages)
FITs_stages_mic_lg2 <-  limma_getRes(exp_mic_lg2, design_stage_mic, contrast_mic_stages)

#export diffEx tables from limmas fit3 objects
#export diffEx tables from limmas FITs objects (use fit3)
extract_stages_mic <- function(fit3){
  # progression
  MBtoMC <- topTable(fit3, number = nrow(fit3), coef = 'MBtoMC') #
  #PMtoMC <- topTable(fit3, number = nrow(fit3), coef = 'PMtoMC') #
  MCtoMM <- topTable(fit3, number = nrow(fit3), coef = 'MCtoMM') #
  MMtoB <- topTable(fit3, number = nrow(fit3), coef = 'MMtoB') #
  BtoS <- topTable(fit3, number = nrow(fit3), coef = 'BtoS') #
  BtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'BtoPMN') #
  StoPMN <- topTable(fit3, number = nrow(fit3), coef = 'StoPMN') #
  # stage jumps: MB
  MBtoMC <- topTable(fit3, number = nrow(fit3), coef = 'MBtoMC') #
  MBtoMM <- topTable(fit3, number = nrow(fit3), coef = 'MBtoMM') #
  MBtoB <- topTable(fit3, number = nrow(fit3), coef = 'MBtoB') #
  MBtoS <- topTable(fit3, number = nrow(fit3), coef = 'MBtoS') #
  MBtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'MBtoPMN') #
  # stage jumps: MC
  MCtoB <- topTable(fit3, number = nrow(fit3), coef = 'MCtoB') #
  MCtoS <- topTable(fit3, number = nrow(fit3), coef = 'MCtoS') #
  MCtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'MCtoPMN') #
  # stage jumps: MM
  MMtoPMN <- topTable(fit3, number = nrow(fit3), coef = 'MMtoPMN') #
  MMtoS <- topTable(fit3, number = nrow(fit3), coef = 'MMtoS') #
  # vs PMN and viceVersa
  MBvsPMN <- topTable(fit3, number = nrow(fit3), coef = "MBvsPMN") #
  #PMvsPMN <- topTable(fit3, number = nrow(fit3), coef = "PMvsPMN") #
  MCvsPMN <- topTable(fit3, number = nrow(fit3), coef = "MCvsPMN") #
  MMvsPMN <- topTable(fit3, number = nrow(fit3), coef = "MMvsPMN") #
  BvsPMN <- topTable(fit3, number = nrow(fit3), coef = "BvsPMN") #
  SvsPMN <- topTable(fit3, number = nrow(fit3), coef = "SvsPMN") #
  PMNvsMB <- topTable(fit3, number = nrow(fit3), coef = "PMNvsMB") #
  #PMNvsPM <- topTable(fit3, number = nrow(fit3), coef = "PMNvsPM") #
  PMNvsMC <- topTable(fit3, number = nrow(fit3), coef = "PMNvsMC") #
  PMNvsMM <- topTable(fit3, number = nrow(fit3), coef = "PMNvsMM") #
  PMNvsB <- topTable(fit3, number = nrow(fit3), coef = "PMNvsB") #
  PMNvsS <- topTable(fit3, number = nrow(fit3), coef = "PMNvsS") #
  # vs B and viceVersa
  MBvsB <- topTable(fit3, number = nrow(fit3), coef = "MBvsB") #
  #PMvsB <- topTable(fit3, number = nrow(fit3), coef = "PMvsB") #
  MCvsB <- topTable(fit3, number = nrow(fit3), coef = "MCvsB") #
  MMvsB <- topTable(fit3, number = nrow(fit3), coef = "MMvsB") #
  SvsB <- topTable(fit3, number = nrow(fit3), coef = "SvsB") #
  PMNvsB <- topTable(fit3, number = nrow(fit3), coef = "PMNvsB") #
  BvsMB <- topTable(fit3, number = nrow(fit3), coef = "BvsMB") #
  #BvsPM <- topTable(fit3, number = nrow(fit3), coef = "BvsPM") #
  BvsMC <- topTable(fit3, number = nrow(fit3), coef = "BvsMC") #
  BvsMM <- topTable(fit3, number = nrow(fit3), coef = "BvsMM") #
  BvsS <- topTable(fit3, number = nrow(fit3), coef = "BvsS") #
  BvsPMN <- topTable(fit3, number = nrow(fit3), coef = "BvsPMN") #
  # result list
  result <- list(
    MBtoMC = MBtoMC ,
    #PMtoMC = PMtoMC,
    MCtoMM =MCtoMM,
    MMtoB = MMtoB,
    BtoS = BtoS,
    BtoPMN= BtoPMN,
    StoPMN = StoPMN,
    MBtoMC= MBtoMC,
    MBtoMM= MBtoMM,
    MBtoB=MBtoB,
    MBtoS=MBtoS,
    MBtoPMN=MBtoPMN,
    MCtoB =MCtoB ,
    MCtoS =MCtoS ,
    MCtoPMN= MCtoPMN ,
    MMtoS=MMtoS,
    MMtoPMN= MMtoPMN ,
    MBvsPMN =MBvsPMN,
    #PMvsPMN =PMvsPMN ,
    MCvsPMN =MCvsPMN,
    MMvsPMN= MMvsPMN,
    BvsPMN =BvsPMN,
    SvsPMN=SvsPMN,
    PMNvsMB= PMNvsMB,
    #PMNvsPM= PMNvsPM,
    PMNvsMC =PMNvsMC ,
    PMNvsMM= PMNvsMM,
    PMNvsB =PMNvsB,
    PMNvsS=PMNvsS,
    MBvsB= MBvsB ,
    #PMvsB =PMvsB,
    MCvsB =MCvsB,
    MMvsB= MMvsB,
    SvsB=SvsB,
    PMNvsB =PMNvsB,
    BvsMB =BvsMB,
    #BvsPM= BvsPM,
    BvsMC =BvsMC,
    BvsMM= BvsMM,
    BvsS=BvsS,
    BvsPMN =BvsPMN
  )
  return(result)
}


## extract results
dex_stage_mic_vsn <- extract_stages_mic(FITs_stages_mic_vsn$fit3)
dex_stage_mic_lg2 <- extract_stages_mic(FITs_stages_mic_lg2$fit3)

# make rownames fid
dex_stage_mic_vsn <- map(dex_stage_mic_vsn, rownames_to_column, var = "fid")
dex_stage_mic_lg2 <- map(dex_stage_mic_lg2, rownames_to_column, var = "fid")

# as df
dex_stage_mic_vsn <- bind_rows(dex_stage_mic_vsn, .id = "comparison")
dex_stage_mic_lg2 <- bind_rows(dex_stage_mic_lg2, .id = "comparison")



### get ANOVA

# vsn
anova_mic_vsn <- topTable(FITs_stages_mic_vsn$fit3, coef=1:5, number = nrow(FITs_stages_mic_vsn$fit3)) 
anova_mic_vsn %>% filter(adj.P.Val < 0.05) %>% nrow() # 203
anova_mic_vsn$fid <- rownames(anova_mic_vsn)

# non
anova_mic_lg2 <- topTable(FITs_stages_mic_lg2$fit3, coef=1:5, number = nrow(FITs_stages_mic_lg2$fit3)) 
anova_mic_lg2 %>% filter(adj.P.Val < 0.05) %>% nrow() # 180
anova_mic_lg2$fid <- rownames(anova_mic_lg2)

check

# pop_gen
comp_progression_mic <- c('MBtoMC','MCtoMM', 'MMtoB', 'BtoS', 'StoPMN')

# prep
res_mic_vsn <- dex_stage_mic_vsn %>% 
  filter(comparison %in% comp_progression_mic) %>%
  group_by(comparison) %>%
  dplyr::summarize(
    n_up = sum(adj.P.Val < 0.05 & logFC >= 1),
    n_down = -sum(adj.P.Val < 0.05 & logFC <= -1) # checked
  ) %>%
  pivot_longer(-comparison, names_to = "direction", values_to = "n")
res_mic_vsn$type = "vsn"

res_mic_lg2 <- dex_stage_mic_lg2 %>% 
  filter(comparison %in% comp_progression_mic) %>%
  group_by(comparison) %>%
  dplyr::summarize(
    n_up = sum(adj.P.Val < 0.05 & logFC >= 1),
    n_down = -sum(adj.P.Val < 0.05 & logFC <= -1) # checked
  ) %>%
  pivot_longer(-comparison, names_to = "direction", values_to = "n")
res_mic_lg2$type = "lg2"

res_dex_mic <- rbind(res_mic_vsn, res_mic_lg2)


# plot
res_dex_mic %>%
  ggplot(aes(x = factor(comparison, level = comp_progression_mic), y = n, fill = type)) +
   geom_bar(stat="identity", position = "dodge") +
   geom_text(aes(y = n + 5 * sign(n), label=n), position = position_dodge(width = .9)) +
  ggtitle("Dex in microRNA: normalisation dependecy \n [abs(FC)>=1, adj.p<0.05]") +
  ylab("Number of dex") +
  geom_hline(yintercept = 0)

Sub stages

# design
design_substage_mic <- model.matrix(
  ~0 + substage, 
  data = san_mic)

contrast_substage_mic <- makeContrasts(
  eMCtolMC =  substagelMC - substageeMC,
  eBtolB = substagelB - substageeB,
  levels = design_substage_mic)

# execute limma_getRes fct
FIT_substage_vsn <- limma_getRes(exp_mic_vsn, design_substage_mic, contrast_substage_mic)

#export diffEx tables from limmas fit3 objects
extract_substage_mic <- function(fit3){
  eMCtolMC <- topTable(fit3, number = nrow(fit3), coef = "eMCtolMC")
  eBtolB <- topTable(fit3, number = nrow(fit3), coef = "eBtolB")
  result <- list(
    eMCtolMC = eMCtolMC,
    eBtolB = eBtolB
  )
  return(result)
}

#extract results
dex_substage_mic_vsn <- extract_substage_mic(FIT_substage_vsn$fit3)
#push rownames to UNImicT col
dex_substage_mic_vsn <- map(dex_substage_mic_vsn, rownames_to_column, var = "fid")
# as df
dex_substage_mic_vsn <- bind_rows(dex_substage_mic_vsn, .id = "comparison")


res_mic_vsn_substage <- dex_substage_mic_vsn %>% 
  group_by(comparison) %>%
  dplyr::summarize(
    n_up = sum(adj.P.Val < 0.05 & logFC >= 1, na.rm = T),
    n_down = -sum(adj.P.Val < 0.05 & logFC <= -1, na.rm = T) # checked
  ) %>%
  pivot_longer(-comparison, names_to = "direction", values_to = "n")


res_mic_vsn_substage %>%
  ggplot(aes(x = factor(comparison), y = n)) +
   geom_bar(stat="identity", position = "dodge") +
   geom_text(aes(y = n + 2 * sign(n), label=n), position = position_dodge(width = .9)) +
  ggtitle("Dex in micteome: substages") +
  ylab("Number of dex") +
  geom_hline(yintercept = 0)

SCN

PENDING

Export

dex.pro <- list(
  dex_stage_pro_vsn = dex_stage_pro_vsn,
  dex_stage_pro_lg2 = dex_stage_pro_lg2,
  dex_substage_pro_vsn = dex_substage_pro_vsn,
  anova_pro_vsn = anova_pro_vsn,
  anova_pro_lg2 =anova_pro_lg2
)

dex.mrn <- list(
  dex_stage_mrn_vsn = dex_stage_mrn_vsn,
  dex_stage_mrn_lg2 = dex_stage_mrn_lg2,
  dex_substage_mrn_vsn = dex_substage_mrn_vsn,
  anova_mrn_vsn = anova_mrn_vsn,
  anova_mrn_lg2 =anova_mrn_lg2
)

dex.mic <- list(
  dex_stage_mic_vsn = dex_stage_mic_vsn,
  dex_stage_mic_lg2 = dex_stage_mic_lg2,
  dex_substage_mic_vsn = dex_substage_mic_vsn,
  anova_mic_vsn = anova_mic_vsn,
  anova_mic_lg2 =anova_mic_lg2
)

dex.gnp <- list(
  dex.pro = dex.pro,
  dex.mrn = dex.mrn,
  dex.mic = dex.mic
)

save(
  dex.gnp,
  file = paste0(dir_moa, "/data/final/", "dex.gnp.RData"))

Differential expression analysis

Sebastian Hesse, contact: dr.sebastian.hesse@gmail.com

2023-10-09

fct

Pro

Main stages

check

Sub stages

mRNA

Main stages

check

Sub stages

microRNA

Main stages

check

Sub stages

SCN

Export