library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## âś” ggplot2 3.1.0 âś” purrr 0.2.5
## âś” tibble 2.0.0 âś” dplyr 0.7.99.9000
## âś” tidyr 0.8.2 âś” stringr 1.3.1
## âś” readr 1.1.1 âś” forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## âś– dplyr::filter() masks stats::filter()
## âś– dplyr::lag() masks stats::lag()
# devtools::install_github("privefl/bigsnpr")
library(bigsnpr)
## Loading required package: bigstatsr
# devtools::install_github("privefl/paper2-PRS/pkg.paper.PRS")
library(pkg.paper.PRS) ## See code source for functions' code.
options(nboot = 1e5)
Useful functions
COLORS <- scales::hue_pal()(6)
methods.color <- setNames(
c(COLORS, COLORS[c(4, 2, 5)], "black"),
c("PLR", "C+T-max", "C+T-stringent", "T-Trees", "PLR3", "C+T-all",
"C+T-max-0.05", "C+T-max-0.2", "C+T-max-0.8", "biglasso")
)
myggplot <- function(..., coeff = 1) {
ggplot(...) + bigstatsr::theme_bigstatsr(size.rel = coeff)
}
barplot_causal <- function(results) {
results %>%
myggplot(aes(par.causal, AUC_mean, fill = method, color = method)) +
geom_hline(yintercept = 0.5, linetype = 2) +
geom_col(position = position_dodge(), alpha = 0.5, color = "black", size = 1) +
geom_errorbar(aes(ymin = AUC_mean - 2 * AUC_boot, ymax = AUC_mean + 2 * AUC_boot),
position = position_dodge(width = 0.9), color = "black", width = 0.2, size = 1) +
scale_y_continuous(limits = c(0.5, NA), minor_breaks = 0:20 / 20,
oob = scales::rescale_none) +
labs(x = "Causal SNPs (number and location)", y = "Mean of 100 AUCs",
fill = "Method", color = "Method") +
scale_fill_manual(values = methods.color) +
scale_color_manual(values = methods.color)
}
barplot_causal_one <- function(results, h2) {
auc_max <- `if`(h2 == 0.8, 0.94, 0.84)
results %>%
filter(par.h2 == h2) %>%
barplot_causal() +
geom_hline(yintercept = auc_max, linetype = 3, color = "blue") +
facet_grid(par.dist ~ .)
}
barplot_causal_all <- function(results) {
results %>%
barplot_causal() +
geom_hline(aes(yintercept = auc_max), linetype = 3, color = "blue",
data = data.frame(par.h2 = c(0.5, 0.8), auc_max = c(0.84, 0.94))) +
facet_grid(par.dist ~ par.h2)
}
compare_PLR <- function(results, with_method) {
results <- filter(
results, method %in% c("PLR", with_method), par.h2 == 0.8)
results.summary <- results %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n())
nsimu <- results.summary$N[[1]]
# stopifnot(all(results.summary$N == nsimu))
plot_extra <- function(y, ylab = y) {
results %>%
myggplot() +
geom_boxplot(aes_string("par.causal", y, color = "method",
fill = "method"), alpha = 0.3) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_grid(par.model ~ par.dist) +
labs(x = "Causal SNPs (number and location)", y = ylab,
fill = "Method", color = "Method") +
theme(legend.position = "none") +
scale_fill_manual(values = methods.color) +
scale_color_manual(values = methods.color) +
scale_y_continuous(limits = c(0, NA))
}
p1 <- results.summary %>%
barplot_causal_one(h2 = 0.8) +
facet_grid(par.model ~ par.dist) +
labs(y = sprintf("Mean of %d AUCs", nsimu))
p2 <- cowplot::plot_grid(
plot_extra("nb.preds", "Number of predictors"),
plot_extra("timing", "Execution time (in seconds)"),
ncol = 1, align = "hv", scale = 0.95,
labels = LETTERS[2:3], label_size = 20
)
p12 <- cowplot::plot_grid(
p1 + theme(legend.position = "none"), p2,
ncol = 2, scale = 0.95, rel_widths = c(6, 4),
labels = c("A", ""), label_size = 20
)
cowplot::plot_grid(
cowplot::get_legend(p1 + theme(legend.direction = "horizontal")), p12,
rel_heights = c(0.1, 1), ncol = 1
)
}
G <- snp_attach("backingfiles/celiacQC_sub1.rds")$genotypes
corr <- snp_cor(G, size = 100)
saveRDS(corr, "backingfiles/corr2.rds")
corr <- readRDS("backingfiles/corr2.rds")
Results with T-Trees
results1 <- list.files("results1", full.names = TRUE) %>%
read_format_results(corr)
compare_PLR(results1, with_method = "T-Trees")
ggsave("figures/supp-ttrees.pdf", scale = 1/100, width = 1400, height = 1060)
Results with PLR3 & model COMP
results2 <- list.files("results2", full.names = TRUE) %>%
read_format_results(corr)
Results with PLR3
compare_PLR(results2, with_method = "PLR3")
ggsave("figures/supp-triple.pdf", scale = 1/100, width = 1400, height = 1060)
results2 %>%
filter(grepl("PLR", method)) %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n()) %>%
barplot_causal_one(h2 = 0.5) +
facet_grid(par.model ~ par.dist)
ggsave("figures/supp-AUC-triple.pdf", scale = 1/110, width = 1242, height = 951)
Results with model COMP
results2 %>%
filter(par.model == "COMP", method %in% c("PLR", "C+T-max")) %>%
filter(thr.r2 == 0.2 | is.na(thr.r2)) %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n()) %>%
barplot_causal_all()
ggsave("figures/supp-AUC-logit-fancy.pdf", scale = 1/110, width = 1242, height = 951)
results2 %>%
filter(par.model == "COMP", grepl("C\\+T", method)) %>%
filter(thr.r2 == 0.2 | is.na(thr.r2)) %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n()) %>%
barplot_causal_all()
ggsave("figures/supp-AUC-PRS-fancy.pdf", scale = 1/110, width = 1242, height = 951)
Results with only PLR & C+T
Different r2 thresholds
results2.all.r2 <- results2 %>%
filter(par.model == "ADD", method %in% c("PLR", "C+T-max")) %>%
mutate(method = c_method_r2(method, thr.r2)) %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n())
barplot_causal_all(results2.all.r2)
ggsave("figures/supp-AUC-all-r2.pdf", scale = 1/110, width = 1242, height = 951)
results2 %>%
filter(par.model == "ADD", method %in% c("PLR", "C+T-max")) %>%
mutate(method = c_method_r2(method, thr.r2)) %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AltSens_mean = mean(AltSens), AltSens_boot = boot(AltSens), N = n()) %>%
myggplot(aes(par.causal, AltSens_mean, fill = method, color = method)) +
geom_col(position = position_dodge(), alpha = 0.3) +
geom_errorbar(aes(ymin = AltSens_mean - 2 * AltSens_boot,
ymax = AltSens_mean + 2 * AltSens_boot),
position = position_dodge(width = 0.9), color = "black", width = 0.2) +
scale_y_continuous(limits = c(0, 1)) +
labs(x = "Causal SNPs (number and location)", y = "Mean of 100 AltSens",
fill = "Method", color = "Method") +
scale_fill_manual(values = methods.color) +
scale_color_manual(values = methods.color) +
facet_grid(par.dist ~ par.h2)
ggsave("figures/supp-AltSens-all-r2.pdf", scale = 1/110, width = 1242, height = 951)
Correlation between predictive measures
results3 <- results2 %>%
filter(thr.r2 == 0.2 | is.na(thr.r2)) %>%
filter(par.model == "ADD", method != "PLR3")
results3 %>%
filter(method %in% c("PLR", "C+T-max")) %>%
mutate(Parameters = interaction(par.h2, par.dist, method, sep = " | ")) %>%
myggplot(aes(AUC, pAUC, color = Parameters)) +
geom_point(size = 0.6, alpha = 0.5) +
geom_smooth(aes(linetype = Parameters), method = "loess", se = FALSE, size = 2) +
theme(legend.key.width = unit(3.5, "line"),
legend.direction = "horizontal") +
labs(y = "Partial AUC (specificity: [0.9, 1])") +
theme(legend.position = "top", axis.title = element_text(size = rel(2.2))) +
ggsave("figures/supp-AUC-corr.pdf", scale = 1/100, width = 1525, height = 983)
cor(results3$AUC, results3$pAUC, method = "spearman")
## [1] 0.9804009
6000 in training & all chromosomes
results3.summary <- results3 %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n())
results3.summary %>%
filter(method %in% c("PLR", "C+T-max"), par.model == "ADD") %>%
barplot_causal_all()
ggsave("figures/main-AUC-logit.pdf", scale = 1/110, width = 1242, height = 951)
results3.summary %>%
filter(grepl("C\\+T", method)) %>%
barplot_causal_all()
ggsave("figures/supp-AUC-PRS.pdf", scale = 1/110, width = 1242, height = 951)
6000 in training & only chromosome 6
results4 <- list.files("results4", full.names = TRUE) %>%
read_format_results(corr)
results4 %>%
filter(method %in% c("PLR", "C+T-max")) %>%
mutate(method = c_method_r2(method, thr.r2)) %>%
group_by_at(c(vars(starts_with("par")), "method")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n()) %>%
barplot_causal_all() +
geom_col(aes(par.causal, AUC_mean), data = results2.all.r2,
position = position_dodge(), color = "black", alpha = 0)
ggsave("figures/supp-AUC-chr6-all-r2.pdf", scale = 1/110, width = 1242, height = 951)
Varying training size & all shromosomes
results5 <- list.files("results5", full.names = TRUE) %>%
read_format_results(corr)
results6 <- results2 %>%
filter(par.model == "ADD", par.causal == "300 in all") %>%
bind_rows(results5) %>%
filter(method %in% c("PLR", "C+T-max")) %>%
mutate(method = c_method_r2(method, thr.r2),
par.causal = as.character(par.causal))
results6 %>%
group_by_at(c(vars(starts_with("par")), "method", "n.train")) %>%
summarise(AUC_mean = mean(AUC), AUC_boot = boot(AUC), N = n()) %>%
myggplot(aes(n.train, AUC_mean, color = method, linetype = method)) +
geom_hline(yintercept = 0.5, linetype = 2) +
geom_hline(aes(yintercept = auc_max), linetype = 3, color = "blue",
data = data.frame(par.h2 = c(0.5, 0.8), auc_max = c(0.84, 0.94))) +
geom_line(size = 1.5) +
geom_errorbar(aes(ymin = AUC_mean - 2 * AUC_boot,
ymax = AUC_mean + 2 * AUC_boot),
size = 1.5, width = 0) +
facet_grid(par.dist ~ par.h2) +
scale_x_continuous(breaks = 1:6 * 1000, minor_breaks = NULL) +
scale_y_continuous(minor_breaks = 0:20 / 20) +
scale_color_manual(values = methods.color) +
labs(x = "Size of the training set", y = "Mean of 100 AUCs",
color = "Method", linetype = "Method") +
theme(legend.key.width = unit(3, "line"))
ggsave("figures/main-AUC-ntrain.pdf", scale = 1/100, width = 1050, height = 800)
Comparison with biglasso
list.files("results7", full.names = TRUE) %>%
read_format_results(corr) %>%
compare_PLR("biglasso")
ggsave("figures/supp-biglasso.pdf", scale = 1/100, width = 1600, height = 1000)