Summary

This document contains the R code needed to carry out the analyses in the Jeffers et al. Plant Disease manuscript. All three assays are analyzed with a combined model, where log(x+1)-transformed aflatoxin concentration is the response, and genotype and assay type and their interaction are the predictors, with appropriate random intercepts. We assess convergence diagnostics. Models with normal and Student’s t error distributions, and models assuming equal and unequal variance by assay type are compared. After establishing that a model with t error distribution and unequal variance by assay type fits the data best, two additional models are fit: a model with G×E interaction terms and a model with secondary infection covariates. We compare each of these models to the model without either of those terms and find that adding interaction or covariates does not improve model fit. In all cases, we use LOO cross-validation to compare the models.

Post hoc comparisons are done by geting posterior estimates of marginal means for each genotype within each assay type, and calculating the two-sided probability of direction for each pairwise aflatoxin ratio within assay type. Bayesian R-squared values are calculated for the each model, and rank correlation coefficients between the fitted values for each pair of assays are calculated. Finally, the model is refit with genotype as a random effect to get the ICC coefficients for the repeatability analysis.

Define functions

Function to get summary information about all parameters from a model fit.

get_all_pars <- function(model) {
  summ <- summary(model)
  with(summ, rbind(fixed, do.call(rbind, random), spec_pars, cor_pars))
}

Functions to estimate marginal means and contrasts for a model fit, and to generate the multiple comparison letter display.

get_contrasts <- function(model, by_var = NA) {
  # Get marginal means and contrasts on original data scale
  if (is.na(by_var)) {
    emm <- emmeans(model, ~ Genotype)
  } else {
    emm <- emmeans(model, ~ Genotype, by = by_var)
  }
  contr <- contrast(emm, 'pairwise')
  emm_draws <- gather_emmeans_draws(emm) |> setDT()
  contr_draws <- gather_emmeans_draws(contr) |> setDT()
  
  qprobs <- c(0.5, 0.025, 0.975, 0.05, 0.95, 0.17, 0.83)
  qcols <- paste0('q', qprobs)
  if (is.na(by_var)) {
    contr_quantiles <- contr_draws[, data.frame(q = qcols, p = quantile(.value, probs = qprobs)), by = .(contrast)]
    contr_quantiles <- dcast(contr_quantiles, contrast ~ q, value.var = 'p')
    
    emm_quantiles <- emm_draws[, data.frame(q = qcols, p = quantile(.value, probs = qprobs)), by = .(Genotype)]
    emm_quantiles <- dcast(emm_quantiles, Genotype ~ q, value.var = 'p')
  } else {
    contr_quantiles <- contr_draws[, data.frame(q = qcols, p = quantile(.value, probs = qprobs)), by = c(by_var, 'contrast')]
    contr_quantiles <- dcast(contr_quantiles, paste(by_var, '+ contrast ~ q'), value.var = 'p')
    
    emm_quantiles <- emm_draws[, data.frame(q = qcols, p = quantile(.value, probs = qprobs)), by = c(by_var, 'Genotype')]
    emm_quantiles <- dcast(emm_quantiles, paste(by_var, '+ Genotype ~ q'), value.var = 'p')
  }
  
  emm_dat <- as.data.frame(emm)
  if (is.na(by_var)) {
    # Generate contrast letters
    means_order <- emm_dat[['Genotype']][order(emm_dat[['emmean']])]
    contr_letters <- contrast_letters_v2(contr_quantiles, means_order)
    
    # Generate data for plotting
    emm_quantiles <- emm_quantiles[order(q0.5)]
    emm_quantiles[, Genotype := factor(Genotype, levels = as.character(means_order))]
    emm_quantiles[, group := contr_letters$Letters]
    
  } else {
    emm_dat <- group_nest_dt(as.data.table(emm_dat), group_vars = by_var)
    emm_dat[, means_order := map(data, ~ .[['Genotype']][order(.[['emmean']])])]
    
    contr_letters <- group_nest_dt(contr_quantiles, group_vars = by_var)
    contr_letters[, letters := map2(data, emm_dat$means_order, ~ contrast_letters_v2(.x, levels_ordered = .y))]
    
    emm_quantiles <- emm_quantiles[order(get(by_var), q0.5)]
    emm_quantiles[, Genotype := factor(Genotype, levels = as.character(emm_dat$means_order[[1]]))]
    emm_quantiles[, group := do.call(c, map(contr_letters$letters, 'Letters'))]
    
  }
  
  # If the log1p transformation has led to a value of less than zero, correct to a small value (for plotting purposes only)
  emm_quantiles[, (qcols) := lapply(.SD, function(x) ifelse(x > 0, expm1(x), 0.1)), .SDcols = qcols]
  
  return(emm_quantiles)
  
}

contrast_letters_v2 <- function(contrasts, levels_ordered, scale = 'additive') {
  
  if (scale == 'additive') {
    sep <- ' - '
    cutoff <- 0
  } else {
    sep <- ' / '
    cutoff <- 1
  }
  
  message(paste0('Using', sep, 'as separator.'))
  comps <- tidyr::separate(contrasts, contrast, into = c('x', 'y'), sep = sep)
  comps_mat <- as.matrix(comps[, c('x', 'y')])

  different <- with(comps, q0.025 > cutoff | q0.975 < cutoff)

  multcomp:::insert_absorb(different,
                           decreasing=FALSE,
                           comps = comps_mat,
                           lvl_order = levels_ordered)
}

qprobs <- c(0.5, 0.025, 0.975, 0.05, 0.95, 0.17, 0.83)
qcols <- paste0('q', qprobs)

Setup

Load packages. Define paths to directories (these may need to be changed).

library(data.table)
library(purrr)
library(ggplot2)
library(emmeans)
library(multcomp)
library(brms)
library(Rutilitybelt)
library(tidybayes)
library(performance)
library(ggtext)
library(ragg)
library(ggrepel)
library(patchwork)

data_dir <- 'data'
model_fits_dir <- 'brmfits'
out_dir <- 'MS'

Set plotting theme and brms options.

theme_set(
  theme_bw(base_family = 'Franklin Gothic Book') + 
    theme(panel.grid = element_blank(),
          strip.background = element_blank())
)

options(mc.cores = 4, brms.backend = 'cmdstanr', brms.file_refit = 'never')

Load the data and do some processing of the categorical columns so the factors are in the right format for fitting the models.

resistant_g <- c('Mp715', 'Mp717', 'Mp719', 'Mp313E')
susceptible_g <- c('B73', 'T173', 'Va35')

walk(list.files(data_dir, pattern = 'tsv', full.names = TRUE),
     ~ assign(gsub('\\.tsv', '', basename(.)), 
              fread(., na.strings = '.', stringsAsFactors = TRUE), envir = .GlobalEnv))

setnames(step1_ksa_field, c('REP', 'Year'), c('rep', 'year'))
step1_ksa_field[, year := as.numeric(gsub('MS_', '', year))]
step1_ksa_field[, rep_year := paste(rep, year, sep = '_')]

setnames(step1_ksa, 'Year', 'year')
step1_ksa[, rep_year := paste(rep, year, sep = '_')]

factor_cols <- c('rep', 'rep_year')
step1_ksa_field[, (factor_cols) := lapply(.SD, factor), .SDcols = factor_cols]

factor_cols <- c('subsample', 'rep', 'rep_year')
step1_ksa[, (factor_cols) := lapply(.SD, factor), .SDcols = factor_cols]

Combine field and lab data into a single object.

step1_ksa_field[, Trt := 'field']
setnames(step1_ksa_field, old = 'ENTRY', new = 'Entry')
field_lab_combined <- rbind(step1_ksa, step1_ksa_field, use.names = TRUE, fill = TRUE)

Fit and compare models

Base models

Define fixed-effect prior, model formulas, and fitting options.

beta_prior <- c(
  prior('student_t(3, 0, 5)', class = 'b')
)

nchain <- 4
niter <- 10000
nwarm <- 7500

formula_equalv <- bf(log1p(aflatoxin) ~ Trt + Genotype + Genotype:Trt + (1 | year) + (1 | year:rep))
formula_unequalv <- bf(log1p(aflatoxin) ~ Trt + Genotype + Genotype:Trt + (1 | year) + (1 | year:rep),
                       sigma ~ Trt)

Fit four models to the combined data: 2x2 combination of equal and unequal variance by assay type, and Gaussian (normal) and Student t error distribution.

combined_models <- CJ(formula = list(formula_equalv, formula_unequalv),
                      family = list(gaussian, student),
                      sorted = FALSE)

model_file_names <- CJ(variance = c('equalv', 'unequalv'),
                       family = c('norm', 'stud'))

combined_models[, seed := round(seq(555, 999, length.out = .N))]
combined_models[, file_name := paste0('combined_', apply(model_file_names, 1, paste, collapse = '_'))]

combined_models[, fit := pmap(combined_models, function(formula, family, seed, file_name) {
  brm(formula = formula,
      data = field_lab_combined,
      family = family,
      chains = nchain, iter = niter, warmup = nwarm, cores = nchain,
      prior = beta_prior,
      control = list(adapt_delta = 0.9),
      file = file.path(model_fits_dir, file_name),
      seed = seed)
})
]

Check convergence diagnostics (both maximum R-hat and minimum effective sample size). All are acceptable.

combined_models[, all_pars := map(fit, get_all_pars)]
combined_models[, max_Rhat := map_dbl(all_pars, ~ max(.$Rhat))]
combined_models[, min_ESS := map_dbl(all_pars, ~ min(.$Bulk_ESS))]

combined_models[, .(file_name, max_Rhat, min_ESS)]

##                 file_name max_Rhat   min_ESS
## 1:   combined_equalv_norm 1.003522 1314.6895
## 2:   combined_equalv_stud 1.018759  261.7477
## 3: combined_unequalv_norm 1.004165  952.7383
## 4: combined_unequalv_stud 1.004145 1164.3825

Do model comparison using leave-one-out cross-validation. We see that the model with unequal variance and Student (fat-tailed) distribution vastly outperforms the other models.

combined_models[, fit := map(fit, add_criterion, criterion = 'loo')]

loo_compare(combined_models$fit[[1]], combined_models$fit[[2]], combined_models$fit[[3]], combined_models$fit[[4]],
            model_names = c('equal var., normal', 'equal var., Student', 'unequal var., normal', 'unequal var., Student'))

##                       elpd_diff se_diff
## unequal var., Student    0.0       0.0 
## unequal var., normal  -123.4      31.6 
## equal var., Student   -139.7      18.0 
## equal var., normal    -278.9      22.3

Output degrees of freedom of t-distribution, with its 95% CI.

combined_models$all_pars[[4]]['nu', c('Estimate', 'l-95% CI', 'u-95% CI')] |> round(3)

##    Estimate l-95% CI u-95% CI
## nu    3.762    3.001    4.756

Model with G by E interaction

Now that we have established the appropriate response distribution and that it’s better to model unequal variance by assay type, compare this base model with a model with genotype by environment (year) terms included.

formula_gxe <- bf(
  log1p(aflatoxin) ~ Trt + Genotype + Genotype:Trt + year + Genotype:year + Trt:year + (1 | year:rep),
  sigma ~ Trt
)

fit_gxe <- brm(formula = formula_gxe,
                data = field_lab_combined,
                family = student,
                chains = nchain, iter = 10000, warmup = 7500, cores = nchain,
                prior = beta_prior,
                control = list(adapt_delta = 0.9),
                file = file.path(model_fits_dir, 'combined_unequalv_stud_gxe'),
                seed = 153)

fit_gxe <- add_criterion(fit_gxe, criterion = 'loo')

Ensure this model converged by checking the R-hat and effective sample size diagnostics.

summgxe <- summary(fit_gxe)
pars_gxe <- with(summgxe, rbind(fixed, do.call(rbind, random), spec_pars, cor_pars))
max(pars_gxe$Rhat)

## [1] 1.003759

min(pars_gxe$Bulk_ESS)

## [1] 807.8259

Show that the interaction parameters are all roughly zero. All are centered within about 0.002 of zero and the 95% credible intervals extend well on either side of zero.

pars_gxe[grepl("Genotype(.*)year", row.names(pars_gxe)), c('Estimate', 'l-95% CI', 'u-95% CI')]

##                          Estimate     l-95% CI    u-95% CI
## GenotypeB73:year    -1.170233e-03 -0.008446260 0.006342036
## GenotypeF118:year   -5.076556e-04 -0.008184922 0.007135823
## GenotypeHBA1:year    6.377089e-04 -0.006776858 0.008302850
## GenotypeLH195:year  -6.232560e-04 -0.009179716 0.007864279
## GenotypeLH197:year  -5.827482e-04 -0.008262603 0.006629261
## GenotypeLH198:year  -9.267822e-04 -0.009262531 0.007838097
## GenotypeMM402A:year  4.155107e-04 -0.007407449 0.009229018
## GenotypeMp313E:year  5.219872e-05 -0.008093973 0.008111655
## GenotypeMp715:year  -2.032697e-03 -0.009701318 0.005764282
## GenotypeMp717:year  -8.974895e-04 -0.008728424 0.006819258
## GenotypeMp719:year  -1.207311e-03 -0.009584780 0.007097633
## GenotypePHN46:year  -8.724167e-04 -0.008472819 0.006782944
## GenotypePHW79:year   3.816266e-04 -0.008189989 0.008187762
## GenotypeT173:year    3.746820e-04 -0.007338050 0.010889942
## GenotypeVa35:year   -9.505336e-04 -0.008662017 0.006357629

Additionally, show that the model with GxE is not an improvement over the model without. The information criteria are essentially identical (difference of 0.1 with a standard error of 0.3 for the difference) so we can’t say it’s an improvement to add the interactions.

loo_compare(fit_gxe, combined_models$fit[[4]], model_names = c('GxE', 'no GxE'))

##        elpd_diff se_diff
## no GxE  0.0       0.0   
## GxE    -0.1       0.3

Model with secondary infection covariates

We are also interested in the effect of the Aspergillus and Fusarium infection covariates. To our previously identified best model, we add fixed effects for each of those covariates. These data are not available for the field study, so we have to limit this comparison to the lab data.

First join the covariate data with the lab data. There is no infection covariate data for the field data.

ksa_covs <- ksa_cov_mod[, .(Sample,Genotype,Entry,Year,Trt,subsample,rep,Asp,Fus)]
setnames(ksa_covs, old = 'Year', new = 'year') # For consistency
ksa_covs[, subsample := factor(subsample)]
ksa_covs[, rep := factor(rep)]

ksa_withcov <- step1_ksa[ksa_covs, on = .(Sample,Genotype,Entry,year,Trt,subsample,rep)]

Fit a model to the lab data only with and without the infection covariates.

beta_prior <- c(
  prior('student_t(3, 0, 5)', class = 'b')
)

formula_unequalv_covs <- bf(log1p(aflatoxin) ~ Trt + Genotype + Genotype:Trt + log1p(Asp) + log1p(Fus) + (1 | year) + (1 | year:rep),
                              sigma ~ Trt)

fit_nocov <- brm(formula = formula_unequalv, 
                   data = ksa_withcov, family = student, prior = beta_prior,
                   chains = 4, iter = 20000, warmup = 17500, cores = 4, seed = 1422,
                   control = list(adapt_delta = 0.9),
                   file = file.path(model_fits_dir, 'unequalv_stud_nocovariates'))

fit_withcov <- brm(formula = formula_unequalv_covs, 
                   data = ksa_withcov, family = student, prior = beta_prior,
                   chains = 4, iter = 20000, warmup = 17500, cores = 4, seed = 1423,
                   control = list(adapt_delta = 0.9),
                   file = file.path(model_fits_dir, 'unequalv_stud_covariates'))

Check convergence diagnostics of these models.

pars_nocov <- get_all_pars(fit_nocov)
pars_withcov <- get_all_pars(fit_withcov)

max(pars_nocov$Rhat)

## [1] 1.003752

max(pars_withcov$Rhat)

## [1] 1.005771

min(pars_nocov$Bulk_ESS)

## [1] 1446.397

min(pars_withcov$Bulk_ESS)

## [1] 956.1441

Compare the model to the “best model” without the infection covariates. Again the two models are close to identical (the standard error of the difference in ELPD is quite a bit greater than the difference in ELPD). Thus there is not good support for adding the covariates.

fit_nocov <- add_criterion(fit_nocov, 'loo')
fit_withcov <- add_criterion(fit_withcov, 'loo')

loo_compare(fit_withcov, fit_nocov, model_names = c('with covariates', 'without covariates'))

##                    elpd_diff se_diff
## with covariates     0.0       0.0   
## without covariates -1.7       2.9

Model predictions

Estimated marginal means and contrasts

Generate contrasts from the combined model with no covariates or G by E interaction included.

contr_combined <- get_contrasts(combined_models$fit[[4]], by_var = 'Trt')

Plot: field results

# Change genotype order in both datasets so that they are increasing in the field figure.
# Indicate resistance using a bold label next to the genotype.
field_incr_order <- contr_combined[Trt %in% 'field'][order(q0.5)][['Genotype']] |> as.character()

field_incr_order_withlabels <- fcase(
  field_incr_order %in% resistant_g, paste(field_incr_order, '<span style="color: #E69F00"><b>R</b></span>'),
  field_incr_order %in% susceptible_g, paste(field_incr_order, '<span style="color: #56B4E9"><b>S</b></span>'),
  !field_incr_order %in% c(resistant_g, susceptible_g), field_incr_order
)

field_lab_combined[, Genotype := factor(Genotype, levels = field_incr_order, labels = field_incr_order_withlabels)]
contr_combined[, Genotype := factor(Genotype, levels = field_incr_order, labels = field_incr_order_withlabels)]


# Indicate legend color using only text, so there is no confusing it with another point
year_color_scale <- scale_color_manual(
  name = 'year',
  values = c('#009E73','#D55E00'),
  labels = c('<span style="color: #009E73"><b>2017</b></span>', '<span style="color: #D55E00"><b>2018</b></span>')
)

pmeanfield <- ggplot(contr_combined[Trt %in% 'field'], aes(x = Genotype)) +
  geom_point(aes(y = aflatoxin, color = factor(year), group = interaction(Genotype, factor(year))), data = field_lab_combined[Trt %in% 'field'],
             position = position_dodge(width = 0.7), alpha = 0.3) +
  geom_errorbar(aes(ymin = q0.025, ymax = q0.975), width = 0.2) +
  geom_errorbar(aes(ymin = q0.17, ymax = q0.83), width = 0, size = 1.5) +
  geom_point(aes(y = q0.5), size = 3) +
  geom_text(aes(label = group, y = q0.975 * 1.5), family = 'Franklin Gothic Book') +
  geom_richtext(aes(label = lbl, x = xpos, y = ypos), 
                data = data.frame(lbl = c('year', '<span style="color: #009E73"><b>2017</b></span>', '<span style="color: #D55E00"><b>2018</b></span>'), xpos = 1.2, ypos = c(25000, 15000, 10000)), 
                family = 'Franklin Gothic Book',
                label.color = NA, fill = NA) +
  year_color_scale +
  scale_y_continuous(name = 'aflatoxin (ng g<sup>-1</sup>)', trans = 'log1p', breaks = c(1, 3, 10, 30, 100, 300, 1000, 3000, 10000, 30000)) +
  theme(legend.position = 'none',
        axis.text.x = element_markdown(angle = 45, hjust = 1),
        axis.title.x = element_blank(),
        axis.title.y = element_markdown()) 

pmeanfield

Plot: lab results

pmeanlab <- ggplot(contr_combined[!Trt %in% 'field'], aes(x = Genotype)) +
  geom_point(aes(y = aflatoxin, color = factor(year), group = interaction(Genotype, factor(year))), data = field_lab_combined[!Trt %in% 'field'],
             position = position_dodge(width = 0.7), alpha = 0.3) +
  geom_errorbar(aes(ymin = q0.025, ymax = q0.975), width = 0.2) +
  geom_errorbar(aes(ymin = q0.17, ymax = q0.83), width = 0, size = 1.5) +
  geom_point(aes(y = q0.5), size = 3) +
  geom_text(aes(label = group, y = q0.975 * 1.5), family = 'Franklin Gothic Book') +
  geom_richtext(aes(label = lbl, x = xpos, y = ypos), 
                data = data.frame(Trt = 'wound', lbl = c('year', '<span style="color: #009E73"><b>2017</b></span>', '<span style="color: #D55E00"><b>2018</b></span>'), xpos = 15.5, ypos = c(3, 1.3, .3)), 
                family = 'Franklin Gothic Book', label.color = NA, fill = NA) +
  scale_color_manual(name = 'year', values = c('#009E73','#D55E00')) +
  scale_y_continuous(name = 'aflatoxin (ng g<sup>-1</sup>)', trans = 'log1p', breaks = c(1, 3, 10, 30, 100, 300, 1000, 3000, 10000, 30000)) +
  facet_wrap(~ Trt, labeller = labeller(Trt = c(no = 'KSA, non-wounding', wound = 'KSA, wounding'))) +
  theme(legend.position = 'none',
        axis.text.x = element_markdown(angle = 45, hjust = 1),
        axis.title.x = element_blank(),
        axis.title.y = element_markdown()) +
  geom_text(aes(x = 0, y = Inf, label = lbl), data = data.frame(Trt = c('no', 'wound'), lbl = c('(a)', '(b)')),
            family = 'Franklin Gothic Book', hjust = -0.5, vjust = 1.2, size = 4)

pmeanlab

Plot: correlation of means between field assay and KSA

# Get estimated marginal means by genotype and method (all posterior draws)
emm_all <- emmeans(combined_models$fit[[4]], ~ Genotype + Trt) |> 
  gather_emmeans_draws() |>
  setDT() |>
  dcast(.draw + Genotype ~ Trt, value.var = '.value')

# Calculate medians and endpoints of error bars

# Get slope and credible interval, then get draws of predicted values
pred_x <- seq(0, 11, length.out = 101)

pairwise_lms <- group_nest_dt(emm_all, .draw)
pairwise_lms[, lm_fieldno := map(data, ~ lm(field ~ no, data = .))]
pairwise_lms[, lm_fieldwound := map(data, ~ lm(field ~ wound, data = .))]

pairwise_lms[, `:=`(intercept_no = map_dbl(lm_fieldno, ~ coefficients(.)[1]),
                    slope_no = map_dbl(lm_fieldno, ~ coefficients(.)[2]),
                    intercept_wound = map_dbl(lm_fieldwound, ~ coefficients(.)[1]),
                    slope_wound = map_dbl(lm_fieldwound, ~ coefficients(.)[2]))]

pairwise_lms[, pred_fieldno := map(lm_fieldno, predict, newdata = data.frame(no = pred_x))]
pairwise_lms[, pred_fieldwound := map(lm_fieldwound, predict, newdata = data.frame(wound = pred_x))]

emm_all_long <- melt(emm_all, id.vars = c('Genotype', '.draw'), measure.vars = c('field', 'no', 'wound'), variable.name = 'method')
emm_all_long[, value := expm1(value)]
emm_all_summ <- emm_all_long[, .(median = quantile(value, .5), cilow = quantile(value, .025), cihigh = quantile(value, .975)), by = .(Genotype, method)]
emm_all_summ_wide <- dcast(emm_all_summ, Genotype ~ method, value.var = c('median', 'cilow', 'cihigh'))

trend_dat_fieldno <- data.table(
  no = expm1(pred_x),
  field = expm1(unlist(pairwise_lms$pred_fieldno))
)
trend_dat_fieldno_summ <- trend_dat_fieldno[, .(median = quantile(field, .5), cilow = quantile(field, .025), cihigh = quantile(field, .975)), by = .(no)]

trend_dat_fieldwound <- data.table(
  wound = expm1(pred_x),
  field = expm1(unlist(pairwise_lms$pred_fieldwound))
)
trend_dat_fieldwound_summ <- trend_dat_fieldwound[, .(median = quantile(field, .5), cilow = quantile(field, .025), cihigh = quantile(field, .975)), by = .(wound)]

# Assign resistance classifications for the plot color scheme.
emm_all_summ_wide[, resistance := fcase(
  Genotype %in% resistant_g, 'resistant',
  Genotype %in% susceptible_g, 'susceptible',
  !Genotype %in% c(resistant_g, susceptible_g), 'none'
)]

# Get summary stats for figure
pairwise_summ_stats <- pairwise_lms[, lapply(.SD, quantile, probs = c(.5, .025, .975)), 
                                    .SDcols = c('intercept_no','slope_no','intercept_wound','slope_wound')]
pairwise_summ_stats <- cbind(quantile = c('median', '95% CI lower', '95% CI upper'), pairwise_summ_stats)

pleft <- ggplot(emm_all_summ_wide) +
  geom_ribbon(aes(x=no, ymin=cilow, ymax=cihigh), data = trend_dat_fieldno_summ[no > 10 & no < 10000], fill = 'gray90', color = NA) +
  geom_line(aes(x=no, y=median), data = trend_dat_fieldno_summ[no > 10 & no < 10000], color = 'gray25') +
  geom_point(aes(x = median_no, y = median_field)) +
  geom_point(aes(x = median_no, y = median_field, color = resistance), data = emm_all_summ_wide[!resistance %in% 'none']) +
  geom_text_repel(aes(x = median_no, y = median_field, label = Genotype), family = 'Franklin Gothic Book', size = 3) +
  scale_x_log10(name = 'aflatoxin (ng g<sup>-1</sup>)<br>KSA, non-wounding') + scale_y_log10(name = 'aflatoxin (ng g<sup>-1</sup>)<br>Field, side-needle') +
  theme(legend.position = c(1, 0), legend.justification = c(1, 0), legend.background = element_blank(), legend.title = element_blank(), 
        axis.title.x = element_markdown(), axis.title.y = element_markdown()) +
  see::scale_color_okabeito() +
  annotate(geom = 'text', x = 0, y = Inf, label = 'a', family = 'Franklin Gothic Book', hjust = -0.5, vjust = 1.2, size = 4)

pright <- ggplot(emm_all_summ_wide) +
  geom_ribbon(aes(x=wound, ymin=cilow, ymax=cihigh), data = trend_dat_fieldwound_summ[wound > 500 & wound < 6000], fill = 'gray90', color = NA) +
  geom_line(aes(x=wound, y=median), data = trend_dat_fieldwound_summ[wound > 500 & wound < 6000], color = 'gray25') +
  geom_point(aes(x = median_wound, y = median_field)) +
  geom_point(aes(x = median_wound, y = median_field, color = resistance), data = emm_all_summ_wide[!resistance %in% 'none']) +
  geom_text_repel(aes(x = median_wound, y = median_field, label = Genotype), family = 'Franklin Gothic Book', size = 3) +
  scale_x_log10(name = 'aflatoxin (ng g<sup>-1</sup>)<br>KSA, wounding') + scale_y_log10(name = 'aflatoxin (ng g<sup>-1</sup>)<br>Field, side-needle') +
  theme(legend.position = 'none', axis.title.y = element_blank(), axis.text.y = element_blank(),
        axis.title.x = element_markdown()) +
  see::scale_color_okabeito() +
  annotate(geom = 'text', x = 0, y = Inf, label = 'b', family = 'Franklin Gothic Book', hjust = -0.5, vjust = 1.2, size = 4)

pcombined <- pleft | pright + plot_layout(widths = c(2, 1))

pcombined

Model performance

We assess model performance using the Bayesian R-squared, partitioned into marginal and conditional R-squared. You can see that the random effects of replicate and year don’t add a lot to the model’s explanatory power. Basically everything is the fixed effect of genotype (and covariates such as pre-infection and treatment if present). This is a good thing for our study design. This is further evidence that the infection covariates aren’t very important because they do not increase the fixed-effect portion of the R-squared at all.

r2_combined <- r2(combined_models$fit[[4]])
r2_lab_nocov <- r2(fit_nocov)
r2_lab_withcov <- r2(fit_withcov)

# Generate a table
r2_table <- map2_dfr(list(r2_combined, r2_lab_nocov, r2_lab_withcov), c('combined lab and field, no covariates', 'lab, no covariates', 'lab, with infection covariates'),
                     ~ data.table(method = .y, as.data.frame(.x))) 

dcast(r2_table[, .(method, Component, R2)], method ~ Component)[
  , .(method, marginal, conditional)] |>
  knitr::kable(col.names = c('method', 'marginal R-squared (fixed effects of genotype and covariates)', 'conditional R-squared (fixed effects + random effects)'),
               digits = 3)

method	marginal R-squared (fixed effects of genotype and covariates)	conditional R-squared (fixed effects + random effects)
combined lab and field, no covariates	0.443	0.453
lab, no covariates	0.409	0.418
lab, with infection covariates	0.411	0.419

Correlation between assay methods

Here we calculate pairwise correlations between the estimated marginal mean values between field, lab no-wound method, and lab wound method. We show that there are reasonably good correlations between the lab methods but not very high correlation between either lab method and the field method.

all_cors <- emm_all[, .(
  nowound_wound = cor(no, wound, method = 'pearson'),
  nowound_field = cor(no, field, method = 'pearson'),
  wound_field = cor(field, wound, method = 'pearson')
), by = .(.draw)]

all_cors <- melt(all_cors, id.vars = '.draw', value.name = 'corr') |>
  tidyr::separate(variable, into = c('var1', 'var2')) 
setDT(all_cors)

corr_summ <- all_cors[, .(q = qcols, v = quantile(corr, probs = qprobs)), by = .(var1, var2)]
corr_summ <- dcast(corr_summ, var1 + var2 ~ q)

corr_summ[, .(var1, var2, q0.5, q0.025, q0.975)] |>
  knitr::kable(col.names = c('method 1', 'method 2', 'Pearson correlation', '95% CI lower', '95% CI upper'), digits = 3)

method 1	method 2	Pearson correlation	95% CI lower	95% CI upper
nowound	field	0.383	0.241	0.510
nowound	wound	0.797	0.696	0.878
wound	field	0.561	0.458	0.653

Repeatability analysis (ICC)

Next, we compare the repeatability of the different methods by looking at the intraclass correlation coefficients (ICCs) for the different random effects. We must fit separate models for this analysis: each of the three assay methods must be fit with a separate model, and genotype is treated as a random effect instead of fixed as in the previous analysis. For the two lab assay methods, a model is fit with and without covariates.

intercept_prior <- c(
  prior('gamma(1, 1)', class = 'sd')
)

formula_grandom_onetrt_covs <- bf(log1p(aflatoxin) ~ log1p(Asp) + log1p(Fus) + (1 | Genotype) + (1 | year) + (1 | year:rep))
formula_grandom_onetrt_nocovs <- bf(log1p(aflatoxin) ~ (1 | Genotype) + (1 | year) + (1 | year:rep))

fit_nowound_withcov <- brm(formula = formula_grandom_onetrt_covs, 
                           data = ksa_withcov[Trt %in% 'no'], family = student, prior = c(beta_prior, intercept_prior),
                           chains = 4, iter = 12500, warmup = 10000, seed = 222,
                           file = file.path(model_fits_dir, 'nowound_withcovs'))
fit_nowound_nocov <- brm(formula = formula_grandom_onetrt_nocovs, 
                         data = ksa_withcov[Trt %in% 'no'], family = student, prior = intercept_prior,
                         chains = 4, iter = 12500, warmup = 10000, seed = 333,
                         file = file.path(model_fits_dir, 'nowound_nocovs'))

fit_wound_withcov <- brm(formula = formula_grandom_onetrt_covs, 
                         data = ksa_withcov[Trt %in% 'wound'], family = student, prior = c(beta_prior, intercept_prior),
                         chains = 4, iter = 12500, warmup = 10000, seed = 1222,
                         file = file.path(model_fits_dir, 'wound_withcovs'))
fit_wound_nocov <- brm(formula = formula_grandom_onetrt_nocovs, 
                       data = ksa_withcov[Trt %in% 'wound'], family = student, prior = prior(gamma(5, 5), class = sd),
                       chains = 4, iter = 12500, warmup = 10000, seed = 1223,
                       file = file.path(model_fits_dir, 'wound_nocovs'))

fit_field_grandomintercepts <- brm(log1p(aflatoxin) ~ (1 | Genotype) + (1 | year) + (1 | year:rep), data = step1_ksa_field,
                                   family = student, 
                                   chains = 4, iter = 12500, warmup = 10000, seed = 101,
                                   file = file.path(model_fits_dir, 'field_grandomintercepts'))

Check convergence diagnostics of these models.

pars_icc_fits <- map(list(fit_nowound_withcov, fit_nowound_nocov, fit_wound_withcov, fit_wound_nocov, fit_field_grandomintercepts), get_all_pars)

maxrhats <- map_dbl(pars_icc_fits, ~ max(.$Rhat))
miness <- map_dbl(pars_icc_fits, ~ min(.$Bulk_ESS))

data.frame(model = c('no wound, with covariates', 'no wound, no covariates', 'wound, with covariates', 'wound, no covariates', 'field'),
           Rhat = maxrhats, ESS = miness) |>
  knitr::kable(digits = 3)

model	Rhat	ESS
no wound, with covariates	1.003	2028.255
no wound, no covariates	1.001	1885.373
wound, with covariates	1.015	344.869
wound, no covariates	1.001	2187.798
field	1.003	1587.759

Calculate ICC values for each of the models, using a simple variance decomposition method. Obtain a 95% quantile credible interval.

get_icc <- function(fit) {
  draws <- as_draws_df(fit)
  sigmas <- draws[, c(grep('^sd', names(draws),value=TRUE), 'sigma')]
  icc_draws <- apply(sigmas, 2, function(sigma) sigma^2/rowSums(sigmas^2))
  t(apply(icc_draws[, 1:3], 2, quantile, probs = c(0.5, 0.025, 0.975)))
}

icc_df <- map(list(fit_field_grandomintercepts, fit_wound_nocov, fit_nowound_nocov, fit_wound_withcov, fit_nowound_withcov), get_icc)

make_icc_table <- function(iccs) {
  c(glue::glue('{round(iccs[,1], 3)} ({round(iccs[,2], 3)}, {round(iccs[,3], 3)})')) |>
    setNames(c('genotype', 'year', 'replicate')) |> t() |>
    as.data.frame()
}

icc_table <- cbind(
  method = c('field', 'lab, wound', 'lab, no wound', 'lab, wound', 'lab, no wound'),
  covariates = c('no', 'no', 'no', 'yes', 'yes'),
  map_dfr(icc_df, make_icc_table)
)

knitr::kable(icc_table, col.names = c('method', 'infection covariates included', 'genotype', 'year', 'replicate'))

method	infection covariates included	genotype	year	replicate
field	no	0.797 (0.251, 0.938)	0.087 (0.001, 0.706)	0.002 (0, 0.052)
lab, wound	no	0.278 (0.097, 0.554)	0.341 (0.054, 0.746)	0.113 (0.021, 0.447)
lab, no wound	no	0.293 (0.144, 0.498)	0.059 (0.001, 0.441)	0.011 (0, 0.111)
lab, wound	yes	0.471 (0.121, 0.707)	0.051 (0, 0.74)	0.053 (0.01, 0.288)
lab, no wound	yes	0.318 (0.168, 0.523)	0.03 (0, 0.36)	0.01 (0, 0.097)

Overall means

Means for each method with 95% quantile credible intervals, averaged across genotypes.

all_grand <- emmeans(combined_models$fit[[4]], ~ Trt) %>% gather_emmeans_draws()

setDT(all_grand)
all_grand[, .value := expm1(.value)]

grandmean_table <- all_grand[, median_qi(.value), by = Trt][
  , .(Trt, y, ymin, ymax)]
grandmean_table[ , Trt := c('KSA no wound', 'KSA wound', 'side-needle')]
setnames(grandmean_table, c('method', 'mean', '95% CI lower', '95% CI upper'))

knitr::kable(grandmean_table, digits = 2)

method	mean	95% CI lower	95% CI upper
KSA no wound	261.26	95.93	681.06
KSA wound	1629.80	599.67	4190.31
side-needle	385.14	140.05	993.16

Write tables and figures to files

Format tables for means, pairwise correlations, and ICCs.

format_cis <- function(dat, n = 2) {
  dat[, ci66 := paste0('(', round(q0.17, n), ', ', round(q0.83, n), ')')]
  dat[, ci90 := paste0('(', round(q0.05, n), ', ', round(q0.95, n), ')')]
  dat[, ci95 := paste0('(', round(q0.025, n), ', ', round(q0.975, n), ')')]
}

format_cis(contr_combined)
contr_combined[, median := paste(round(q0.5, 2), group)]

means_table <- contr_combined[, .(Trt, Genotype, median, ci95)] |> dcast(Genotype ~ Trt, value.var = c('median', 'ci95'))

setcolorder(means_table, c('Genotype','median_field','ci95_field','median_no','ci95_no','median_wound','ci95_wound'))
setnames(means_table, c('genotype', 'side-needle mean', 'side-needle 95% CI', 'KSA no wound mean', 'KSA no wound 95% CI', 'KSA wound mean', 'KSA wound 95% CI'))

# Remove color label from genotype mean names.
means_table[, genotype := factor(genotype, levels = levels(genotype), labels = map_chr(strsplit(levels(genotype), ' '), 1))]

# Pairwise correlations with 95% CI
format_cis(corr_summ)

corr_summ[, cor := paste(round(q0.5, 2), ci95)]

corr_2x2 <- corr_summ[, .(var1, var2, cor)] |> dcast(var2 ~ var1)
corr_2x2[, var2 := c('side-needle', 'KSA wound')]
setnames(corr_2x2, c('', 'KSA no wound', 'KSA wound'))

# Intraclass correlation coefficients
# Do not include the covariate models

setDT(icc_table)
icc_3x3 <- icc_table[!covariates %in% 'yes'] 
icc_3x3[, covariates := NULL]
icc_3x3[, method := rev(c('KSA no wound', 'KSA wound', 'side-needle'))]
icc_3x3 <- icc_3x3[3:1]

The following code will not evaluate when the notebook is rendered. Run this code to write the tables to CSV files and write the figures to PNG files.

fwrite(grandmean_table, file.path(out_dir, 'grandmeans.csv'))
fwrite(means_table, file.path(out_dir, 'means_table.csv'))
fwrite(corr_2x2, file.path(out_dir, 'pairwise_correlations.csv'))
fwrite(icc_3x3, file.path(out_dir, 'ICC.csv'))

ggsave(file.path(out_dir, 'fig1.png'), pmeanfield, height = 4, width = 5, dpi = 400, device = agg_png)
ggsave(file.path(out_dir, 'fig2.png'), pmeanlab, height = 4, width = 9, dpi = 400, device = agg_png)
ggsave(file.path(out_dir, 'fig3.png'), pcombined, height = 4, width = 8, dpi = 400, device = agg_png)

KSA analysis

Quentin D. Read

February 26, 2024