#reset R's brain
rm(list=ls())

#install packages for analysis
# install.packages("ggplot2")
#  install.packages("scales")
#  install.packages("rpart")
#  install.packages("rpart.plot")
#  install.packages("gridExtra")
#  install.packages("RColorBrewer")
#  install.packages("viridis")
#  install.packages("plyr")
#  install.packages("hrbrthemes")

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(rpart)
## Warning: package 'rpart' was built under R version 3.6.2
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.6.3
library(RColorBrewer)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.6.3
library(scales)
## Warning: package 'scales' was built under R version 3.6.2
library(drc)
## Warning: package 'drc' was built under R version 3.6.2
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 3.6.2
## 
## 'drc' has been loaded.
## Please cite R and 'drc' if used for a publication,
## for references type 'citation()' and 'citation('drc')'.
## 
## Attaching package: 'drc'
## The following objects are masked from 'package:stats':
## 
##     gaussian, getInitial
library(plyr) 
## Warning: package 'plyr' was built under R version 3.6.3
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 3.6.2
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see http://bit.ly/arialnarrow
library(viridis)
## Warning: package 'viridis' was built under R version 3.6.3
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
## 
##     viridis_pal
###############################################################################################################################################
###############################################################################################################################################
#basic stats of the dataset SI2
###############################################################################################################################################
###############################################################################################################################################

#stewd tells R where to look for data
setwd("C:/Users/Bettina/Documents/Papers/Validation scale/datasets and analysis/R analysis/data/valscale_data_and_analysis")

#setwd(".../valscale_data_and_analysis")


# load table to analyse basic info
val2 <- read.csv("valscale_basic.csv", sep = "," , header = TRUE)


##############################################################################################################
# broad taxonomic group; figure SI2a

#generate plot colors
colourCount = length(unique(val2$group))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))

ggplot(val2, aes(x=group, fill=group)) +
  theme_bw() +
  theme(text = element_text(size=20))+
  geom_bar(stat="count", fill=getPalette(colourCount))+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  xlab("") +
  ylab("assay count")

##############################################################################################################
# number of citations per assay; figure SI2b

#generate plot colors
colourCount = length(unique(val2$nocit))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))

ggplot(val2, aes(x=as.factor(nocit), fill=as.factor(nocit))) +
  theme_bw() +
  theme(text = element_text(size=18))+
  geom_bar(aes(y = (..count..)/sum(..count..)), fill=getPalette(colourCount)) +
  scale_y_continuous(labels = percent) +
  xlab("number of citations") +
  ylab("relative frequency")

##############################################################################################################
#PCR type; figure SI2c
#generate plot colors
colourCount = length(unique(val2$type))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))

ggplot(val2, aes(x=type, fill=type)) +
  theme_bw() +
  theme(text = element_text(size=18))+
  geom_bar(aes(y = (..count..)/sum(..count..))) +
  scale_y_continuous(labels = percent) +
  scale_fill_brewer(palette="Set3")+
  theme(legend.position = "none")+
  ylab("relative frequency")+
  xlab("")

##############################################################################################################
# target gene; figure SI2e
#generate plot colors
colourCount = length(unique(val2$gene))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))

ggplot(val2, aes(x=gene, fill=gene)) +
  theme_bw() +
  theme(text = element_text(size=15))+
  geom_bar(aes(y = (..count..)/sum(..count..)), fill=getPalette(colourCount)) +
  scale_y_continuous(labels = percent) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  ylab("relative frequency")+
  xlab("")

###############################################################################################################
#eDNA source; figure SI2d

#generate plot colors
colourCount = length(unique(val2$esou))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))

ggplot(val2, aes(x=as.factor(esou), fill=as.factor(esou))) +
  theme_bw() +
  theme(text = element_text(size=18))+
  geom_bar(aes(y = (..count..)/sum(..count..))) +
  scale_fill_brewer(palette="Set3")+
  scale_y_continuous(labels = percent) +
  theme(legend.position = "none")+
  ylab("relative frequency")+
  xlab("")

###############################################################################################################





###############################################################################################################################################
###############################################################################################################################################

#Working out the basic metrics of the valdation scale
# Figure 2 A to E

###############################################################################################################################################
###############################################################################################################################################

# load the validation table; minimum criteria are in virst variable block, all scoring variables follow, total scoring percentage at end of sheet
checkmet <- read.csv("checklist_metrics_1.csv", sep = "," , header = TRUE)
names(checkmet)
##   [1] "ID"                     "spec"                  
##   [3] "common"                 "group"                 
##   [5] "esou"                   "type"                  
##   [7] "gene"                   "rating"                
##   [9] "year"                   "Level"                 
##  [11] "nocit"                  "L_min_crit_perc"       
##  [13] "X"                      "X.1"                   
##  [15] "L1_min_isil_targetspec" "L1_min_tt_tt"          
##  [17] "L1_min._pripro"         "L2_min_pcr_dna"        
##  [19] "L2_min_iv"              "L3_min_ext_meth"       
##  [21] "L3_min_ftpc"            "L3_min_envdet"         
##  [23] "L4_min_lod_lod"         "L4_min_multlosa"       
##  [25] "L4_min_adv_iv"          "L5_min_comp_specplus"  
##  [27] "L5_min_detprob"         "L5_min_ecphy"          
##  [29] "X.2"                    "L5_isil_db"            
##  [31] "L5_isil_targetspec"     "L5_isil_softw"         
##  [33] "L5_isil_softwpar"       "L5_isil_mism"          
##  [35] "L5_isil_mismloc"        "L5_isil_potprob"       
##  [37] "L5_isil_closrel"        "isil_cooc"             
##  [39] "L1_tt_tt"               "L5_tt_ts"              
##  [41] "L5_tt_geosc"            "L5_tt_ind"             
##  [43] "L5_tt_ht"               "L5_tt_conc"            
##  [45] "L5_min._pripro"         "L5_ttp_cyccon"         
##  [47] "L5_ttp_acheck"          "L5_ttp_seq"            
##  [49] "L5_ttp_gene"            "L5_probe"              
##  [51] "L5_dye"                 "L5_iqen"               
##  [53] "L5_quen"                "L5_TMM"                
##  [55] "L5_pcr_vol"             "L5_pcr_pc"             
##  [57] "L5_pcr_commix"          "L5_pcr_custmix"        
##  [59] "L5_pcr_mg"              "L5_pcr_dntp"           
##  [61] "L5_pcr_buff"            "L5_pcr_enz"            
##  [63] "L5_pcr_enha"            "L5_pcr_dna"            
##  [65] "L5_pcr_cycler"          "L5_pcr_annt"           
##  [67] "L5_pcr_anntemp"         "L5_pcr_cyc"            
##  [69] "L5_pcr_tr"              "L5_pcr_opt"            
##  [71] "L5_iv_ind"              "L5_iv_geoorig"         
##  [73] "L5_iv_ts"               "L5_iv_seq"             
##  [75] "L5_iv_conc"             "L5_geosco"             
##  [77] "L5_unilabflow"          "L5_geotest"            
##  [79] "L5_ext_meth"            "L5_ext_modif"          
##  [81] "L5_ext_lysvol"          "L5_ext_nk"             
##  [83] "L5_ext_elu"             "L5_ext_verif"          
##  [85] "L5_proc_vol"            "L5_proc_ft"            
##  [87] "L5_proc_ps"             "L5_proc_diam"          
##  [89] "L5_proc_pressu"         "L5_proc_preserv"       
##  [91] "L5_proc_precichen"      "L5_proc_cent"          
##  [93] "L5_proc_temp"           "L5_proc_stor"          
##  [95] "L5_proc_blanks"         "L5_sam_det"            
##  [97] "L5_sam_type"            "L5_sam_arthab"         
##  [99] "L5_sam_nathab"          "L5_sam_spike"          
## [101] "L5_sam_env"             "L5_fieldsampseq"       
## [103] "L5_lod_lod"             "L5_lod_met"            
## [105] "L5_lod_rep"             "L5_avf_multloc"        
## [107] "L5_avf_notth"           "L5_avf_th"             
## [109] "L5_avf_hist"            "L5_avf_multsam"        
## [111] "L5_avf_envgrad"         "L5_avf_dens"           
## [113] "L5_avf_trad"            "L5_avf_inhi"           
## [115] "L5_adv_iv_geosc"        "L5_adv_iv_socc"        
## [117] "L5_adv_iv_ind"          "L5_adv_iv_geoorig"     
## [119] "L5_adv_iv_ts"           "L5_adv_iv_seq"         
## [121] "L5_adv_iv_conc"         "L5_comp_specplus"      
## [123] "L5_comp_just"           "L5_comp_seq"           
## [125] "L5_stat_time"           "L5_stat_spac"          
## [127] "L5_stat_mod"            "L5_stat_occm"          
## [129] "L5_stat_param"          "L5_env_orig"           
## [131] "L5_env_state"           "L5_env_degr"           
## [133] "L5_env_transp"          "X.3"                   
## [135] "nas"                    "sum"                   
## [137] "sco_perc"
##############################################################################################################
#rating based on the minimum reorting criteria from the validation scale
#Figure 2A
ggplot(checkmet, aes(x=Level, fill=as.factor(Level))) +
  theme_bw() +
  theme(text = element_text(size=16))+
  geom_bar(aes(y = (..count..)/sum(..count..))) +
  scale_fill_manual(values = c("gray65","#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF", "#FDE725FF"))+
  scale_y_continuous(labels = percent) +
  theme(legend.position = "none")+
  ylab("relative frequency")+
  xlab("validation level based on minimum criteria")

##############################################################################################################
# assays year vs. pessimistic level on the validation scale
#Figure 2B
ggplot(checkmet, aes(x=as.factor(year), fill=as.factor(Level))) + 
  theme_bw() +
  geom_bar(position = "fill")+
  theme(text = element_text(size=16))+
  scale_fill_manual(values = c("gray65","#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF", "#FDE725FF")) +
  scale_y_continuous(labels = scales::percent_format())+
  ylab("relative frequency")+
  xlab("publication year")+
  theme(legend.position = "none")

##############################################################################################################
#violin plot of assays per level and their total scoring percentage
#Figure2C

#mean and sd function
data_summary <- function(x) {
  m <- mean(x)
  ymin <- m-sd(x)
  ymax <- m+sd(x)
  return(c(y=m,ymin=ymin,ymax=ymax))
}


#violin plot of assay classification based on minimum criteria and total scoring percentage
ggplot(checkmet, aes(x=as.factor(Level), y=sco_perc, fill=as.factor(Level))) + 
  theme_bw() +
  theme(text = element_text(size=16))+
  geom_violin(trim=T)+
  scale_y_continuous(labels = scales::percent_format(), limits=c(0,0.8))+
  stat_summary(fun.data=data_summary, colour="white") +
  scale_fill_manual(values = c("gray65","#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF", "#FDE725FF"))+
  theme(legend.position = "none")+
  ylab("total scoring percentage")+
  xlab("validation level based on minimum criteria")

###############################################################################################################################
# which minimum criteria cause assays to not make the next level
#Figure 2E

#subset the dataframe into the different levels

l0 <- subset(checkmet, Level == 0)
l1 <- subset(checkmet, Level == 1)
l2 <- subset(checkmet, Level == 2)
l3 <- subset(checkmet, Level == 3)
l4 <- subset(checkmet, Level == 4)

# create dataframe showing which of the level 0 assays DO fulfill each of the L1 minimum criteria (96 level 0 assays)
suml0 <- data.frame(
  level = c(0,0,0),
  min_crit=c("L1_min_isil_targetspec", "L1_min_tt_tt", "L1_min_pripro" ),
  value=c(sum(l0$L1_min_isil_targetspec)/83, sum(l0$L1_min_tt_tt)/83, sum(l0$L1_min._pripro)/83))
# create dataframe showing which of the level 1 assays DO fulfill each of the L2 minimum criteria (166 level 1 assays)
suml1 <- data.frame(
  level = c(1,1),
  min_crit=c("L2_min_pcr_dna", "L2_min_iv" ),
  value=c(sum(l1$L2_min_pcr_dna)/162, sum(l1$L2_min_iv)/162))
# create dataframe showing which of the level 2 assays DO fulfill each of the L3 minimum criteria (87 level 2 assays)
suml2 <- data.frame(
  level = c(2,2,2),
  min_crit=c("L3_min_ext_meth", "L3_min_ftpc", "L3_min_envdet"),
  value=c(sum(l2$L3_min_ext_meth)/89, sum(l2$L3_min_ftpc)/89, sum(l2$L5_min_envdet)/89))
# create dataframe showing which of the level 3 assays DO fulfill each of the L4 minimum criteria (145 level 3 assays)
suml3 <- data.frame(
  level = c(3,3,3),
  min_crit=c("L4_min_lod_lod", "L4_min_multlosa", "L4_min_adv_iv"),
  value=c(sum(l3$L4_min_lod_lod)/155, sum(l3$L4_min_multlosa)/155, sum(l3$L4_min_adv_iv)/155))
# create dataframe showing which of the level 4 assays DO fulfill each of the L5 minimum criteria (52 level 4 assays)
suml4 <- data.frame(
  level = c(4,4,4),
  min_crit=c("L5_min_comp_specplus", "L5_min_detprob", "L5_min_ecphy"),
  value=c(sum(l4$L5_min_comp_specplus)/57, sum(l4$L5_min_detprob)/57, sum(l4$L5_min_ecphy)/57))

# put the 5 little dataframes together into one data frame
tot_summary <- rbind(suml0, suml1, suml2, suml3, suml4) 


# figure showing per level and minimum criterion which percentage of the assays did NOT fullfill the minimum criterion from the next highest level
# plot figure 2E
ggplot(tot_summary, aes(x=min_crit, y = 1-value, fill=as.factor(level))) + 
  theme_bw() +
  geom_bar(stat = "identity")+
  scale_fill_manual(values = c("gray65","#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF", "#FDE725FF")) +
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  ylab("relative frequency")+
  xlab("minimum criteria") +
  theme(legend.position = "none")

###############################################################################################################################
# plot showing how often a certain variable is scored positive
#only assays for which a variable is acutally useful were used to compute the percentages
#figure 2D


# load the summarized validation table 
valvarsum <- read.csv("val_var_summary.csv", sep = "," , header = TRUE)
names(valvarsum)
## [1] "var"          "old.var.name" "coun"         "perc"        
## [5] "var_level"
# var = variable name
# old.var.name = old variable name not suitable in figures
# coun = number of assays for which a certain variable should be reported
# perc = percentage of these assays actually reporting the variable
# var_level = level on the validation scale the variable is associated with

ggplot(valvarsum, aes(x=reorder(var, desc(perc)+(var_level) , "identity") ,y=perc,  fill=as.factor(var_level))) + 
  theme_bw() +
  geom_bar(stat = "identity")+
  theme(text = element_text(size=16))+
  scale_fill_manual(values = c("#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF", "#FDE725FF")) +
  theme(axis.text.y = element_text(size = 10))+
  scale_y_continuous(labels = scales::percent_format())+
  ylab("relative frequency")+
  xlab("validation variables") +
  theme(legend.position = "none")+
  coord_flip()

###############################################################################################################################################
###############################################################################################################################################

# Bubbleplot in SI4

###############################################################################################################################################
###############################################################################################################################################


bubble <- read.csv("baseforbubbleplot.csv", sep = "," , header = TRUE)
# Level = validation level based on minimum criteria
# rating = subjective rating given by authors
# coun = number of assays per combination of Level and rating
# col = dummy variable for fill color


ggplot(bubble, aes(x=Level, y= rating, size = coun, fill = col ))+
  theme_bw()+
  geom_point(alpha = 0.8, shape=21, color="black")+
  scale_size(range = c(.1, 20)) +
  scale_fill_manual(values = c( "#6633FF", "#CC00CC", "#FF3399", "#FF6633", "#FFCC00", "#FFFFCC")) +
  ylab("subjective validation level")+
  xlab("validation level based on minimum criteria")+
  guides(fill=FALSE, size=guide_legend(title="count"))
## Warning: Removed 15 rows containing missing values (geom_point).

###############################################################################################################################################
###############################################################################################################################################

# CART Analysis

###############################################################################################################################################
###############################################################################################################################################


# load a summary version of the validation table including the block scores
checkmet2 <- read.csv("checklist_metrics_2.csv", sep = "," , header = TRUE)

names(checkmet2)
##  [1] "ID"                               
##  [2] "spec"                             
##  [3] "common"                           
##  [4] "group"                            
##  [5] "esou"                             
##  [6] "type"                             
##  [7] "gene"                             
##  [8] "rating"                           
##  [9] "year"                             
## [10] "Level"                            
## [11] "nocit"                            
## [12] "sco_perc"                         
## [13] "L1_min_isil_targetspec"           
## [14] "L1_min_tt_tt"                     
## [15] "L1_min._pripro"                   
## [16] "L2_min_pcr_dna"                   
## [17] "L2_min_iv"                        
## [18] "L3_min_ext_meth"                  
## [19] "L3_min_ftpc"                      
## [20] "L3_min_envdet"                    
## [21] "L4_min_lod_lod"                   
## [22] "L4_min_multlosa"                  
## [23] "L4_min_adv_iv"                    
## [24] "L5_min_comp_specplus"             
## [25] "L5_min_detprob"                   
## [26] "L5_min_ecphy"                     
## [27] "X"                                
## [28] "in.silico"                        
## [29] "target.tissue"                    
## [30] "PCR"                              
## [31] "in.vitro"                         
## [32] "stray_sco_perc"                   
## [33] "extraction"                       
## [34] "sample.processing"                
## [35] "eDNA.sample"                      
## [36] "LOD"                              
## [37] "field.testing"                    
## [38] "adv.in.vitro"                     
## [39] "comprehensive.specificity.testing"
## [40] "statistics"                       
## [41] "ecological.and.physical.factors"
#calculate the tree (validation level vs. the scoring percentages of the individual variable blocks)
p1 <- rpart(data=checkmet2, Level ~ 
              in.silico +target.tissue + PCR + in.vitro +           
              extraction + sample.processing + eDNA.sample + LOD + field.testing +          
              adv.in.vitro + comprehensive.specificity.testing + statistics + ecological.and.physical.factors ,
            method = "class")

# basic tree diagnostics        
printcp(p1) # display the results
## 
## Classification tree:
## rpart(formula = Level ~ in.silico + target.tissue + PCR + in.vitro + 
##     extraction + sample.processing + eDNA.sample + LOD + field.testing + 
##     adv.in.vitro + comprehensive.specificity.testing + statistics + 
##     ecological.and.physical.factors, data = checkmet2, method = "class")
## 
## Variables actually used in tree construction:
## [1] adv.in.vitro      field.testing     in.silico         in.vitro         
## [5] LOD               PCR               sample.processing target.tissue    
## 
## Root node error: 384/546 = 0.7033
## 
## n= 546 
## 
##          CP nsplit rel error  xerror     xstd
## 1  0.236979      0   1.00000 1.02083 0.027383
## 2  0.151042      1   0.76302 0.79688 0.030202
## 3  0.117188      2   0.61198 0.65885 0.030344
## 4  0.066406      3   0.49479 0.53646 0.029495
## 5  0.039062      5   0.36198 0.42188 0.027797
## 6  0.033854      6   0.32292 0.34375 0.026053
## 7  0.023438      7   0.28906 0.28646 0.024407
## 8  0.018229      8   0.26562 0.28646 0.024407
## 9  0.015625      9   0.24740 0.28385 0.024324
## 10 0.011719     11   0.21615 0.27865 0.024154
## 11 0.010000     13   0.19271 0.27083 0.023895
plotcp(p1) # visualize cross-validation results

summary(p1) # detailed summary of splits
## Call:
## rpart(formula = Level ~ in.silico + target.tissue + PCR + in.vitro + 
##     extraction + sample.processing + eDNA.sample + LOD + field.testing + 
##     adv.in.vitro + comprehensive.specificity.testing + statistics + 
##     ecological.and.physical.factors, data = checkmet2, method = "class")
##   n= 546 
## 
##            CP nsplit rel error    xerror       xstd
## 1  0.23697917      0 1.0000000 1.0208333 0.02738267
## 2  0.15104167      1 0.7630208 0.7968750 0.03020221
## 3  0.11718750      2 0.6119792 0.6588542 0.03034356
## 4  0.06640625      3 0.4947917 0.5364583 0.02949480
## 5  0.03906250      5 0.3619792 0.4218750 0.02779685
## 6  0.03385417      6 0.3229167 0.3437500 0.02605311
## 7  0.02343750      7 0.2890625 0.2864583 0.02440687
## 8  0.01822917      8 0.2656250 0.2864583 0.02440687
## 9  0.01562500      9 0.2473958 0.2838542 0.02432352
## 10 0.01171875     11 0.2161458 0.2786458 0.02415442
## 11 0.01000000     13 0.1927083 0.2708333 0.02389463
## 
## Variable importance
##                        in.vitro                       in.silico 
##                              16                              14 
##                    adv.in.vitro               sample.processing 
##                              12                              11 
##                   target.tissue                             PCR 
##                              10                               9 
##                   field.testing                     eDNA.sample 
##                               9                               7 
##                             LOD                      extraction 
##                               5                               5 
## ecological.and.physical.factors                      statistics 
##                               1                               1 
## 
## Node number 1: 546 observations,    complexity param=0.2369792
##   predicted class=1  expected loss=0.7032967  P(node) =1
##     class counts:    83   162    89   155    57
##    probabilities: 0.152 0.297 0.163 0.284 0.104 
##   left son=2 (186 obs) right son=3 (360 obs)
##   Primary splits:
##       in.vitro          < 0.1        to the left,  improve=57.10230, (0 missing)
##       in.silico         < 0.2678571  to the left,  improve=52.22768, (0 missing)
##       PCR               < 0.7913043  to the right, improve=47.28173, (0 missing)
##       sample.processing < 0.3541667  to the left,  improve=34.63709, (0 missing)
##       target.tissue     < 0.08333333 to the left,  improve=32.34111, (0 missing)
##   Surrogate splits:
##       adv.in.vitro                    < 0.07142857 to the left,  agree=0.766, adj=0.312, (0 split)
##       target.tissue                   < 0.25       to the left,  agree=0.749, adj=0.263, (0 split)
##       in.silico                       < 0.4365079  to the left,  agree=0.727, adj=0.199, (0 split)
##       PCR                             < 0.6381818  to the left,  agree=0.709, adj=0.145, (0 split)
##       ecological.and.physical.factors < 0.125      to the right, agree=0.690, adj=0.091, (0 split)
## 
## Node number 2: 186 observations,    complexity param=0.1171875
##   predicted class=1  expected loss=0.4193548  P(node) =0.3406593
##     class counts:    64   108     2    10     2
##    probabilities: 0.344 0.581 0.011 0.054 0.011 
##   left son=4 (51 obs) right son=5 (135 obs)
##   Primary splits:
##       in.silico         < 0.2916667  to the left,  improve=44.126950, (0 missing)
##       PCR               < 0.7888889  to the left,  improve=37.935480, (0 missing)
##       target.tissue     < 0.08333333 to the left,  improve=26.164370, (0 missing)
##       sample.processing < 0.6904762  to the left,  improve= 4.945116, (0 missing)
##       field.testing     < 0.1944444  to the left,  improve= 3.621505, (0 missing)
##   Surrogate splits:
##       PCR               < 0.3666667  to the left,  agree=0.758, adj=0.118, (0 split)
##       sample.processing < 0.9545455  to the right, agree=0.742, adj=0.059, (0 split)
##       field.testing     < 0.1388889  to the left,  agree=0.731, adj=0.020, (0 split)
## 
## Node number 3: 360 observations,    complexity param=0.1510417
##   predicted class=3  expected loss=0.5972222  P(node) =0.6593407
##     class counts:    19    54    87   145    55
##    probabilities: 0.053 0.150 0.242 0.403 0.153 
##   left son=6 (84 obs) right son=7 (276 obs)
##   Primary splits:
##       sample.processing < 0.3541667  to the left,  improve=44.97246, (0 missing)
##       LOD               < 0.3333333  to the left,  improve=22.81260, (0 missing)
##       eDNA.sample       < 0.1428571  to the left,  improve=22.18717, (0 missing)
##       extraction        < 0.08333333 to the left,  improve=18.66146, (0 missing)
##       field.testing     < 0.05555556 to the left,  improve=17.55539, (0 missing)
##   Surrogate splits:
##       eDNA.sample   < 0.1428571  to the left,  agree=0.908, adj=0.607, (0 split)
##       field.testing < 0.05555556 to the left,  agree=0.850, adj=0.357, (0 split)
##       extraction    < 0.08333333 to the left,  agree=0.844, adj=0.333, (0 split)
##       adv.in.vitro  < 0.9285714  to the right, agree=0.783, adj=0.071, (0 split)
##       in.vitro      < 0.9        to the right, agree=0.772, adj=0.024, (0 split)
## 
## Node number 4: 51 observations
##   predicted class=0  expected loss=0.07843137  P(node) =0.09340659
##     class counts:    47     2     0     1     1
##    probabilities: 0.922 0.039 0.000 0.020 0.020 
## 
## Node number 5: 135 observations,    complexity param=0.03385417
##   predicted class=1  expected loss=0.2148148  P(node) =0.2472527
##     class counts:    17   106     2     9     1
##    probabilities: 0.126 0.785 0.015 0.067 0.007 
##   left son=10 (21 obs) right son=11 (114 obs)
##   Primary splits:
##       target.tissue < 0.08333333 to the left,  improve=20.59159, (0 missing)
##       PCR           < 0.7863636  to the right, improve=12.13041, (0 missing)
##       in.silico     < 0.5634921  to the left,  improve= 6.13870, (0 missing)
##       field.testing < 0.7638889  to the left,  improve= 3.24923, (0 missing)
##       eDNA.sample   < 0.6428571  to the left,  improve= 3.20936, (0 missing)
## 
## Node number 6: 84 observations,    complexity param=0.0390625
##   predicted class=2  expected loss=0.3095238  P(node) =0.1538462
##     class counts:     1    25    58     0     0
##    probabilities: 0.012 0.298 0.690 0.000 0.000 
##   left son=12 (29 obs) right son=13 (55 obs)
##   Primary splits:
##       in.vitro      < 0.3        to the left,  improve=18.352040, (0 missing)
##       PCR           < 0.7809524  to the right, improve=12.959090, (0 missing)
##       target.tissue < 0.4166667  to the left,  improve= 7.882353, (0 missing)
##       extraction    < 0.4166667  to the left,  improve= 7.342105, (0 missing)
##       eDNA.sample   < 0.1428571  to the left,  improve= 5.669340, (0 missing)
##   Surrogate splits:
##       target.tissue < 0.4166667  to the left,  agree=0.821, adj=0.483, (0 split)
##       LOD           < 0.3333333  to the left,  agree=0.810, adj=0.448, (0 split)
##       adv.in.vitro  < 0.2142857  to the left,  agree=0.786, adj=0.379, (0 split)
##       in.silico     < 0.6125     to the left,  agree=0.750, adj=0.276, (0 split)
##       field.testing < 0.3888889  to the right, agree=0.702, adj=0.138, (0 split)
## 
## Node number 7: 276 observations,    complexity param=0.06640625
##   predicted class=3  expected loss=0.4746377  P(node) =0.5054945
##     class counts:    18    29    29   145    55
##    probabilities: 0.065 0.105 0.105 0.525 0.199 
##   left son=14 (122 obs) right son=15 (154 obs)
##   Primary splits:
##       LOD           < 0.3333333  to the left,  improve=22.524580, (0 missing)
##       adv.in.vitro  < 0.07142857 to the left,  improve=20.955390, (0 missing)
##       PCR           < 0.952381   to the right, improve=14.200260, (0 missing)
##       in.silico     < 0.2678571  to the left,  improve=11.980740, (0 missing)
##       field.testing < 0.4097222  to the left,  improve= 8.358417, (0 missing)
##   Surrogate splits:
##       eDNA.sample                       < 0.3571429  to the left,  agree=0.645, adj=0.197, (0 split)
##       field.testing                     < 0.2777778  to the left,  agree=0.616, adj=0.131, (0 split)
##       in.vitro                          < 0.775      to the right, agree=0.609, adj=0.115, (0 split)
##       sample.processing                 < 0.4375     to the left,  agree=0.605, adj=0.107, (0 split)
##       comprehensive.specificity.testing < 0.8333333  to the right, agree=0.601, adj=0.098, (0 split)
## 
## Node number 10: 21 observations
##   predicted class=0  expected loss=0.2380952  P(node) =0.03846154
##     class counts:    16     3     2     0     0
##    probabilities: 0.762 0.143 0.095 0.000 0.000 
## 
## Node number 11: 114 observations
##   predicted class=1  expected loss=0.09649123  P(node) =0.2087912
##     class counts:     1   103     0     9     1
##    probabilities: 0.009 0.904 0.000 0.079 0.009 
## 
## Node number 12: 29 observations,    complexity param=0.01822917
##   predicted class=1  expected loss=0.2413793  P(node) =0.05311355
##     class counts:     0    22     7     0     0
##    probabilities: 0.000 0.759 0.241 0.000 0.000 
##   left son=24 (22 obs) right son=25 (7 obs)
##   Primary splits:
##       in.silico                         < 0.5357143  to the right, improve=10.620690, (0 missing)
##       PCR                               < 0.7636364  to the right, improve=10.620690, (0 missing)
##       eDNA.sample                       < 0.1428571  to the left,  improve= 6.997313, (0 missing)
##       comprehensive.specificity.testing < 0.1666667  to the right, improve= 2.385396, (0 missing)
##       extraction                        < 0.25       to the right, improve= 1.520690, (0 missing)
##   Surrogate splits:
##       PCR               < 0.7636364  to the right, agree=1.000, adj=1.000, (0 split)
##       eDNA.sample       < 0.1428571  to the left,  agree=0.931, adj=0.714, (0 split)
##       target.tissue     < 0.5833333  to the left,  agree=0.897, adj=0.571, (0 split)
##       sample.processing < 0.125      to the left,  agree=0.897, adj=0.571, (0 split)
##       field.testing     < 0.2222222  to the left,  agree=0.897, adj=0.571, (0 split)
## 
## Node number 13: 55 observations
##   predicted class=2  expected loss=0.07272727  P(node) =0.1007326
##     class counts:     1     3    51     0     0
##    probabilities: 0.018 0.055 0.927 0.000 0.000 
## 
## Node number 14: 122 observations,    complexity param=0.015625
##   predicted class=3  expected loss=0.2295082  P(node) =0.2234432
##     class counts:     8     6    14    94     0
##    probabilities: 0.066 0.049 0.115 0.770 0.000 
##   left son=28 (39 obs) right son=29 (83 obs)
##   Primary splits:
##       PCR               < 0.718254   to the left,  improve=11.348960, (0 missing)
##       extraction        < 0.08333333 to the left,  improve=10.277980, (0 missing)
##       sample.processing < 0.9375     to the right, improve= 6.953752, (0 missing)
##       target.tissue     < 0.25       to the left,  improve= 3.693274, (0 missing)
##       adv.in.vitro      < 0.5        to the left,  improve= 3.131007, (0 missing)
##   Surrogate splits:
##       extraction        < 0.08333333 to the left,  agree=0.738, adj=0.179, (0 split)
##       sample.processing < 0.9375     to the right, agree=0.738, adj=0.179, (0 split)
##       in.silico         < 0.4097222  to the left,  agree=0.705, adj=0.077, (0 split)
##       target.tissue     < 0.08333333 to the left,  agree=0.705, adj=0.077, (0 split)
##       field.testing     < 0.7638889  to the right, agree=0.689, adj=0.026, (0 split)
## 
## Node number 15: 154 observations,    complexity param=0.06640625
##   predicted class=4  expected loss=0.6428571  P(node) =0.2820513
##     class counts:    10    23    15    51    55
##    probabilities: 0.065 0.149 0.097 0.331 0.357 
##   left son=30 (54 obs) right son=31 (100 obs)
##   Primary splits:
##       adv.in.vitro  < 0.07142857 to the left,  improve=37.334670, (0 missing)
##       PCR           < 0.952381   to the right, improve= 9.315181, (0 missing)
##       field.testing < 0.4097222  to the left,  improve= 6.575170, (0 missing)
##       in.silico     < 0.2678571  to the left,  improve= 5.860967, (0 missing)
##       in.vitro      < 0.45       to the right, improve= 4.628889, (0 missing)
##   Surrogate splits:
##       field.testing < 0.2777778  to the left,  agree=0.714, adj=0.185, (0 split)
##       target.tissue < 0.25       to the left,  agree=0.695, adj=0.130, (0 split)
##       PCR           < 0.5727273  to the left,  agree=0.675, adj=0.074, (0 split)
##       extraction    < 0.9166667  to the right, agree=0.675, adj=0.074, (0 split)
##       in.silico     < 0.1111111  to the left,  agree=0.662, adj=0.037, (0 split)
## 
## Node number 24: 22 observations
##   predicted class=1  expected loss=0  P(node) =0.04029304
##     class counts:     0    22     0     0     0
##    probabilities: 0.000 1.000 0.000 0.000 0.000 
## 
## Node number 25: 7 observations
##   predicted class=2  expected loss=0  P(node) =0.01282051
##     class counts:     0     0     7     0     0
##    probabilities: 0.000 0.000 1.000 0.000 0.000 
## 
## Node number 28: 39 observations,    complexity param=0.015625
##   predicted class=3  expected loss=0.5897436  P(node) =0.07142857
##     class counts:     6     3    14    16     0
##    probabilities: 0.154 0.077 0.359 0.410 0.000 
##   left son=56 (19 obs) right son=57 (20 obs)
##   Primary splits:
##       in.vitro     < 0.55       to the right, improve=9.856410, (0 missing)
##       extraction   < 0.08333333 to the left,  improve=5.193910, (0 missing)
##       in.silico    < 0.7321429  to the left,  improve=3.652132, (0 missing)
##       statistics   < 0.1        to the left,  improve=3.293252, (0 missing)
##       adv.in.vitro < 0.5        to the left,  improve=3.042125, (0 missing)
##   Surrogate splits:
##       statistics    < 0.1        to the left,  agree=0.744, adj=0.474, (0 split)
##       extraction    < 0.08333333 to the left,  agree=0.692, adj=0.368, (0 split)
##       field.testing < 0.1111111  to the left,  agree=0.692, adj=0.368, (0 split)
##       in.silico     < 0.5634921  to the right, agree=0.641, adj=0.263, (0 split)
##       PCR           < 0.6742424  to the left,  agree=0.641, adj=0.263, (0 split)
## 
## Node number 29: 83 observations
##   predicted class=3  expected loss=0.06024096  P(node) =0.1520147
##     class counts:     2     3     0    78     0
##    probabilities: 0.024 0.036 0.000 0.940 0.000 
## 
## Node number 30: 54 observations
##   predicted class=3  expected loss=0.1296296  P(node) =0.0989011
##     class counts:     6     0     1    47     0
##    probabilities: 0.111 0.000 0.019 0.870 0.000 
## 
## Node number 31: 100 observations,    complexity param=0.0234375
##   predicted class=4  expected loss=0.45  P(node) =0.1831502
##     class counts:     4    23    14     4    55
##    probabilities: 0.040 0.230 0.140 0.040 0.550 
##   left son=62 (9 obs) right son=63 (91 obs)
##   Primary splits:
##       PCR                               < 0.952381   to the right, improve=9.081099, (0 missing)
##       in.vitro                          < 0.45       to the right, improve=7.385457, (0 missing)
##       adv.in.vitro                      < 0.3095238  to the right, improve=7.245153, (0 missing)
##       comprehensive.specificity.testing < 0.1666667  to the right, improve=5.478611, (0 missing)
##       field.testing                     < 0.4097222  to the left,  improve=5.124698, (0 missing)
##   Surrogate splits:
##       field.testing < 0.8819444  to the right, agree=0.94, adj=0.333, (0 split)
## 
## Node number 56: 19 observations
##   predicted class=2  expected loss=0.2631579  P(node) =0.03479853
##     class counts:     0     3    14     2     0
##    probabilities: 0.000 0.158 0.737 0.105 0.000 
## 
## Node number 57: 20 observations
##   predicted class=3  expected loss=0.3  P(node) =0.03663004
##     class counts:     6     0     0    14     0
##    probabilities: 0.300 0.000 0.000 0.700 0.000 
## 
## Node number 62: 9 observations
##   predicted class=1  expected loss=0  P(node) =0.01648352
##     class counts:     0     9     0     0     0
##    probabilities: 0.000 1.000 0.000 0.000 0.000 
## 
## Node number 63: 91 observations,    complexity param=0.01171875
##   predicted class=4  expected loss=0.3956044  P(node) =0.1666667
##     class counts:     4    14    14     4    55
##    probabilities: 0.044 0.154 0.154 0.044 0.604 
##   left son=126 (27 obs) right son=127 (64 obs)
##   Primary splits:
##       field.testing     < 0.4097222  to the left,  improve=7.478531, (0 missing)
##       adv.in.vitro      < 0.6190476  to the right, improve=5.386999, (0 missing)
##       in.silico         < 0.6904762  to the right, improve=4.790820, (0 missing)
##       in.vitro          < 0.45       to the right, improve=4.650121, (0 missing)
##       sample.processing < 0.8819444  to the right, improve=4.027473, (0 missing)
##   Surrogate splits:
##       sample.processing < 0.9444444  to the right, agree=0.769, adj=0.222, (0 split)
##       in.silico         < 1.071429   to the right, agree=0.736, adj=0.111, (0 split)
##       extraction        < 0.25       to the left,  agree=0.736, adj=0.111, (0 split)
##       PCR               < 0.6055556  to the left,  agree=0.714, adj=0.037, (0 split)
## 
## Node number 126: 27 observations,    complexity param=0.01171875
##   predicted class=1  expected loss=0.6666667  P(node) =0.04945055
##     class counts:     2     9     7     3     6
##    probabilities: 0.074 0.333 0.259 0.111 0.222 
##   left son=252 (10 obs) right son=253 (17 obs)
##   Primary splits:
##       in.silico         < 0.6696429  to the left,  improve=4.099782, (0 missing)
##       PCR               < 0.7863636  to the right, improve=3.580897, (0 missing)
##       adv.in.vitro      < 0.6190476  to the right, improve=2.568173, (0 missing)
##       eDNA.sample       < 0.4642857  to the right, improve=2.481481, (0 missing)
##       sample.processing < 0.7840909  to the right, improve=2.304581, (0 missing)
##   Surrogate splits:
##       extraction        < 0.25       to the left,  agree=0.778, adj=0.4, (0 split)
##       sample.processing < 0.9090909  to the right, agree=0.778, adj=0.4, (0 split)
##       PCR               < 0.767316   to the left,  agree=0.667, adj=0.1, (0 split)
## 
## Node number 127: 64 observations
##   predicted class=4  expected loss=0.234375  P(node) =0.1172161
##     class counts:     2     5     7     1    49
##    probabilities: 0.031 0.078 0.109 0.016 0.766 
## 
## Node number 252: 10 observations
##   predicted class=2  expected loss=0.4  P(node) =0.01831502
##     class counts:     2     0     6     1     1
##    probabilities: 0.200 0.000 0.600 0.100 0.100 
## 
## Node number 253: 17 observations
##   predicted class=1  expected loss=0.4705882  P(node) =0.03113553
##     class counts:     0     9     1     2     5
##    probabilities: 0.000 0.529 0.059 0.118 0.294
par(mfrow=c(1,2)) # two plots on one page 
rsq.rpart(p1) # visualize cross-validation results  
## 
## Classification tree:
## rpart(formula = Level ~ in.silico + target.tissue + PCR + in.vitro + 
##     extraction + sample.processing + eDNA.sample + LOD + field.testing + 
##     adv.in.vitro + comprehensive.specificity.testing + statistics + 
##     ecological.and.physical.factors, data = checkmet2, method = "class")
## 
## Variables actually used in tree construction:
## [1] adv.in.vitro      field.testing     in.silico         in.vitro         
## [5] LOD               PCR               sample.processing target.tissue    
## 
## Root node error: 384/546 = 0.7033
## 
## n= 546 
## 
##          CP nsplit rel error  xerror     xstd
## 1  0.236979      0   1.00000 1.02083 0.027383
## 2  0.151042      1   0.76302 0.79688 0.030202
## 3  0.117188      2   0.61198 0.65885 0.030344
## 4  0.066406      3   0.49479 0.53646 0.029495
## 5  0.039062      5   0.36198 0.42188 0.027797
## 6  0.033854      6   0.32292 0.34375 0.026053
## 7  0.023438      7   0.28906 0.28646 0.024407
## 8  0.018229      8   0.26562 0.28646 0.024407
## 9  0.015625      9   0.24740 0.28385 0.024324
## 10 0.011719     11   0.21615 0.27865 0.024154
## 11 0.010000     13   0.19271 0.27083 0.023895
## Warning in rsq.rpart(p1): may not be applicable for this method

par(mfrow=c(1,1))
# plot tree
plot(p1, uniform=TRUE,
     main="Regression rating vs 1s")
text(p1, use.n=TRUE, all=TRUE, cex=.8)

# prune the tree
pfit<- prune(p1, cp=0.01) # from cptable   
# plot the pruned tree
plot(pfit, uniform=TRUE,
     main="Regression rating vs 1s")
text(pfit, use.n=TRUE, all=TRUE, cex=.8)

#############################################
# Figure 3
rpart.plot(pfit, type = 3, clip.right.labs = FALSE, branch = 1, extra = 101, under= TRUE, 
           fallen.leaves = FALSE, cex =NULL, tweak = 1.64, xcompact = FALSE, ycompact = FALSE, ycompress = FALSE, 
           box.palette = list("gray65","#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF"), col = "grey99", 
           round=1, split.font = 1, legend.x = NA)

###############################################################################################################################################
###############################################################################################################################################

# BOX 1 Figures

###############################################################################################################################################
###############################################################################################################################################

# load data of example assays
forbox <- read.csv("forbox1.csv", sep = "," , header = TRUE)
names(forbox)
## [1] "spec"        "ID"          "block"       "block_score" "level"
# spec = target species
# ID = assay ID
# block = name of the variable block
# block_score = scoring percentage of the variable block
# level = validation level of the assay

#resort the validation blocks
forbox$block2 <- factor(forbox$block, c("in silico","target tissue testing", "PCR", "in vitro", "extraction", "sample processing",
                                        "eDNA field detection", "LOD", "field testing", "advanced in vitro testing", "comprehensive specificity testing",
                                        "statistics", "ecological and physical factors"))


# subeset for each of the validation levels
l0 <- subset(forbox, level == 0)
l1 <- subset(forbox, level == 1)
l2 <- subset(forbox, level == 2)
l3 <- subset(forbox, level == 3)
l4 <- subset(forbox, level == 4)

#Level 0 barplot
o0 <- ggplot(l0, aes(x=block2, y=block_score, fill=block_score)) +
  theme_bw() +
  geom_bar(stat="identity") +
  scale_fill_viridis() +
  ylim(0,1) +
  theme(text = element_text(size=14))+
  xlab("") + ylab("block score") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(legend.position = "none")

#Level 1 barplot
o1 <- ggplot(l1, aes(x=block2, y=block_score, fill=block_score)) +
  theme_bw() +
  geom_bar(stat="identity") +
  scale_fill_viridis() +
  ylim(0,1) +
  theme(text = element_text(size=14))+
  xlab("") + ylab("block score") +
  theme(axis.text.x =  element_text(angle = 90, hjust = 1)) +
  theme(legend.position = "none")

#Level 2 barplot
o2 <- ggplot(l2, aes(x=block2, y=block_score, fill=block_score)) +
  theme_bw() +
  geom_bar(stat="identity") +
  scale_fill_viridis() +
  ylim(0,1) +
  theme(text = element_text(size=14))+
  xlab("") + ylab("block score") +
  theme(axis.text.x =  element_text(angle = 90, hjust = 1)) +
  theme(legend.position = "none")

#Level 3 barplot
o3 <- ggplot(l3, aes(x=block2, y=block_score, fill=block_score)) +
  theme_bw() +
  geom_bar(stat="identity") +
  scale_fill_viridis() +
  ylim(0,1) +
  theme(text = element_text(size=14))+
  xlab("") + ylab("block score") +
  theme(axis.text.x =  element_text(angle = 90, hjust = 1)) +
  theme(legend.position = "none")

#Level 4 barplot
o4 <- ggplot(l4, aes(x=block2, y=block_score, fill=block_score)) +
  theme_bw() +
  geom_bar(stat="identity") +
  scale_fill_viridis() +
  ylim(0,1) +
  theme(text = element_text(size=14))+
  xlab("") + ylab("block score") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(legend.position = "none")



# load the data with a binary variable 0/1 for minimum criterion fulfilled yes/no
forbox2 <- read.csv("forbox2.csv", sep = "," , header = TRUE)
names(forbox2)
## [1] "spec"      "ID"        "min"       "min_score" "level"
# spec = target species
# ID = assay ID
# min = name and level of the minimum criterion
# min_score = 1 if minimum criterion is fulfilled
# level = validation level of the assay



#subset for each validation level
ll0 <- subset(forbox2, level == 0)
ll1 <- subset(forbox2, level == 1)
ll2 <- subset(forbox2, level == 2)
ll3 <- subset(forbox2, level == 3)
ll4 <- subset(forbox2, level == 4)




#Level 0 tileplot, green tiles stand for min. criterion fulfilled
oo0 <- ggplot(ll0, aes(x=as.factor(level), y=min, fill=as.factor(min_score))) +
  theme_bw() +
  geom_tile(stat="identity", color = "white") +
  scale_fill_manual(values = c("#481567FF","#FDE725FF")) +
  xlab("") + ylab("") +
  theme(text = element_text(size=14))+
  theme(legend.position = "none") +
  theme(axis.text.x =  element_blank())

#Level 1 tileplot, green tiles stand for min. criterion fulfilled
oo1 <- ggplot(ll1, aes(x=as.factor(level), y=min, fill=as.factor(min_score))) +
  theme_bw() +
  geom_tile(stat="identity", color = "white") +
  scale_fill_manual(values = c("#481567FF","#FDE725FF")) +
  xlab("") + ylab("") +
  theme(text = element_text(size=14))+
  theme(legend.position = "none") +
  theme(axis.text.x =  element_blank())

#Level 2 tileplot, green tiles stand for min. criterion fulfilled
oo2 <- ggplot(ll2, aes(x=as.factor(level), y=min, fill=as.factor(min_score))) +
  theme_bw() +
  geom_tile(stat="identity",color = "white") +
  scale_fill_manual(values = c("#481567FF","#FDE725FF")) +
  xlab("") + ylab("") +
  theme(text = element_text(size=14))+
  theme(legend.position = "none") +
  theme(axis.text.x =  element_blank())

#Level 3 tileplot, green tiles stand for min. criterion fulfilled
oo3 <- ggplot(ll3, aes(x=as.factor(level), y=min, fill=as.factor(min_score))) +
  theme_bw() +
  geom_tile(stat="identity", color = "white") +
  scale_fill_manual(values = c("#481567FF","#FDE725FF")) +
  xlab("") + ylab("") +
  theme(text = element_text(size=14))+
  theme(legend.position = "none") +
  theme(axis.text.x =  element_blank())

#Level 4 tileplot, green tiles stand for min. criterion fulfilled
oo4 <- ggplot(ll4, aes(x=as.factor(level), y=min, fill=as.factor(min_score))) +
  theme_bw() +
  geom_tile(stat="identity", color = "white") +
  scale_fill_manual(values = c("#481567FF","#FDE725FF")) +
  xlab("") + ylab("") +
  theme(text = element_text(size=14))+
  theme(legend.position = "none") +
  theme(axis.text.x =  element_blank())



#Level 0 combined figure
grid.arrange(oo0, o0, ncol=4,
             layout_matrix = cbind(1,2,2,2))

#Level 1 combined figure             
grid.arrange(oo1, o1, ncol=4,
             layout_matrix = cbind(1,2,2,2))

#Level 2 combined figure
grid.arrange(oo2, o2, ncol=4,
             layout_matrix = cbind(1,2,2,2))

#Level 3 combined figure
grid.arrange(oo3, o3, ncol=4,
             layout_matrix = cbind(1,2,2,2))

#Level 4 combined figure
grid.arrange(oo4, o4, ncol=4,
             layout_matrix = cbind(1,2,2,2))

###########################################################################################################################################
###########################################################################################################################################
###########################################################################################################################################