rm(list=ls())
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(rpart)
## Warning: package 'rpart' was built under R version 3.6.2
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.6.3
library(RColorBrewer)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.6.3
library(scales)
## Warning: package 'scales' was built under R version 3.6.2
library(drc)
## Warning: package 'drc' was built under R version 3.6.2
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 3.6.2
##
## 'drc' has been loaded.
## Please cite R and 'drc' if used for a publication,
## for references type 'citation()' and 'citation('drc')'.
##
## Attaching package: 'drc'
## The following objects are masked from 'package:stats':
##
## gaussian, getInitial
library(plyr)
## Warning: package 'plyr' was built under R version 3.6.3
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 3.6.2
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see http://bit.ly/arialnarrow
library(viridis)
## Warning: package 'viridis' was built under R version 3.6.3
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
##
## viridis_pal
setwd("C:/Users/Bettina/Documents/Papers/Validation scale/datasets and analysis/R analysis/data/valscale_data_and_analysis")
val2 <- read.csv("valscale_basic.csv", sep = "," , header = TRUE)
colourCount = length(unique(val2$group))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))
ggplot(val2, aes(x=group, fill=group)) +
theme_bw() +
theme(text = element_text(size=20))+
geom_bar(stat="count", fill=getPalette(colourCount))+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
xlab("") +
ylab("assay count")
colourCount = length(unique(val2$nocit))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))
ggplot(val2, aes(x=as.factor(nocit), fill=as.factor(nocit))) +
theme_bw() +
theme(text = element_text(size=18))+
geom_bar(aes(y = (..count..)/sum(..count..)), fill=getPalette(colourCount)) +
scale_y_continuous(labels = percent) +
xlab("number of citations") +
ylab("relative frequency")
colourCount = length(unique(val2$type))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))
ggplot(val2, aes(x=type, fill=type)) +
theme_bw() +
theme(text = element_text(size=18))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
scale_y_continuous(labels = percent) +
scale_fill_brewer(palette="Set3")+
theme(legend.position = "none")+
ylab("relative frequency")+
xlab("")
colourCount = length(unique(val2$gene))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))
ggplot(val2, aes(x=gene, fill=gene)) +
theme_bw() +
theme(text = element_text(size=15))+
geom_bar(aes(y = (..count..)/sum(..count..)), fill=getPalette(colourCount)) +
scale_y_continuous(labels = percent) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
ylab("relative frequency")+
xlab("")
colourCount = length(unique(val2$esou))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))
ggplot(val2, aes(x=as.factor(esou), fill=as.factor(esou))) +
theme_bw() +
theme(text = element_text(size=18))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
scale_fill_brewer(palette="Set3")+
scale_y_continuous(labels = percent) +
theme(legend.position = "none")+
ylab("relative frequency")+
xlab("")
checkmet <- read.csv("checklist_metrics_1.csv", sep = "," , header = TRUE)
names(checkmet)
## [1] "ID" "spec"
## [3] "common" "group"
## [5] "esou" "type"
## [7] "gene" "rating"
## [9] "year" "Level"
## [11] "nocit" "L_min_crit_perc"
## [13] "X" "X.1"
## [15] "L1_min_isil_targetspec" "L1_min_tt_tt"
## [17] "L1_min._pripro" "L2_min_pcr_dna"
## [19] "L2_min_iv" "L3_min_ext_meth"
## [21] "L3_min_ftpc" "L3_min_envdet"
## [23] "L4_min_lod_lod" "L4_min_multlosa"
## [25] "L4_min_adv_iv" "L5_min_comp_specplus"
## [27] "L5_min_detprob" "L5_min_ecphy"
## [29] "X.2" "L5_isil_db"
## [31] "L5_isil_targetspec" "L5_isil_softw"
## [33] "L5_isil_softwpar" "L5_isil_mism"
## [35] "L5_isil_mismloc" "L5_isil_potprob"
## [37] "L5_isil_closrel" "isil_cooc"
## [39] "L1_tt_tt" "L5_tt_ts"
## [41] "L5_tt_geosc" "L5_tt_ind"
## [43] "L5_tt_ht" "L5_tt_conc"
## [45] "L5_min._pripro" "L5_ttp_cyccon"
## [47] "L5_ttp_acheck" "L5_ttp_seq"
## [49] "L5_ttp_gene" "L5_probe"
## [51] "L5_dye" "L5_iqen"
## [53] "L5_quen" "L5_TMM"
## [55] "L5_pcr_vol" "L5_pcr_pc"
## [57] "L5_pcr_commix" "L5_pcr_custmix"
## [59] "L5_pcr_mg" "L5_pcr_dntp"
## [61] "L5_pcr_buff" "L5_pcr_enz"
## [63] "L5_pcr_enha" "L5_pcr_dna"
## [65] "L5_pcr_cycler" "L5_pcr_annt"
## [67] "L5_pcr_anntemp" "L5_pcr_cyc"
## [69] "L5_pcr_tr" "L5_pcr_opt"
## [71] "L5_iv_ind" "L5_iv_geoorig"
## [73] "L5_iv_ts" "L5_iv_seq"
## [75] "L5_iv_conc" "L5_geosco"
## [77] "L5_unilabflow" "L5_geotest"
## [79] "L5_ext_meth" "L5_ext_modif"
## [81] "L5_ext_lysvol" "L5_ext_nk"
## [83] "L5_ext_elu" "L5_ext_verif"
## [85] "L5_proc_vol" "L5_proc_ft"
## [87] "L5_proc_ps" "L5_proc_diam"
## [89] "L5_proc_pressu" "L5_proc_preserv"
## [91] "L5_proc_precichen" "L5_proc_cent"
## [93] "L5_proc_temp" "L5_proc_stor"
## [95] "L5_proc_blanks" "L5_sam_det"
## [97] "L5_sam_type" "L5_sam_arthab"
## [99] "L5_sam_nathab" "L5_sam_spike"
## [101] "L5_sam_env" "L5_fieldsampseq"
## [103] "L5_lod_lod" "L5_lod_met"
## [105] "L5_lod_rep" "L5_avf_multloc"
## [107] "L5_avf_notth" "L5_avf_th"
## [109] "L5_avf_hist" "L5_avf_multsam"
## [111] "L5_avf_envgrad" "L5_avf_dens"
## [113] "L5_avf_trad" "L5_avf_inhi"
## [115] "L5_adv_iv_geosc" "L5_adv_iv_socc"
## [117] "L5_adv_iv_ind" "L5_adv_iv_geoorig"
## [119] "L5_adv_iv_ts" "L5_adv_iv_seq"
## [121] "L5_adv_iv_conc" "L5_comp_specplus"
## [123] "L5_comp_just" "L5_comp_seq"
## [125] "L5_stat_time" "L5_stat_spac"
## [127] "L5_stat_mod" "L5_stat_occm"
## [129] "L5_stat_param" "L5_env_orig"
## [131] "L5_env_state" "L5_env_degr"
## [133] "L5_env_transp" "X.3"
## [135] "nas" "sum"
## [137] "sco_perc"
ggplot(checkmet, aes(x=Level, fill=as.factor(Level))) +
theme_bw() +
theme(text = element_text(size=16))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
scale_fill_manual(values = c("gray65","#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF", "#FDE725FF"))+
scale_y_continuous(labels = percent) +
theme(legend.position = "none")+
ylab("relative frequency")+
xlab("validation level based on minimum criteria")
ggplot(checkmet, aes(x=as.factor(year), fill=as.factor(Level))) +
theme_bw() +
geom_bar(position = "fill")+
theme(text = element_text(size=16))+
scale_fill_manual(values = c("gray65","#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF", "#FDE725FF")) +
scale_y_continuous(labels = scales::percent_format())+
ylab("relative frequency")+
xlab("publication year")+
theme(legend.position = "none")
data_summary <- function(x) {
m <- mean(x)
ymin <- m-sd(x)
ymax <- m+sd(x)
return(c(y=m,ymin=ymin,ymax=ymax))
}
ggplot(checkmet, aes(x=as.factor(Level), y=sco_perc, fill=as.factor(Level))) +
theme_bw() +
theme(text = element_text(size=16))+
geom_violin(trim=T)+
scale_y_continuous(labels = scales::percent_format(), limits=c(0,0.8))+
stat_summary(fun.data=data_summary, colour="white") +
scale_fill_manual(values = c("gray65","#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF", "#FDE725FF"))+
theme(legend.position = "none")+
ylab("total scoring percentage")+
xlab("validation level based on minimum criteria")
l0 <- subset(checkmet, Level == 0)
l1 <- subset(checkmet, Level == 1)
l2 <- subset(checkmet, Level == 2)
l3 <- subset(checkmet, Level == 3)
l4 <- subset(checkmet, Level == 4)
suml0 <- data.frame(
level = c(0,0,0),
min_crit=c("L1_min_isil_targetspec", "L1_min_tt_tt", "L1_min_pripro" ),
value=c(sum(l0$L1_min_isil_targetspec)/83, sum(l0$L1_min_tt_tt)/83, sum(l0$L1_min._pripro)/83))
suml1 <- data.frame(
level = c(1,1),
min_crit=c("L2_min_pcr_dna", "L2_min_iv" ),
value=c(sum(l1$L2_min_pcr_dna)/162, sum(l1$L2_min_iv)/162))
suml2 <- data.frame(
level = c(2,2,2),
min_crit=c("L3_min_ext_meth", "L3_min_ftpc", "L3_min_envdet"),
value=c(sum(l2$L3_min_ext_meth)/89, sum(l2$L3_min_ftpc)/89, sum(l2$L5_min_envdet)/89))
suml3 <- data.frame(
level = c(3,3,3),
min_crit=c("L4_min_lod_lod", "L4_min_multlosa", "L4_min_adv_iv"),
value=c(sum(l3$L4_min_lod_lod)/155, sum(l3$L4_min_multlosa)/155, sum(l3$L4_min_adv_iv)/155))
suml4 <- data.frame(
level = c(4,4,4),
min_crit=c("L5_min_comp_specplus", "L5_min_detprob", "L5_min_ecphy"),
value=c(sum(l4$L5_min_comp_specplus)/57, sum(l4$L5_min_detprob)/57, sum(l4$L5_min_ecphy)/57))
tot_summary <- rbind(suml0, suml1, suml2, suml3, suml4)
ggplot(tot_summary, aes(x=min_crit, y = 1-value, fill=as.factor(level))) +
theme_bw() +
geom_bar(stat = "identity")+
scale_fill_manual(values = c("gray65","#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF", "#FDE725FF")) +
scale_y_continuous(labels = scales::percent_format())+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
ylab("relative frequency")+
xlab("minimum criteria") +
theme(legend.position = "none")
valvarsum <- read.csv("val_var_summary.csv", sep = "," , header = TRUE)
names(valvarsum)
## [1] "var" "old.var.name" "coun" "perc"
## [5] "var_level"
ggplot(valvarsum, aes(x=reorder(var, desc(perc)+(var_level) , "identity") ,y=perc, fill=as.factor(var_level))) +
theme_bw() +
geom_bar(stat = "identity")+
theme(text = element_text(size=16))+
scale_fill_manual(values = c("#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF", "#FDE725FF")) +
theme(axis.text.y = element_text(size = 10))+
scale_y_continuous(labels = scales::percent_format())+
ylab("relative frequency")+
xlab("validation variables") +
theme(legend.position = "none")+
coord_flip()
bubble <- read.csv("baseforbubbleplot.csv", sep = "," , header = TRUE)
ggplot(bubble, aes(x=Level, y= rating, size = coun, fill = col ))+
theme_bw()+
geom_point(alpha = 0.8, shape=21, color="black")+
scale_size(range = c(.1, 20)) +
scale_fill_manual(values = c( "#6633FF", "#CC00CC", "#FF3399", "#FF6633", "#FFCC00", "#FFFFCC")) +
ylab("subjective validation level")+
xlab("validation level based on minimum criteria")+
guides(fill=FALSE, size=guide_legend(title="count"))
## Warning: Removed 15 rows containing missing values (geom_point).
checkmet2 <- read.csv("checklist_metrics_2.csv", sep = "," , header = TRUE)
names(checkmet2)
## [1] "ID"
## [2] "spec"
## [3] "common"
## [4] "group"
## [5] "esou"
## [6] "type"
## [7] "gene"
## [8] "rating"
## [9] "year"
## [10] "Level"
## [11] "nocit"
## [12] "sco_perc"
## [13] "L1_min_isil_targetspec"
## [14] "L1_min_tt_tt"
## [15] "L1_min._pripro"
## [16] "L2_min_pcr_dna"
## [17] "L2_min_iv"
## [18] "L3_min_ext_meth"
## [19] "L3_min_ftpc"
## [20] "L3_min_envdet"
## [21] "L4_min_lod_lod"
## [22] "L4_min_multlosa"
## [23] "L4_min_adv_iv"
## [24] "L5_min_comp_specplus"
## [25] "L5_min_detprob"
## [26] "L5_min_ecphy"
## [27] "X"
## [28] "in.silico"
## [29] "target.tissue"
## [30] "PCR"
## [31] "in.vitro"
## [32] "stray_sco_perc"
## [33] "extraction"
## [34] "sample.processing"
## [35] "eDNA.sample"
## [36] "LOD"
## [37] "field.testing"
## [38] "adv.in.vitro"
## [39] "comprehensive.specificity.testing"
## [40] "statistics"
## [41] "ecological.and.physical.factors"
p1 <- rpart(data=checkmet2, Level ~
in.silico +target.tissue + PCR + in.vitro +
extraction + sample.processing + eDNA.sample + LOD + field.testing +
adv.in.vitro + comprehensive.specificity.testing + statistics + ecological.and.physical.factors ,
method = "class")
printcp(p1)
##
## Classification tree:
## rpart(formula = Level ~ in.silico + target.tissue + PCR + in.vitro +
## extraction + sample.processing + eDNA.sample + LOD + field.testing +
## adv.in.vitro + comprehensive.specificity.testing + statistics +
## ecological.and.physical.factors, data = checkmet2, method = "class")
##
## Variables actually used in tree construction:
## [1] adv.in.vitro field.testing in.silico in.vitro
## [5] LOD PCR sample.processing target.tissue
##
## Root node error: 384/546 = 0.7033
##
## n= 546
##
## CP nsplit rel error xerror xstd
## 1 0.236979 0 1.00000 1.02083 0.027383
## 2 0.151042 1 0.76302 0.79688 0.030202
## 3 0.117188 2 0.61198 0.65885 0.030344
## 4 0.066406 3 0.49479 0.53646 0.029495
## 5 0.039062 5 0.36198 0.42188 0.027797
## 6 0.033854 6 0.32292 0.34375 0.026053
## 7 0.023438 7 0.28906 0.28646 0.024407
## 8 0.018229 8 0.26562 0.28646 0.024407
## 9 0.015625 9 0.24740 0.28385 0.024324
## 10 0.011719 11 0.21615 0.27865 0.024154
## 11 0.010000 13 0.19271 0.27083 0.023895
plotcp(p1)
summary(p1)
## Call:
## rpart(formula = Level ~ in.silico + target.tissue + PCR + in.vitro +
## extraction + sample.processing + eDNA.sample + LOD + field.testing +
## adv.in.vitro + comprehensive.specificity.testing + statistics +
## ecological.and.physical.factors, data = checkmet2, method = "class")
## n= 546
##
## CP nsplit rel error xerror xstd
## 1 0.23697917 0 1.0000000 1.0208333 0.02738267
## 2 0.15104167 1 0.7630208 0.7968750 0.03020221
## 3 0.11718750 2 0.6119792 0.6588542 0.03034356
## 4 0.06640625 3 0.4947917 0.5364583 0.02949480
## 5 0.03906250 5 0.3619792 0.4218750 0.02779685
## 6 0.03385417 6 0.3229167 0.3437500 0.02605311
## 7 0.02343750 7 0.2890625 0.2864583 0.02440687
## 8 0.01822917 8 0.2656250 0.2864583 0.02440687
## 9 0.01562500 9 0.2473958 0.2838542 0.02432352
## 10 0.01171875 11 0.2161458 0.2786458 0.02415442
## 11 0.01000000 13 0.1927083 0.2708333 0.02389463
##
## Variable importance
## in.vitro in.silico
## 16 14
## adv.in.vitro sample.processing
## 12 11
## target.tissue PCR
## 10 9
## field.testing eDNA.sample
## 9 7
## LOD extraction
## 5 5
## ecological.and.physical.factors statistics
## 1 1
##
## Node number 1: 546 observations, complexity param=0.2369792
## predicted class=1 expected loss=0.7032967 P(node) =1
## class counts: 83 162 89 155 57
## probabilities: 0.152 0.297 0.163 0.284 0.104
## left son=2 (186 obs) right son=3 (360 obs)
## Primary splits:
## in.vitro < 0.1 to the left, improve=57.10230, (0 missing)
## in.silico < 0.2678571 to the left, improve=52.22768, (0 missing)
## PCR < 0.7913043 to the right, improve=47.28173, (0 missing)
## sample.processing < 0.3541667 to the left, improve=34.63709, (0 missing)
## target.tissue < 0.08333333 to the left, improve=32.34111, (0 missing)
## Surrogate splits:
## adv.in.vitro < 0.07142857 to the left, agree=0.766, adj=0.312, (0 split)
## target.tissue < 0.25 to the left, agree=0.749, adj=0.263, (0 split)
## in.silico < 0.4365079 to the left, agree=0.727, adj=0.199, (0 split)
## PCR < 0.6381818 to the left, agree=0.709, adj=0.145, (0 split)
## ecological.and.physical.factors < 0.125 to the right, agree=0.690, adj=0.091, (0 split)
##
## Node number 2: 186 observations, complexity param=0.1171875
## predicted class=1 expected loss=0.4193548 P(node) =0.3406593
## class counts: 64 108 2 10 2
## probabilities: 0.344 0.581 0.011 0.054 0.011
## left son=4 (51 obs) right son=5 (135 obs)
## Primary splits:
## in.silico < 0.2916667 to the left, improve=44.126950, (0 missing)
## PCR < 0.7888889 to the left, improve=37.935480, (0 missing)
## target.tissue < 0.08333333 to the left, improve=26.164370, (0 missing)
## sample.processing < 0.6904762 to the left, improve= 4.945116, (0 missing)
## field.testing < 0.1944444 to the left, improve= 3.621505, (0 missing)
## Surrogate splits:
## PCR < 0.3666667 to the left, agree=0.758, adj=0.118, (0 split)
## sample.processing < 0.9545455 to the right, agree=0.742, adj=0.059, (0 split)
## field.testing < 0.1388889 to the left, agree=0.731, adj=0.020, (0 split)
##
## Node number 3: 360 observations, complexity param=0.1510417
## predicted class=3 expected loss=0.5972222 P(node) =0.6593407
## class counts: 19 54 87 145 55
## probabilities: 0.053 0.150 0.242 0.403 0.153
## left son=6 (84 obs) right son=7 (276 obs)
## Primary splits:
## sample.processing < 0.3541667 to the left, improve=44.97246, (0 missing)
## LOD < 0.3333333 to the left, improve=22.81260, (0 missing)
## eDNA.sample < 0.1428571 to the left, improve=22.18717, (0 missing)
## extraction < 0.08333333 to the left, improve=18.66146, (0 missing)
## field.testing < 0.05555556 to the left, improve=17.55539, (0 missing)
## Surrogate splits:
## eDNA.sample < 0.1428571 to the left, agree=0.908, adj=0.607, (0 split)
## field.testing < 0.05555556 to the left, agree=0.850, adj=0.357, (0 split)
## extraction < 0.08333333 to the left, agree=0.844, adj=0.333, (0 split)
## adv.in.vitro < 0.9285714 to the right, agree=0.783, adj=0.071, (0 split)
## in.vitro < 0.9 to the right, agree=0.772, adj=0.024, (0 split)
##
## Node number 4: 51 observations
## predicted class=0 expected loss=0.07843137 P(node) =0.09340659
## class counts: 47 2 0 1 1
## probabilities: 0.922 0.039 0.000 0.020 0.020
##
## Node number 5: 135 observations, complexity param=0.03385417
## predicted class=1 expected loss=0.2148148 P(node) =0.2472527
## class counts: 17 106 2 9 1
## probabilities: 0.126 0.785 0.015 0.067 0.007
## left son=10 (21 obs) right son=11 (114 obs)
## Primary splits:
## target.tissue < 0.08333333 to the left, improve=20.59159, (0 missing)
## PCR < 0.7863636 to the right, improve=12.13041, (0 missing)
## in.silico < 0.5634921 to the left, improve= 6.13870, (0 missing)
## field.testing < 0.7638889 to the left, improve= 3.24923, (0 missing)
## eDNA.sample < 0.6428571 to the left, improve= 3.20936, (0 missing)
##
## Node number 6: 84 observations, complexity param=0.0390625
## predicted class=2 expected loss=0.3095238 P(node) =0.1538462
## class counts: 1 25 58 0 0
## probabilities: 0.012 0.298 0.690 0.000 0.000
## left son=12 (29 obs) right son=13 (55 obs)
## Primary splits:
## in.vitro < 0.3 to the left, improve=18.352040, (0 missing)
## PCR < 0.7809524 to the right, improve=12.959090, (0 missing)
## target.tissue < 0.4166667 to the left, improve= 7.882353, (0 missing)
## extraction < 0.4166667 to the left, improve= 7.342105, (0 missing)
## eDNA.sample < 0.1428571 to the left, improve= 5.669340, (0 missing)
## Surrogate splits:
## target.tissue < 0.4166667 to the left, agree=0.821, adj=0.483, (0 split)
## LOD < 0.3333333 to the left, agree=0.810, adj=0.448, (0 split)
## adv.in.vitro < 0.2142857 to the left, agree=0.786, adj=0.379, (0 split)
## in.silico < 0.6125 to the left, agree=0.750, adj=0.276, (0 split)
## field.testing < 0.3888889 to the right, agree=0.702, adj=0.138, (0 split)
##
## Node number 7: 276 observations, complexity param=0.06640625
## predicted class=3 expected loss=0.4746377 P(node) =0.5054945
## class counts: 18 29 29 145 55
## probabilities: 0.065 0.105 0.105 0.525 0.199
## left son=14 (122 obs) right son=15 (154 obs)
## Primary splits:
## LOD < 0.3333333 to the left, improve=22.524580, (0 missing)
## adv.in.vitro < 0.07142857 to the left, improve=20.955390, (0 missing)
## PCR < 0.952381 to the right, improve=14.200260, (0 missing)
## in.silico < 0.2678571 to the left, improve=11.980740, (0 missing)
## field.testing < 0.4097222 to the left, improve= 8.358417, (0 missing)
## Surrogate splits:
## eDNA.sample < 0.3571429 to the left, agree=0.645, adj=0.197, (0 split)
## field.testing < 0.2777778 to the left, agree=0.616, adj=0.131, (0 split)
## in.vitro < 0.775 to the right, agree=0.609, adj=0.115, (0 split)
## sample.processing < 0.4375 to the left, agree=0.605, adj=0.107, (0 split)
## comprehensive.specificity.testing < 0.8333333 to the right, agree=0.601, adj=0.098, (0 split)
##
## Node number 10: 21 observations
## predicted class=0 expected loss=0.2380952 P(node) =0.03846154
## class counts: 16 3 2 0 0
## probabilities: 0.762 0.143 0.095 0.000 0.000
##
## Node number 11: 114 observations
## predicted class=1 expected loss=0.09649123 P(node) =0.2087912
## class counts: 1 103 0 9 1
## probabilities: 0.009 0.904 0.000 0.079 0.009
##
## Node number 12: 29 observations, complexity param=0.01822917
## predicted class=1 expected loss=0.2413793 P(node) =0.05311355
## class counts: 0 22 7 0 0
## probabilities: 0.000 0.759 0.241 0.000 0.000
## left son=24 (22 obs) right son=25 (7 obs)
## Primary splits:
## in.silico < 0.5357143 to the right, improve=10.620690, (0 missing)
## PCR < 0.7636364 to the right, improve=10.620690, (0 missing)
## eDNA.sample < 0.1428571 to the left, improve= 6.997313, (0 missing)
## comprehensive.specificity.testing < 0.1666667 to the right, improve= 2.385396, (0 missing)
## extraction < 0.25 to the right, improve= 1.520690, (0 missing)
## Surrogate splits:
## PCR < 0.7636364 to the right, agree=1.000, adj=1.000, (0 split)
## eDNA.sample < 0.1428571 to the left, agree=0.931, adj=0.714, (0 split)
## target.tissue < 0.5833333 to the left, agree=0.897, adj=0.571, (0 split)
## sample.processing < 0.125 to the left, agree=0.897, adj=0.571, (0 split)
## field.testing < 0.2222222 to the left, agree=0.897, adj=0.571, (0 split)
##
## Node number 13: 55 observations
## predicted class=2 expected loss=0.07272727 P(node) =0.1007326
## class counts: 1 3 51 0 0
## probabilities: 0.018 0.055 0.927 0.000 0.000
##
## Node number 14: 122 observations, complexity param=0.015625
## predicted class=3 expected loss=0.2295082 P(node) =0.2234432
## class counts: 8 6 14 94 0
## probabilities: 0.066 0.049 0.115 0.770 0.000
## left son=28 (39 obs) right son=29 (83 obs)
## Primary splits:
## PCR < 0.718254 to the left, improve=11.348960, (0 missing)
## extraction < 0.08333333 to the left, improve=10.277980, (0 missing)
## sample.processing < 0.9375 to the right, improve= 6.953752, (0 missing)
## target.tissue < 0.25 to the left, improve= 3.693274, (0 missing)
## adv.in.vitro < 0.5 to the left, improve= 3.131007, (0 missing)
## Surrogate splits:
## extraction < 0.08333333 to the left, agree=0.738, adj=0.179, (0 split)
## sample.processing < 0.9375 to the right, agree=0.738, adj=0.179, (0 split)
## in.silico < 0.4097222 to the left, agree=0.705, adj=0.077, (0 split)
## target.tissue < 0.08333333 to the left, agree=0.705, adj=0.077, (0 split)
## field.testing < 0.7638889 to the right, agree=0.689, adj=0.026, (0 split)
##
## Node number 15: 154 observations, complexity param=0.06640625
## predicted class=4 expected loss=0.6428571 P(node) =0.2820513
## class counts: 10 23 15 51 55
## probabilities: 0.065 0.149 0.097 0.331 0.357
## left son=30 (54 obs) right son=31 (100 obs)
## Primary splits:
## adv.in.vitro < 0.07142857 to the left, improve=37.334670, (0 missing)
## PCR < 0.952381 to the right, improve= 9.315181, (0 missing)
## field.testing < 0.4097222 to the left, improve= 6.575170, (0 missing)
## in.silico < 0.2678571 to the left, improve= 5.860967, (0 missing)
## in.vitro < 0.45 to the right, improve= 4.628889, (0 missing)
## Surrogate splits:
## field.testing < 0.2777778 to the left, agree=0.714, adj=0.185, (0 split)
## target.tissue < 0.25 to the left, agree=0.695, adj=0.130, (0 split)
## PCR < 0.5727273 to the left, agree=0.675, adj=0.074, (0 split)
## extraction < 0.9166667 to the right, agree=0.675, adj=0.074, (0 split)
## in.silico < 0.1111111 to the left, agree=0.662, adj=0.037, (0 split)
##
## Node number 24: 22 observations
## predicted class=1 expected loss=0 P(node) =0.04029304
## class counts: 0 22 0 0 0
## probabilities: 0.000 1.000 0.000 0.000 0.000
##
## Node number 25: 7 observations
## predicted class=2 expected loss=0 P(node) =0.01282051
## class counts: 0 0 7 0 0
## probabilities: 0.000 0.000 1.000 0.000 0.000
##
## Node number 28: 39 observations, complexity param=0.015625
## predicted class=3 expected loss=0.5897436 P(node) =0.07142857
## class counts: 6 3 14 16 0
## probabilities: 0.154 0.077 0.359 0.410 0.000
## left son=56 (19 obs) right son=57 (20 obs)
## Primary splits:
## in.vitro < 0.55 to the right, improve=9.856410, (0 missing)
## extraction < 0.08333333 to the left, improve=5.193910, (0 missing)
## in.silico < 0.7321429 to the left, improve=3.652132, (0 missing)
## statistics < 0.1 to the left, improve=3.293252, (0 missing)
## adv.in.vitro < 0.5 to the left, improve=3.042125, (0 missing)
## Surrogate splits:
## statistics < 0.1 to the left, agree=0.744, adj=0.474, (0 split)
## extraction < 0.08333333 to the left, agree=0.692, adj=0.368, (0 split)
## field.testing < 0.1111111 to the left, agree=0.692, adj=0.368, (0 split)
## in.silico < 0.5634921 to the right, agree=0.641, adj=0.263, (0 split)
## PCR < 0.6742424 to the left, agree=0.641, adj=0.263, (0 split)
##
## Node number 29: 83 observations
## predicted class=3 expected loss=0.06024096 P(node) =0.1520147
## class counts: 2 3 0 78 0
## probabilities: 0.024 0.036 0.000 0.940 0.000
##
## Node number 30: 54 observations
## predicted class=3 expected loss=0.1296296 P(node) =0.0989011
## class counts: 6 0 1 47 0
## probabilities: 0.111 0.000 0.019 0.870 0.000
##
## Node number 31: 100 observations, complexity param=0.0234375
## predicted class=4 expected loss=0.45 P(node) =0.1831502
## class counts: 4 23 14 4 55
## probabilities: 0.040 0.230 0.140 0.040 0.550
## left son=62 (9 obs) right son=63 (91 obs)
## Primary splits:
## PCR < 0.952381 to the right, improve=9.081099, (0 missing)
## in.vitro < 0.45 to the right, improve=7.385457, (0 missing)
## adv.in.vitro < 0.3095238 to the right, improve=7.245153, (0 missing)
## comprehensive.specificity.testing < 0.1666667 to the right, improve=5.478611, (0 missing)
## field.testing < 0.4097222 to the left, improve=5.124698, (0 missing)
## Surrogate splits:
## field.testing < 0.8819444 to the right, agree=0.94, adj=0.333, (0 split)
##
## Node number 56: 19 observations
## predicted class=2 expected loss=0.2631579 P(node) =0.03479853
## class counts: 0 3 14 2 0
## probabilities: 0.000 0.158 0.737 0.105 0.000
##
## Node number 57: 20 observations
## predicted class=3 expected loss=0.3 P(node) =0.03663004
## class counts: 6 0 0 14 0
## probabilities: 0.300 0.000 0.000 0.700 0.000
##
## Node number 62: 9 observations
## predicted class=1 expected loss=0 P(node) =0.01648352
## class counts: 0 9 0 0 0
## probabilities: 0.000 1.000 0.000 0.000 0.000
##
## Node number 63: 91 observations, complexity param=0.01171875
## predicted class=4 expected loss=0.3956044 P(node) =0.1666667
## class counts: 4 14 14 4 55
## probabilities: 0.044 0.154 0.154 0.044 0.604
## left son=126 (27 obs) right son=127 (64 obs)
## Primary splits:
## field.testing < 0.4097222 to the left, improve=7.478531, (0 missing)
## adv.in.vitro < 0.6190476 to the right, improve=5.386999, (0 missing)
## in.silico < 0.6904762 to the right, improve=4.790820, (0 missing)
## in.vitro < 0.45 to the right, improve=4.650121, (0 missing)
## sample.processing < 0.8819444 to the right, improve=4.027473, (0 missing)
## Surrogate splits:
## sample.processing < 0.9444444 to the right, agree=0.769, adj=0.222, (0 split)
## in.silico < 1.071429 to the right, agree=0.736, adj=0.111, (0 split)
## extraction < 0.25 to the left, agree=0.736, adj=0.111, (0 split)
## PCR < 0.6055556 to the left, agree=0.714, adj=0.037, (0 split)
##
## Node number 126: 27 observations, complexity param=0.01171875
## predicted class=1 expected loss=0.6666667 P(node) =0.04945055
## class counts: 2 9 7 3 6
## probabilities: 0.074 0.333 0.259 0.111 0.222
## left son=252 (10 obs) right son=253 (17 obs)
## Primary splits:
## in.silico < 0.6696429 to the left, improve=4.099782, (0 missing)
## PCR < 0.7863636 to the right, improve=3.580897, (0 missing)
## adv.in.vitro < 0.6190476 to the right, improve=2.568173, (0 missing)
## eDNA.sample < 0.4642857 to the right, improve=2.481481, (0 missing)
## sample.processing < 0.7840909 to the right, improve=2.304581, (0 missing)
## Surrogate splits:
## extraction < 0.25 to the left, agree=0.778, adj=0.4, (0 split)
## sample.processing < 0.9090909 to the right, agree=0.778, adj=0.4, (0 split)
## PCR < 0.767316 to the left, agree=0.667, adj=0.1, (0 split)
##
## Node number 127: 64 observations
## predicted class=4 expected loss=0.234375 P(node) =0.1172161
## class counts: 2 5 7 1 49
## probabilities: 0.031 0.078 0.109 0.016 0.766
##
## Node number 252: 10 observations
## predicted class=2 expected loss=0.4 P(node) =0.01831502
## class counts: 2 0 6 1 1
## probabilities: 0.200 0.000 0.600 0.100 0.100
##
## Node number 253: 17 observations
## predicted class=1 expected loss=0.4705882 P(node) =0.03113553
## class counts: 0 9 1 2 5
## probabilities: 0.000 0.529 0.059 0.118 0.294
par(mfrow=c(1,2))
rsq.rpart(p1)
##
## Classification tree:
## rpart(formula = Level ~ in.silico + target.tissue + PCR + in.vitro +
## extraction + sample.processing + eDNA.sample + LOD + field.testing +
## adv.in.vitro + comprehensive.specificity.testing + statistics +
## ecological.and.physical.factors, data = checkmet2, method = "class")
##
## Variables actually used in tree construction:
## [1] adv.in.vitro field.testing in.silico in.vitro
## [5] LOD PCR sample.processing target.tissue
##
## Root node error: 384/546 = 0.7033
##
## n= 546
##
## CP nsplit rel error xerror xstd
## 1 0.236979 0 1.00000 1.02083 0.027383
## 2 0.151042 1 0.76302 0.79688 0.030202
## 3 0.117188 2 0.61198 0.65885 0.030344
## 4 0.066406 3 0.49479 0.53646 0.029495
## 5 0.039062 5 0.36198 0.42188 0.027797
## 6 0.033854 6 0.32292 0.34375 0.026053
## 7 0.023438 7 0.28906 0.28646 0.024407
## 8 0.018229 8 0.26562 0.28646 0.024407
## 9 0.015625 9 0.24740 0.28385 0.024324
## 10 0.011719 11 0.21615 0.27865 0.024154
## 11 0.010000 13 0.19271 0.27083 0.023895
## Warning in rsq.rpart(p1): may not be applicable for this method
par(mfrow=c(1,1))
plot(p1, uniform=TRUE,
main="Regression rating vs 1s")
text(p1, use.n=TRUE, all=TRUE, cex=.8)
pfit<- prune(p1, cp=0.01)
plot(pfit, uniform=TRUE,
main="Regression rating vs 1s")
text(pfit, use.n=TRUE, all=TRUE, cex=.8)
rpart.plot(pfit, type = 3, clip.right.labs = FALSE, branch = 1, extra = 101, under= TRUE,
fallen.leaves = FALSE, cex =NULL, tweak = 1.64, xcompact = FALSE, ycompact = FALSE, ycompress = FALSE,
box.palette = list("gray65","#481567FF", "#39568CFF", "#1F968BFF", "#55C667FF"), col = "grey99",
round=1, split.font = 1, legend.x = NA)
forbox <- read.csv("forbox1.csv", sep = "," , header = TRUE)
names(forbox)
## [1] "spec" "ID" "block" "block_score" "level"
forbox$block2 <- factor(forbox$block, c("in silico","target tissue testing", "PCR", "in vitro", "extraction", "sample processing",
"eDNA field detection", "LOD", "field testing", "advanced in vitro testing", "comprehensive specificity testing",
"statistics", "ecological and physical factors"))
l0 <- subset(forbox, level == 0)
l1 <- subset(forbox, level == 1)
l2 <- subset(forbox, level == 2)
l3 <- subset(forbox, level == 3)
l4 <- subset(forbox, level == 4)
o0 <- ggplot(l0, aes(x=block2, y=block_score, fill=block_score)) +
theme_bw() +
geom_bar(stat="identity") +
scale_fill_viridis() +
ylim(0,1) +
theme(text = element_text(size=14))+
xlab("") + ylab("block score") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position = "none")
o1 <- ggplot(l1, aes(x=block2, y=block_score, fill=block_score)) +
theme_bw() +
geom_bar(stat="identity") +
scale_fill_viridis() +
ylim(0,1) +
theme(text = element_text(size=14))+
xlab("") + ylab("block score") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position = "none")
o2 <- ggplot(l2, aes(x=block2, y=block_score, fill=block_score)) +
theme_bw() +
geom_bar(stat="identity") +
scale_fill_viridis() +
ylim(0,1) +
theme(text = element_text(size=14))+
xlab("") + ylab("block score") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position = "none")
o3 <- ggplot(l3, aes(x=block2, y=block_score, fill=block_score)) +
theme_bw() +
geom_bar(stat="identity") +
scale_fill_viridis() +
ylim(0,1) +
theme(text = element_text(size=14))+
xlab("") + ylab("block score") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position = "none")
o4 <- ggplot(l4, aes(x=block2, y=block_score, fill=block_score)) +
theme_bw() +
geom_bar(stat="identity") +
scale_fill_viridis() +
ylim(0,1) +
theme(text = element_text(size=14))+
xlab("") + ylab("block score") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme(legend.position = "none")
forbox2 <- read.csv("forbox2.csv", sep = "," , header = TRUE)
names(forbox2)
## [1] "spec" "ID" "min" "min_score" "level"
ll0 <- subset(forbox2, level == 0)
ll1 <- subset(forbox2, level == 1)
ll2 <- subset(forbox2, level == 2)
ll3 <- subset(forbox2, level == 3)
ll4 <- subset(forbox2, level == 4)
oo0 <- ggplot(ll0, aes(x=as.factor(level), y=min, fill=as.factor(min_score))) +
theme_bw() +
geom_tile(stat="identity", color = "white") +
scale_fill_manual(values = c("#481567FF","#FDE725FF")) +
xlab("") + ylab("") +
theme(text = element_text(size=14))+
theme(legend.position = "none") +
theme(axis.text.x = element_blank())
oo1 <- ggplot(ll1, aes(x=as.factor(level), y=min, fill=as.factor(min_score))) +
theme_bw() +
geom_tile(stat="identity", color = "white") +
scale_fill_manual(values = c("#481567FF","#FDE725FF")) +
xlab("") + ylab("") +
theme(text = element_text(size=14))+
theme(legend.position = "none") +
theme(axis.text.x = element_blank())
oo2 <- ggplot(ll2, aes(x=as.factor(level), y=min, fill=as.factor(min_score))) +
theme_bw() +
geom_tile(stat="identity",color = "white") +
scale_fill_manual(values = c("#481567FF","#FDE725FF")) +
xlab("") + ylab("") +
theme(text = element_text(size=14))+
theme(legend.position = "none") +
theme(axis.text.x = element_blank())
oo3 <- ggplot(ll3, aes(x=as.factor(level), y=min, fill=as.factor(min_score))) +
theme_bw() +
geom_tile(stat="identity", color = "white") +
scale_fill_manual(values = c("#481567FF","#FDE725FF")) +
xlab("") + ylab("") +
theme(text = element_text(size=14))+
theme(legend.position = "none") +
theme(axis.text.x = element_blank())
oo4 <- ggplot(ll4, aes(x=as.factor(level), y=min, fill=as.factor(min_score))) +
theme_bw() +
geom_tile(stat="identity", color = "white") +
scale_fill_manual(values = c("#481567FF","#FDE725FF")) +
xlab("") + ylab("") +
theme(text = element_text(size=14))+
theme(legend.position = "none") +
theme(axis.text.x = element_blank())
grid.arrange(oo0, o0, ncol=4,
layout_matrix = cbind(1,2,2,2))
grid.arrange(oo1, o1, ncol=4,
layout_matrix = cbind(1,2,2,2))
grid.arrange(oo2, o2, ncol=4,
layout_matrix = cbind(1,2,2,2))
grid.arrange(oo3, o3, ncol=4,
layout_matrix = cbind(1,2,2,2))
grid.arrange(oo4, o4, ncol=4,
layout_matrix = cbind(1,2,2,2))