## 18.4.2updatedfileAllDataPotAssay.csv was edited to make names of pathogens and cultivars consistent S1 <- read.csv ("18.4.2updatedfileAllDataPotAssay.csv") tl <- complete.cases (S1) S1 <- S1[tl,] S1$cpy <- paste (S1$cultivar,S1$pathogen,S1$year, sep=".") S1$cp <- paste (S1$cultivar,S1$pathogen, sep=".") S1$areatx <- scale (S1$area) S1$year2 <- factor(S1$year) S1$bad <- numeric (nrow (S1)) tl <- (S1$pitted == 1 & S1$area > 3) | (S1$raised == 1 & S1$area > 12) | (S1$super == 1 & S1$area > 25) S1$bad[tl] <- 1 ## is there a year effect? Just for cultivars and pathogens present in both years table1 <- table (S1$cultivar, S1$year) table2 <- table (S1$pathogen, S1$year) tl <- table1[,1] > 0 & table1[,2] > 0 tl2 <- table2[,1] > 0 & table2[,2] > 0 gdcult <- rownames (table1)[tl] gdpath <- rownames (table2)[tl2] tl <- (S1$cultivar %in% gdcult) & (S1$pathogen %in% gdpath) require (lmerTest) fit1 <- lmer (bad ~ (cultivar + pathogen + year2)^2 + (1|plant), data=S1, subset=tl) anova (fit1) ## Analysis of Variance Table of type III with Satterthwaite ## approximation for degrees of freedom ## Sum Sq Mean Sq NumDF DenDF F.value Pr(>F) ## cultivar 11.7460 0.6182 19 148.90 4.373 1.067e-07 *** ## pathogen 10.6523 10.6523 1 161.72 75.351 3.997e-15 *** ## year2 0.2524 0.2524 1 163.39 1.786 0.183331 ## cultivar:pathogen 5.6041 0.2950 19 146.22 2.086 0.007816 ** ## cultivar:year2 2.0077 0.1057 19 143.61 0.747 0.764203 ## pathogen:year2 0.4426 0.4426 1 148.70 3.131 0.078883 . ## ##------------ all random effects model for variance decomposition -------## fit2 <- lmer (bad ~ (1|cultivar) + (1|pathogen) + (1|year2) + (1|plant) + (1|cultivar:pathogen) +(1|cultivar:year2) + (1|pathogen:year2), data=S1, subset=tl) var1 <- c(unlist(summary(fit2)$varcor), Residual=(sigma(fit2))^2) data.frame (var = var1, percent = round(100*var1/sum(var1), digits=4)) ## var percent ## plant 0.02789616 9.8911 ## cultivar:year2 0.00000000 0.0000 ## cultivar:pathogen 0.02091698 7.4165 ## cultivar 0.02682833 9.5125 ## pathogen:year2 0.00304435 1.0794 ## year2 0.00000000 0.0000 ## pathogen 0.06550031 23.2244 ## Residual 0.13784613 48.8760 ## how much of residual is due to binomial sampling error? tl5 <- S1$bad < 1 bad.n <- numeric (length (tl5)) ## create non-discrete values bad.n [tl5] <- runif (sum (tl5), 0, 0.5) bad.n [!tl5] <- runif (sum (!tl5), 0.5, 1.0) ## now have values between 0 and 1 var (S1$bad[tl]) # 0.249361 var (bad.n[tl]) # 0.08272367 1 - var (bad.n[tl])/var (S1$bad[tl]) ## proportion due to discreteness = 0.6682574 (about 2/3) ## a check on that calculation fit2n <- lmer (bad.n ~ (1|cultivar) + (1|pathogen) + (1|year2) + (1|plant) + (1|cultivar:pathogen) +(1|cultivar:year2) + (1|pathogen:year2), data=S1, subset=tl) var1n <- c(unlist(summary(fit2n)$varcor), Residual=(sigma(fit2n))^2) data.frame (var = var1n, percent = round(100*var1n/sum(var1n), digits=4)) ## var percent ## plant 1.189490e-02 13.1846 ## cultivar:year2 0.000000e+00 0.0000 ## cultivar:pathogen 6.124262e-03 6.7883 ## cultivar 5.786404e-03 6.4138 ## pathogen:year2 7.989430e-16 0.0000 ## year2 0.000000e+00 0.0000 ## pathogen 1.585432e-02 17.5733 ## Residual 5.055824e-02 56.0400 5.055824e-02/0.13784613 ## from table above ## [1] 0.366773 ## about 1/3, so checks with above ## Table 3 ## look to see which pathogens are the most stable over cultivars var1 <- numeric (length (unique(S1$pathogen))) mn1 <- numeric (length (unique(S1$pathogen))) uniqcult1 <- unique(S1$pathogen) len1 <- numeric (length (unique(S1$pathogen))) for (i in 1:length (unique(S1$pathogen))) { tl <- S1$pathogen == unique(S1$pathogen)[i] sum2 <- aggregate (S1$bad[tl], list (S1$cultivar[tl]), function (x) { bad1 <- sum(x); length1 <- length(x); frac1 <- bad1/length1; tmp <- c (bad1, length1, frac1); return (tmp)}) if (nrow (sum2) > 1) { var1[i] <- var (sum2$x[,3]) mn1[i] <- mean (sum2$x[,3]) len1[i] <- length (sum2$x[,3]) } else { var1[i] <- NA mn1[i] <- NA len1[i] <- NA } } tl <- !is.na (var1) var1 <- var1[tl] uniqcult1 <- uniqcult1[tl] mn1 <- mn1[tl] len1 <- len1[tl] idx1 <- order (var1) var1 <- var1 [idx1] uniqcult1 <- uniqcult1 [idx1] mn1 <- mn1 [idx1] len1 <- len1 [idx1] stability2 <- data.frame (pathogen = uniqcult1, variance = var1, mean = mn1, numpath = len1) stability2 ## == Table 3 ## ## Figures 2-5 pdf ("EstimatesByPathogen.pdf", height = 14, width = 10) rangex <- range (0,100) par (mar = c(4, 8, 3, 1) + 0.1) for (i in 1:5) { if (i < 5) { tl <- S1$pathogen == unique(S1$pathogen)[i] title1 <- unique(S1$pathogen)[i] } else { tl <- S1$pathogen == unique(S1$pathogen)[3] | S1$pathogen == unique(S1$pathogen)[4] title1 <- paste (unique(S1$pathogen)[3], unique(S1$pathogen)[4], sep = " or ") } ## which cultivars have variances? sum2 <- aggregate (S1$bad[tl], list (S1$cultivar[tl]), function (x) { bad1 <- sum(x); length1 <- length(x); tmp <- c (bad1, length1); return (tmp)}) tl2 <- sum2$x[,1] == 0 | sum2$x[,1] == sum2$x[,2] ## these are cultivars with no variance ## create a subset to use for model fitting tl3 <- S1$cultivar %in% sum2$Group.1 [!tl2] tl4 <- tl & tl3 fit4 <- glm (bad ~ -1 + cultivar, data=S1, family = "binomial", subset = tl4) coef1 <- summary(fit4)$coefficients idx3 <- order (coef1[,1]) coef1 <- coef1[idx3,] l95 <- coef1[,1] - 2*coef1[,2] u95 <- coef1[,1] + 2*coef1[,2] est1 <- exp (coef1[,1])/(1 + exp (coef1[,1])) l95 <- exp (l95)/(1 + exp (l95)) u95 <- exp (u95)/(1 + exp (u95)) cultnames1 <- rownames (coef1) cultnames2 <- strsplit (cultnames1, "cultivar") cultnames1 <- sapply(cultnames2, '[[', 2) TmpS1 <- data.frame (cultivar = cultnames1, estimate = est1, l95 = l95, u95 = u95) ## add back the ones with no variance tl2 <- sum2$x[,1] == 0 if (sum (tl2) > 0) { TmpS2 <- data.frame (cultivar = sum2$Group.1[tl2], estimate = 0, l95 = 0, u95 = 0) TmpS1 <- rbind (TmpS2, TmpS1) } tl2 <- sum2$x[,1] == sum2$x[,2] if (sum (tl2) > 0) { TmpS2 <- data.frame (cultivar = sum2$Group.1[tl2], estimate = 1, l95 = 1, u95 = 1) TmpS1 <- rbind (TmpS1, TmpS2) } plot (TmpS1$estimate, 1:nrow(TmpS1), yaxt = "n", pch = "", bty = "l", main = title1, xlab = "Percent Bad Tubers", ylab = "", xlim = rangex) axis (2, 1:nrow(TmpS1), TmpS1$cultivar, las = 1) segments (TmpS1$l95 * 100, 1:nrow(TmpS1), TmpS1$u95 * 100, 1:nrow(TmpS1), col = "gray", lwd = 4) points (TmpS1$estimate * 100, 1:nrow(TmpS1), pch = 16) } dev.off() ## ## Figure 5 ## look to see which cultivars are the most stable over pathogens var1 <- numeric (length (unique(S1$cultivar))) mn1 <- numeric (length (unique(S1$cultivar))) uniqcult1 <- unique(S1$cultivar) len1 <- numeric (length (unique(S1$cultivar))) for (i in 1:length (unique(S1$cultivar))) { tl <- S1$cultivar == unique(S1$cultivar)[i] sum2 <- aggregate (S1$bad[tl], list (S1$pathogen[tl]), function (x) { bad1 <- sum(x); length1 <- length(x); frac1 <- bad1/length1; tmp <- c (bad1, length1, frac1); return (tmp)}) if (nrow (sum2) > 1) { var1[i] <- var (sum2$x[,3]) mn1[i] <- mean (sum2$x[,3]) len1[i] <- length (sum2$x[,3]) } else { var1[i] <- NA mn1[i] <- NA len1[i] <- NA } } tl <- !is.na (var1) var1 <- var1[tl] uniqcult1 <- uniqcult1[tl] mn1 <- mn1[tl] len1 <- len1[tl] idx1 <- order (var1) var1 <- var1 [idx1] uniqcult1 <- uniqcult1 [idx1] mn1 <- mn1 [idx1] len1 <- len1 [idx1] stability1 <- data.frame (cultivar = uniqcult1, variance = var1, mean = mn1, numpath = len1) pdf ("stability20180424.pdf", height=12, width=12) plot (stability1$mean, sqrt(stability1$variance), pch = 16, xlab="mean", ylab="standard deviation across pathogens", bty="l", xlim = c(0.05, 1.1)) text (stability1$mean + 0.01, sqrt(stability1$variance), stability1$cultivar, adj = c(0, 0.5), cex=0.7) dev.off()