R scripts for the analysis of bioclimatic niche evolution in Gesneria and Rhytidophyllum from Alexandre, Faure, Ginzbarg, Clark and Joly (2017). RSOS. Bioclimatic niches are conserved and unrelated to pollination syndromes in Antillean Gesneriaceae

the script is devided into 4 parts:

  1. Perform environnemental PCA and get niche values for each species

  2. Perform niche overlap analysis (compute D) and compare D values

  3. Fit evolutionary models on niches variables (identity and breadth)

  4. Compute AICc weights for evolutionary models

All the files necessary to run the script below are available in a zip folder supplied as electronic supplementary material of the paper.

1. Perform environnemental PCA and get niche values for each species

This section is used in the material and methods section 2.2 Species bioclimatic niche evaluation

library(dismo)
library(raster)
library(ade4)

# extract the environmental variables (resolution: 5 arc-minute)
files <- list.files(path=paste(getwd(),"/bioclim_alt",sep=""),pattern="asc$",full.names=TRUE)
predictors <- stack(files)
names(predictors) <-c("alt","bio01","bio02","bio03","bio04","bio05","bio06","bio07","bio08",
                      "bio09","bio10","bio11","bio12","bio13","bio14","bio15","bio16","bio17","bio18",
                      "bio19")
pred<-as.data.frame(predictors)
PRED<-cbind(pred,rownames(pred))

# extract the occurence data
occtot<-unique(read.table("occurrence_total_clean.txt",sep="\t",h=T))
names(occtot)<-c("espece","long","lat","type","ile")

# define sampling units (su) SU are all the pixel present over the carribean
su<-rownames(na.omit(pred))

# extract environmental variables for each su
env<-na.omit(pred[rownames(pred) %in% su,])
colnames(env)<-c(names(predictors))

# perform a PCA for environmental variables and each su
acp_su<-dudi.pca(env[,1:length(names(predictors))],scannf=F,nf=length(names(predictors)))
# get variables contribution to axes
inertia <- inertia.dudi(acp_su, row.inertia = TRUE,
                       col.inertia = TRUE)
var.contrib <- inertia$col.abs/100

# get variance percentage explained by each PC
100*acp_su$eig/sum(acp_su$eig)
##  [1] 40.770315100 29.391028933 13.809565014  6.982589354  5.784073138
##  [6]  1.689662831  0.506481486  0.306417322  0.193988783  0.143482085
## [11]  0.111270009  0.092328575  0.076407108  0.063430985  0.050007205
## [16]  0.017460909  0.005996785  0.003539330  0.001955045
# extract unique pixels for each species
presence_unique<-unique(data.frame(occtot$espece,cellFromXY(predictors,occtot[,c(2,3)])))
# make a list of species with a least 1 pixels (on can here change the minimum number of points per species)
sp_good<-occtot[occtot$espece%in%as.list(names(table(presence_unique[,1])[table(presence_unique[,1])>=1])),]

# make a matrix with presence/absence of each species in each pixel
# get pixel for each occurence of sp_good
pres_matrix<-matrix(nrow=length(rownames(env)),
                    ncol=length(unique(sp_good$espece)),data=0)
especes_uniques <- unique(sp_good$espece)
rownames(pres_matrix) <- rownames(env)
colnames(pres_matrix) <- especes_uniques

for (i in 1:length(especes_uniques)) {
  temp_cells <- cellFromXY(predictors,sp_good[sp_good$espece==especes_uniques[i],c(2,3)])
  pres_matrix[rownames(pres_matrix) %in% temp_cells,i] <- 1
}

# presence matrix with no missing data
p<-as.data.frame(pres_matrix[,colSums(pres_matrix)!=0])
names(p) <- colnames(p)

# get a table with, for each pixel the value of environmental PC and the presence (1) or
# absence (0) of each species
ind_pix<-cbind(acp_su$li[,1:19],pres_matrix[,colnames(pres_matrix)%in%as.list(colnames(pres_matrix)[colSums(pres_matrix)>=1])])
library(ggplot2)
niches <- read.table("niches_ACP.txt",h=T,sep="\t",check.names=F)
ggplot(niches)+
  geom_point(aes(x=Axis1,y=Axis2,col=ile))

2. Perform niche overlap analysis (compute D) and compare D values

This section is used in the material and methods section 2.3 Niche overlap

# A. compute D -----
#Script to analyse niche overlaps with the method developped by Broennimann et al. 2012
# source scripts "niche.overlap.functions.R" and "occ.prep.functions.R" available from Broennimann, O. et al. 2012 Measuring ecological niche overlap from occurrence and spatial environmental data. Glob. Ecol. Biogeogr. 21, 481-497. (doi:10.1111/j.1466-8238.2011.00698.x)
source("niche.overlap.functions.R")
source("occ.prep.functions.R")
library(ade4)
library(adehabitat)
library(sp)
library(gam)
library(MASS)
library(mvtnorm)
library(gbm)
library(dismo)

# measure niche overlap along the two first axes 
niches <- read.table("niches_ACP.txt",h=T,sep="\t",check.names=F)
# keep only species present in at least 5 pixels
good <- names(niches[,22:121])[colSums(niches[,22:121])>=5]

for (esp1 in 1:(length(good)-1)){
  res <- as.data.frame(matrix(nrow=length(good),ncol=22))  
  names(res) <- c("espece1","espece2",
                  "D","p_eqD","cinq_eqD","quatre_vingt_quinze_eqD",
                  "p_sim1D","cinq_sim1D","quatre_vingt_quinze_sim1D",
                  "p_sim2D","cinq_sim2D","quatre_vingt_quinze_sim2D")
  for (esp2 in (esp1+1):length(good)){
    
    
    sp1 <- good[esp1]
    sp2 <- good[esp2]
    
    # predict the scores on the axes
    island_sp1 <- unique(niches$ile[niches[,names(niches)==sp1]==1])
    scores.clim1 <- niches[niches$ile %in% island_sp1,3:4]
    
    island_sp2 <- unique(niches$ile[niches[,names(niches)==sp2]==1])
    scores.clim2 <- niches[niches$ile %in% island_sp2,3:4]
    
    scores.clim12<- unique(rbind(scores.clim1,scores.clim2))
    
    scores.sp1 <- niches[niches[,names(niches)==sp1]==1,3:4]
    scores.sp2 <- niches[niches[,names(niches)==sp2]==1,3:4]
    
    # calculation of occurence density and test of niche equivalency and similarity 
    R <- 100        
    z1<- grid.clim(scores.clim12,scores.clim1,scores.sp1,R)
    z2<- grid.clim(scores.clim12,scores.clim2,scores.sp2,R)
    a<-niche.equivalency.test(z1,z2,rep=100)# test of niche equivalency and similarity according to Warren et al. 2008
    b<-niche.similarity.test(z1,z2,rep=100)
    b2<-niche.similarity.test(z2,z1,rep=100)
    
    res$espece1[esp2]<-sp1
    res$espece2[esp2]<-sp2
    
    res$D[esp2]<-a$obs$D
    res$p_eqD[esp2]<-a$p.D
    res$cinq_eqD[esp2]<-quantile(a$sim$D,probs=0.05)
    res$quatre_vingt_quinze_eqD[esp2]<-quantile(a$sim$D,probs=0.95)
    res$p_sim1D[esp2]<-b$p.D
    res$cinq_sim1D[esp2]<-quantile(b$sim$D,probs=0.05)
    res$quatre_vingt_quinze_sim1D[esp2]<-quantile(b$sim$D,probs=0.95)
    res$p_sim2D[esp2]<-b2$p.D
    res$cinq_sim2D[esp2]<-quantile(b2$sim$D,probs=0.05)
    res$quatre_vingt_quinze_sim2D[esp2]<-quantile(b2$sim$D,probs=0.95)
  }
 # write.table(na.omit(res),file="results_comparison_broennimann.txt",sep="\t",append=T)
}
# B. D comparisons -----
comp <- read.table("results_comparison_broennimann.txt",sep="\t",h=T)
type <- read.table("species_type",sep="\t",h=T)

COMP <- merge(merge(x=comp,y=type,by.x="espece1",by.y="espece"),type,by.x="espece2",by.y="espece")
names(COMP)[23:26] <- c("type_sp1","pol_plante_sp1","type_sp2","pol_plante_sp2")

# remove plants species for which we have no phylogenetic data
to_keep <- read.table("espece_bonne_niche_phylo",h=T)
"%ni%" <- Negate("%in%")

COMP <- COMP[-as.numeric(rownames(COMP[((COMP$pol_plante_sp2=="plants") & (COMP$espece2 %ni% to_keep$espece)) |
                                             ((COMP$pol_plante_sp1=="plants") & (COMP$espece1 %ni% to_keep$espece)),] )),]

# load table of species presnece for each island
pres <- read.table("tableau_de presence.txt",sep="\t",h=T)

comp4 <- merge(merge(COMP,pres,by.x="espece1",by.y="espece"),pres,by.x="espece2",by.y="espece")
names(comp4)[27:38] <- c("type","Cuba1","Hispaniola1","Jamaica1",
                         "PuertoRico1","LesserAntilles1",
                         "type","Cuba2","Hispaniola2","Jamaica2",
                         "PuertoRico2","LesserAntilles2")


for (i in 1:nrow(comp4)){
  if ((comp4$Cuba1[i]==1 && comp4$Cuba2[i]==1) ||
      (comp4$Hispaniola1[i]==1 && comp4$Hispaniola2[i]==1) ||
      (comp4$Jamaica1[i]==1 && comp4$Jamaica2[i]==1) ||
      (comp4$PuertoRico1[i]==1 && comp4$PuertoRico2[i]==1) ||
      (comp4$LesserAntilles1[i]==1 && comp4$LesserAntilles2[i]==1)){
    comp4$same_island[i] <- "yes"
  } else {comp4$same_island[i] <- "no"}
}

# linear models ------------
# plants comparison 
plants <- comp4[((comp4$pol_plante_sp1=="plants") & (comp4$pol_plante_sp2=="plants")) &
                   (comp4$type_sp1!="unknown")&(comp4$type_sp2!="unknown"), ]
plants$cat <- paste(plants$type_sp1,plants$type_sp2)
plants$cat[plants$cat=="G B"] <- "B G"
plants$cat[plants$cat=="G H"] <- "H G"
plants$cat[plants$cat=="H B"] <- "B H"

plants$same_pol_mode <- "no"
plants$same_pol_mode[plants$cat %in% c("B B", "H H","G G")] <- "yes"
plants$cat[plants$cat%in%list("B H","B G","H G")] <-"DIFF"

AIC(lm(plants$D ~ plants$same_pol_mode),
    lm(plants$D ~ plants$same_island),
    lm(plants$D ~ plants$same_pol_mode+plants$same_island),
    lm(plants$D ~ plants$same_pol_mode*plants$same_island))
##                                                          df        AIC
## lm(plants$D ~ plants$same_pol_mode)                       3  -64.46944
## lm(plants$D ~ plants$same_island)                         3 -100.77384
## lm(plants$D ~ plants$same_pol_mode + plants$same_island)  4  -99.94953
## lm(plants$D ~ plants$same_pol_mode * plants$same_island)  5 -107.31617
summary(lm(plants$D ~ plants$same_pol_mode*plants$same_island))
## 
## Call:
## lm(formula = plants$D ~ plants$same_pol_mode * plants$same_island)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39756 -0.12494 -0.03309  0.10853  0.60227 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                    0.15978    0.02091   7.641
## plants$same_pol_modeyes                       -0.03262    0.03283  -0.994
## plants$same_islandyes                          0.10869    0.03145   3.456
## plants$same_pol_modeyes:plants$same_islandyes  0.16533    0.05394   3.065
##                                               Pr(>|t|)    
## (Intercept)                                   6.01e-13 ***
## plants$same_pol_modeyes                       0.321466    
## plants$same_islandyes                         0.000654 ***
## plants$same_pol_modeyes:plants$same_islandyes 0.002441 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1894 on 227 degrees of freedom
## Multiple R-squared:  0.1841, Adjusted R-squared:  0.1733 
## F-statistic: 17.07 on 3 and 227 DF,  p-value: 4.96e-10
# pollinators comparisons
pol <- comp4[((comp4$pol_plante_sp1=="pollinators") & (comp4$pol_plante_sp2=="pollinators")), ]
pol$cat <- paste(pol$type_sp1,pol$type_sp2)
pol$same_group <- "same"
pol$same_group[pol$cat =="bat hummingbird" ] <- "diff"

AIC(lm(pol$D ~ pol$same_group),
    lm(pol$D ~ pol$same_island),
    lm(pol$D ~ pol$same_group+pol$same_island),
    lm(pol$D ~ pol$same_group*pol$same_island))
##                                              df       AIC
## lm(pol$D ~ pol$same_group)                    3 -110.3350
## lm(pol$D ~ pol$same_island)                   3 -117.7073
## lm(pol$D ~ pol$same_group + pol$same_island)  4 -119.4621
## lm(pol$D ~ pol$same_group * pol$same_island)  5 -118.4713
summary(lm(pol$D ~ pol$same_group+pol$same_island))
## 
## Call:
## lm(formula = pol$D ~ pol$same_group + pol$same_island)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36074 -0.13956 -0.02099  0.12025  0.54992 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.26640    0.02252  11.831  < 2e-16 ***
## pol$same_groupsame  0.04728    0.02446   1.933 0.054481 .  
## pol$same_islandyes  0.08214    0.02449   3.354 0.000932 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1848 on 228 degrees of freedom
## Multiple R-squared:  0.05713,    Adjusted R-squared:  0.04886 
## F-statistic: 6.907 on 2 and 228 DF,  p-value: 0.001224

3. Fit evolutionary models on niches variables (identity and breadth)

This section is used in the material and methods section 2.4. Bioclimatic niche evolution among plants

library(phytools)
library(geiger)
library (picante)
library(caper)
library(mvMORPH)


#charge the phylogenetic tree
spe.tree <- read.nexus(file.choose()) # choose the file phylogenies.trees

# charge niche values
niche_id <- read.table("sample_pc_par_axe_par_sp_toutes_iles.txt",h=T,sep="\t") # to analyse niche identity
niche_size <- read.table("sample_sd_par_axe_par_sp_toutes_iles.txt",h=T,sep="\t") # to analyse niche breadth (standard deviation)

# list of species to keep, which have values for niche(at least 5 pixels)/phylogeny/pollination
data_pol_phylo <- read.table("data_species",sep="\t",h=T)

# create data_frame for all the data
data_global <- merge(cbind(niche_id,niche_size$sd),data_pol_phylo, by.x="espece",by.y="species")
names(data_global)[5] <- "sd"

fit_model <- function(Variables, Axes){
  alea<-sample(x=(1:length(spe.tree)),size=1000)
  
  res <- data.frame(AIC_BM1=vector(length=1000),
                    AIC_BM3=vector(length=1000),
                    AIC_BM4=vector(length=1000),
                    AIC_OU1=vector(length=1000),
                    AIC_OU3=vector(length=1000),
                    AIC_OU4=vector(length=1000),
                    alpha_OU1=vector(length=1000),
                    sigma_OU1=vector(length=1000))
  
  
  for (arbre in 1:1000){
    tree<-spe.tree[[alea[arbre]]]
    to.drop<-tree$tip.label[tree$tip.label%in%data_global$sp_phylo==F]
    tree<-drop.tip(tree,to.drop)
    
    data_rep <- data_global[(data_global$echantillon==arbre) & (data_global$axe==Axes),]
    data_rep <- data_rep[match(tree$tip.label,data_rep$sp_phylo),] 
    rownames(data_rep)<-data_rep$sp_phylo
    
    island <- as.vector(data_rep$island)
    names(island) <- data_rep$sp_phylo
    island["Gcubensis"] <- sample (c("cuba","hispaniola"),size=1)
    island["Gfruticosa"] <- sample (c("cuba","hispaniola"),size=1)
    island["Greticulata"] <- sample (c("cuba","hispaniola"),size=1)
    island["Rauriculatum"] <- sample (c("puertorico","hispaniola"),size=1)
    
    pol_proba <- as.matrix(data_rep[,c("proba_H","proba_B","proba_G")])
    
    tree.pol.simmap<-make.simmap(tree,x=pol_proba,nsim=1,model="ARD")
    tree.ile.simmap<-make.simmap(tree,x=island,nsim=1,model="SYM")
    
    tryCatch({ mvOU.mvMORPH.1regime.pol <- mvOU(tree=tree.pol.simmap,data=data_rep[,names(data_rep)%in%Variables],model="OU1")
    res$AIC_OU1[arbre] <- mvOU.mvMORPH.1regime.pol$AIC
    res$alpha_OU1[arbre] <-  mvOU.mvMORPH.1regime.pol$param$alpha
    res$sigma_OU1[arbre] <-  mvOU.mvMORPH.1regime.pol$param$sigma},
    error=function(e){res$AIC_OU1[arbre] <- -999
    res$alpha_OU1[arbre] <- -999
    res$sigma_OU1[arbre] <- -999})
    
    tryCatch({ mvOU.mvMORPH.3regimes.pol <- mvOU(tree=tree.pol.simmap,data=data_rep[,names(data_rep)%in%Variables],model="OUM")
    res$AIC_OU3[arbre] <- mvOU.mvMORPH.3regimes.pol$AIC},
    error=function(e){res$AIC_OU3[arbre] <- -999})
    
    tryCatch({ mvBM.mvMORPH.1regime.pol <- mvBM(tree=tree.pol.simmap,data=data_rep[,names(data_rep)%in%Variables],model="BM1")
    res$AIC_BM1[arbre] <- mvBM.mvMORPH.1regime.pol$AIC},
    error=function(e){res$AIC_BM1[arbre] <- -999})
    
    tryCatch({ mvBM.mvMORPH.3regimes.pol <- mvBM(tree=tree.pol.simmap,data=data_rep[,names(data_rep)%in%Variables],model="BMM")
    res$AIC_BM3[arbre] <- mvBM.mvMORPH.3regimes.pol$AIC},
    error=function(e){res$AIC_BM3[arbre] <- -999})
    
    tryCatch({ mvOU.mvMORPH.4regimes.pol <- mvOU(tree=tree.ile.simmap,data=data_rep[,names(data_rep)%in%Variables],model="OUM")
    res$AIC_OU4[arbre] <- mvOU.mvMORPH.4regimes.pol$AIC},
    error=function(e){res$AIC_OU4[arbre] <- -999})
    
    tryCatch({ mvBM.mvMORPH.4regimes.pol <- mvBM(tree=tree.ile.simmap,data=data_rep[,names(data_rep)%in%Variables],model="BMM")
    res$AIC_BM4[arbre] <- mvBM.mvMORPH.4regimes.pol$AIC},
    error=function(e){res$AIC_BM4[arbre] <- -999})
  } 
  write.table(res,file=paste("resAIC_",Variables[1],"_",Variables[2],"_",Axes[1],"_",Axes[2],".txt"),sep="\t",col.names=T,row.names=F)
}

fit_model(Variables=c("valeur_moyenne"),Axes=1)

fit_model(Variables=c("valeur_moyenne","sd"),Axes=1)

fit_model(Variables=c("valeur_moyenne"),Axes=2)

fit_model(Variables=c("valeur_moyenne","sd"),Axes=2)

fit_model(Variables=c("sd"),Axes=1)

fit_model(Variables=c("sd"),Axes=2)

fit_model(Variables=c("sd"),Axes=c(1,2))

fit_model(Variables=c("valeur_moyenne"),Axes=c(1,2))

4. Compute AICc weights for evolutionary models

mat_res <- read.table("resAIC_ sd _ NA _ 1 _ NA .txt",sep="\t",h=T)

# suppress rows with some 0
mat_res[mat_res==0] <- NA
mat_res <- na.omit(mat_res)

# compute AICc for univariate analysis -----
mat_res$AICc_BM1 <- mat_res$AIC_BM1+((2*2*3)/(35-2-1))
mat_res$AICc_BM3 <- mat_res$AIC_BM3+((2*4*5)/(35-4-1))
mat_res$AICc_BM4 <- mat_res$AIC_BM4+((2*5*6)/(35-5-1))
mat_res$AICc_OU1 <- mat_res$AIC_OU1+((2*3*4)/(35-3-1))
mat_res$AICc_OU3 <- mat_res$AIC_OU3+((2*5*6)/(35-5-1))
mat_res$AICc_OU4 <- mat_res$AIC_OU4+((2*6*7)/(35-6-1))

# compute AICc for multivariate analysis -----
mat_res$AICc_BM1 <- mat_res$AIC_BM1+((2*2*3)/(70-2-1))
mat_res$AICc_BM3 <- mat_res$AIC_BM3+((2*4*5)/(70-4-1))
mat_res$AICc_BM4 <- mat_res$AIC_BM4+((2*5*6)/(70-5-1))
mat_res$AICc_OU1 <- mat_res$AIC_OU1+((2*3*4)/(70-3-1))
mat_res$AICc_OU3 <- mat_res$AIC_OU3+((2*5*6)/(70-5-1))
mat_res$AICc_OU4 <- mat_res$AIC_OU4+((2*6*7)/(70-6-1))


# compute AICc weights
mat_res$AICc_min <- apply(mat_res[,9:14],MARGIN=1,FUN=min)

mat_res$delta_BM1 <- mat_res$AICc_BM1 - mat_res$AICc_min
mat_res$delta_BM3 <- mat_res$AICc_BM3 - mat_res$AICc_min
mat_res$delta_BM4 <- mat_res$AICc_BM4 - mat_res$AICc_min
mat_res$delta_OU1 <- mat_res$AICc_OU1 - mat_res$AICc_min
mat_res$delta_OU3 <- mat_res$AICc_OU3 - mat_res$AICc_min
mat_res$delta_OU4 <- mat_res$AICc_OU4 - mat_res$AICc_min

S <- (exp(-mat_res$delta_BM1/2))+
  (exp(-mat_res$delta_BM3/2))+
  (exp(-mat_res$delta_BM4/2))+
  (exp(-mat_res$delta_OU1/2))+
  (exp(-mat_res$delta_OU3/2))+
  (exp(-mat_res$delta_OU4/2))

mat_res$w_BM1 <- (exp(-mat_res$delta_BM1/2))/S
mat_res$w_BM3 <- (exp(-mat_res$delta_BM3/2))/S
mat_res$w_BM4 <- (exp(-mat_res$delta_BM4/2))/S
mat_res$w_OU1 <- (exp(-mat_res$delta_OU1/2))/S
mat_res$w_OU3 <- (exp(-mat_res$delta_OU3/2))/S
mat_res$w_OU4 <- (exp(-mat_res$delta_OU4/2))/S
library(ggplot2)
require(reshape2)
W <- melt(mat_res)
ggplot(data =W[W$variable%in%c("w_BM1","w_BM3","w_BM4","w_OU1","w_OU3","w_OU4"),] , aes(x=variable, y=value)) + geom_boxplot(aes(fill=variable))