##SCRIPTS USED ###PHYLUCE WORKFLOW #Trim and clean sequences illumiprocessor --input rawreads --output clean-fastq --config anthozoa.config --cores 32 --trimmomatic /data/mcfadden/aquattrini/PROGRAMS/Trimmomatic-0.35/trimmomatic-0.35.jar #./rawreads/filename example: ANT12_CTCCTAGA_L001_R1_001.fastq.gz ANT8_CGTACGAA_L001_R1_001.fastq.gz ANT12_CTCCTAGA_L001_R2_001.fastq.gz ANT8_CGTACGAA_L001_R2_001.fastq.gz #anthozoa.config example [adapters] i7:GATCGGAAGAGCACACGTCTGAACTCCAGTCAC*ATCTCGTATGCCGTCTTCTGCTTG i5:AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT*GTGTAGATCTCGGTGGTCGCCGTATCATT [tag sequences] i5-N501:AAGCCACA i5-N502:AGAACGAG i5-N503:CGTACGAA i5-N504:CTCCTAGA i5-N505:CTCGTCTT i5-N506:GACGAATG i5-N507:GCATGTCT i5-N508:GGTCAGAT i7-N703:AGGTTCGA i7-N702:ACTCCATC i7-N701:AAGAAGGC i7-N708:GGTGTCTT i7-N707:GCATGTCT i7-N704:CGAAGAAC i7-N709:TGTGACTG i7-N705:CGAGACTA i7-N706:GACATGGT [tag map] ANT12_CTCCTAGA:i7-N706,i5-N504 ANT8_CGTACGAA:i7-N701,i5-N503 [names] ANT12_CTCCTAGA:ANT12a ANT8_CGTACGAA:ANT8a ###Spades Assembly nohup /data/mcfadden/aquattrini/PROGRAMS/SPAdes-3.9.0-Linux/bin/spades.py -o spades-assemblies/ANT120 -1 cleanreads/ANT120/split-adapter-quality-trimmed/ANT120-READ1.fastq.gz -2 cleanreads/ANT120/split-adapter-quality-trimmed/ANT120-READ2.fastq.gz --careful --threads 42 --cov-cutoff 2; #example of matching uce probes to contigs, extracting and aligning them, and then running them through RAxML phyluce_assembly_match_contigs_to_probes --contigs spades-assemblies/contigs --probes ../anthozoa-v1-probes.final.fasta --output uce-search-results --min-coverage 70 --min-identity 70 phyluce_assembly_get_match_counts --locus-db uce-search-results/probe.matches.sqlite --taxon-list-config taxon-set.conf --taxon-group 'all' --incomplete-matrix --output taxon-sets/all/all-taxa-incomplete.conf #example of taxon-set.conf [samples] ANT12:/data/mcfadden/aquattrini/clean-fastq/ANT12/split-adapter-quality-trimmed/ ANT8:/data/mcfadden/aquattrini/clean-fastq/ANT8/split-adapter-quality-trimmed/ phyluce_assembly_get_fastas_from_match_counts --contigs ../../spades-assemblies/contigs --locus-db ../../uce-search-results/probe.matches.sqlite --match-count-output all-taxa-incomplete.conf --output all-taxa-incomplete.fasta --incomplete-matrix all-taxa-incomplete.incomplete --log-path log phyluce_align_seqcap_align --fasta all-taxa-incomplete.fasta --output mafft-nexus-internal-trimmed --taxa 238 --aligner mafft --cores 12 --incomplete-matrix --output-format fasta --no-trim --log-path log phyluce_align_get_gblocks_trimmed_alignments_from_untrimmed --alignments mafft-nexus-internal-trimmed --output mafft-nexus-internal-trimmed-gblocks --cores 12 --log log --b1 0.5, --b2-0.5, --b3 10, --b4 5 phyluce_align_get_align_summary_data --alignments mafft-nexus-internal-trimmed-gblocks --cores 12 --log-path log --show-taxon-counts phyluce_align_remove_locus_name_from_nexus_lines --alignments mafft-nexus-internal-trimmed-gblocks --output mafft-nexus-internal-trimmed-gblocks-clean --cores 12 --log-path log phyluce_align_get_only_loci_with_min_taxa --alignments mafft-nexus-internal-trimmed-gblocks-clean --taxa 234 --percent 0.50 --output mafft-nexus-internal-trimmed-gblocks-clean-50p --cores 12 --log-path log phyluce_align_format_nexus_files_for_raxml--alignments mafft-nexus-internal-trimmed-gblocks-clean-50p --output mafft-nexus-internal-trimmed-gblocks-clean-50p-raxml --charsets --log-path log ###RAxML nohup raxmlHPC-PTHREADS -T 32 -f a -m GTRGAMMA -p 123415 -x 123435 -# 200 -s ../mafft-nexus-internal-trimmed-gblocks-clean-50p-raxml/mafft-nexus-internal-trimmed-gblocks-clean-50p.phylip -n final.50p ###ExABAYES nohup mpirun -np 12 exabayes -f ../mafft-all-50p-edit-raxml/mafft-all-50p.phylip -n 50p.run2 -q aln.part -s 24007 -c config.nexus -R 4 consense -f ExaBayes_topologies.run* -n 50p.run2 -b 0.25 postProcParam -f ExaBayes_parameters.run* -n 50p.run2 #example of a config.nexus file for exabayes #NEXUS begin run; numruns 4 numgen 1e9 end; #example of aln.part file for exabayes DNA, p1=1-278819 ###SPECIES TREE METHODS #make gene trees using raxml-ng or raxml-pthreads as above, with gene trees from 75% data matrices #concatenate trees together into 1 file cat uce* >out.tre #remove long branches from gene trees using TreeShrink run_treeshrink.py -t out.tre -o treeshr #remove strange characters in output sed 's/'\''/ /g' treeshr/out_0.05.tre >treeshr.tre #prune low support (<30) branches using nw_ed LD_LIBRARY_PATH=/data/mcfadden/aquattrini/PROGRAMS/newick-utils-1.6/lib export LD_LIBRARY_PATH /data/mcfadden/aquattrini/PROGRAMS/newick-utils-1.6/bin/nw_ed treeshr.tre 'i & b<=30' o > treeshr.nobs.tre #remove strange characters in output sed "s/'//g" treeshr.nobs.tre >treeshr.nobs.v2.tre #run astral java -jar /data/mcfadden/aquattrini/PROGRAMS/ASTRAL/Astral/astral.5.6.3.jar -i treeshr.nobs.v2.tre -o astral.all.75p.tre #root astral tree /data/mcfadden/aquattrini/PROGRAMS/phyx/src/pxrr -g aamask,hmmask,ccmask,cxmask -t astral.all.75p.tre -o astral.all.75p.r.tre #root raxml tree /data/mcfadden/aquattrini/PROGRAMS/phyx/src/pxrr -g aamask,hmmask,ccmask,cxmask -t raxml.50p.final.tre -o raxml.50p.final.r.tre ###FIND CLOCK-LIKE GENES #root all gene trees using pxrr.sh script #!/bin/bash for i in uce*; do /data/mcfadden/aquattrini/PROGRAMS/phyx/src/pxrr -g aamask,hmmask,ccmask,cxmask -s -o $i.r.tree -t $i done; #run sortadate on rooted gene trees python /data/mcfadden/aquattrini/PROGRAMS/SortaDate/src/get_var_length.py ./ --flend .r.tree --outf sortadate --outg cxmask,ccmask,aamask,hmmask #run sortadate on rooted species tree python /data/mcfadden/aquattrini/PROGRAMS/SortaDate/src/get_bp_genetrees.py ./raxml.50p.final.r.tre --flend .r.tree --outf sortadate2 #run sortadate to combine two runs python /data/mcfadden/aquattrini/PROGRAMS/SortaDate/src/combine_results.py sortadate sortadate2 --outf sortadatecomb # run sortadate to Sort and get the list of the good genes python /data/mcfadden/aquattrini/PROGRAMS/SortaDate/src/get_good_genes.py sortadatecomb --max 50 --order 3,1,2 --outf out.txt ###DATING TREE USING Penalised Likelihood Method library(ape) library(ggplot2) library(ggtree) library(phytools) tree=read.tree("/Users/quattrinia/Projects/UCEPRoject_FinalResults/FinalTrees/raxml.50p.final.r.tre") print(tree) ggtree(tree) + geom_text2(aes(subset=!isTip, label=node), hjust=-.3) + geom_tiplab() getMRCA(tree, tip=c('ANT28', 'ANT79')) #helioporacea getgetMRCA(tree, tip=c('ANT177', 'ANT29')) #keartoisdinae getMRCA(tree, tip=c('ANT255', 'ANT180', 'ANT95')) #caryophylidae getMRCA(tree, tip=c('atmask', 'amilmask', 'admask', 'ANT13a', 'ANT163')) #acropora getMRCA(tree, tip=c('ANT192', 'ANT197', 'ANT182', 'ANT183')) #oculina getMRCA(tree, tip=c('ANT283', 'ANT284', 'ANT212', 'ANT185', 'ANT118')) #dendrophylidae tree2=chronopl(tree, lambda=1, age.min=c(136,16,160,55.8,127,56,520), age.max=c(161,23,160,58.7,127,56,741), node=c(457,462,293,269,262,282,239)) plotTree(tree2) write.tree(tree2) ###BEAST Analyses #See xml files; all ran on CIPRES portal #Tree Annotator produced "MCC.withlabs.final.nex" ####ANCESTRAL STATE CHARACTERIZATION library(phytools) library(ggtree) x=read.csv("/Users/quattrinia/Projects/UCEPRoject_FinalResults/skeletal_traits.final.csv", row.names=1) T=read.nexus("/Users/quattrinia/Projects/UCEPRoject_FinalResults/MCC.withlabs.final.nex") plotTree(T, fsize=0.3) skeletal<-setNames(x[,1],rownames(x)) cols(=setNames(c("blue", "green", "orange", "yellow", "grey", "pink", "red", "light blue", "purple", "dark green"), levels(skeletal)) mtrees<-make.simmap(T,skeletal,model="ER",nsim=100) pd<-summary(mtrees,plot=FALSE) plot(mtrees[[1]],cols,type="fan",fsize=0.5,ftype="i") nodelabels(pie=pd$ace,piecol=cols,cex=0.2) ###95% HPD DISTRIBUTIONS (R Code Here:https://github.com/johnjschenk/Rcode/blob/master/NodeAgeDensity.R) library(ape) library(coda) #resampled trees, output from BEAST to include ~13K trees. beastrees=read.nexus(file="/Users/quattrinia/Projects/UCEPRoject_FinalResults/trees-resample50k.trees") AgeDensity <- function(phy, species1, species2){ NodeNumber <- mrca(phy)[species1, species2] ages <- branching.times(phy)[as.character(NodeNumber)] return(as.numeric(ages)) } NodeAges_antho <- unlist(lapply(.uncompressTipLabel(beastrees), AgeDensity, "ANT24", "admask")) HPD_antho<- HPDinterval(as.mcmc(NodeAges_antho), prob = 0.95) #output example HPD_antho lower upper var1 639.0152 900.1042 attr(,"Probability") [1] 0.9500037 par(bg=NA) #transparent background +minor.tick(nx=8) #add minor ticks ANTHplot=plot(density(as.numeric(NodeAges_antho[which(NodeAges_antho > HPD_antho[1, 1] & NodeAges_antho < HPD_antho[1, 2])])), main="", las=3, xlab="", lwd=2, ylim=c(0, 0.04), xlim=c(1000,0), ylab="") Hexacorallia ###DIVERSIFICATION RATE ANALYSIS WITH REVBaYEs #Code for Episodic Rate Diversification Analysis between traditional aragonitc and calcitic sea time intervals T=readTrees("MCC.withlabs.final.nex")[1] taxa <- T.taxa() moves = VectorMoves() monitors = VectorMonitors() NUM_INTERVALS = 9 SD = abs(0.587405 / NUM_INTERVALS) speciation_sd ~ dnExponential( 1.0 / SD) extinction_sd ~ dnExponential( 1.0 / SD) moves.append( mvScale(speciation_sd,weight=5.0) ) moves.append( mvScale(extinction_sd,weight=5.0) ) log_speciation[1] ~ dnUniform(-10.0,10.0) log_speciation[1].setValue(0.0) log_extinction[1] ~ dnUniform(-10.0,10.0) log_extinction[1].setValue(-1.0) moves.append( mvSlide(log_speciation[1], weight=2) ) moves.append( mvSlide(log_extinction[1], weight=2) ) speciation[1] := exp( log_speciation[1] ) extinction[1] := exp( log_extinction[1] ) for (i in 1:NUM_INTERVALS) { index = i+1 log_speciation[index] ~ dnNormal( mean=log_speciation[i], sd=speciation_sd ) log_extinction[index] ~ dnNormal( mean=log_extinction[i], sd=extinction_sd ) moves.append( mvSlide(log_speciation[index], weight=2) ) moves.append( mvSlide(log_extinction[index], weight=2) ) speciation[index] := exp( log_speciation[index] ) extinction[index] := exp( log_extinction[index] ) } moves.append( mvVectorSlide(log_speciation, weight=10) ) moves.append( mvVectorSlide(log_extinction, weight=10) ) moves.append( mvShrinkExpand( log_speciation, sd=speciation_sd, weight=10 ) ) moves.append( mvShrinkExpand( log_extinction, sd=extinction_sd, weight=10 ) ) interval_times <- v(40, 66, 180, 200, 358, 510, 542, 600) rho <- T.ntips()/7500 root_time <- T.rootAge() timetree ~ dnEpisodicBirthDeath(rootAge=T.rootAge(), lambdaRates=speciation, lambdaTimes=interval_times, muRates=extinction, muTimes=interval_times, rho=rho, samplingStrategy="uniform", condition="survival", taxa=taxa) timetree.clamp(T) mymodel = model(speciation) monitors.append( mnModel(filename="output_500k_octo_AC/mcc_EBD.log",printgen=10, separator = TAB) ) monitors.append( mnFile(filename="output_500k_octo_AC/mcc_EBD_speciation_rates.log",printgen=10, separator = TAB, speciation) ) monitors.append( mnFile(filename="output_500k_octo_AC/mcc_EBD_speciation_times.log",printgen=10, separator = TAB, interval_times) ) monitors.append( mnFile(filename="output_500k_octo_AC/mcc_EBD_extinction_rates.log",printgen=10, separator = TAB, extinction) ) monitors.append( mnFile(filename="output_500k_octo_AC/mcc_EBD_extinction_times.log",printgen=10, separator = TAB, interval_times) ) monitors.append( mnScreen(printgen=1000, extinction_sd, speciation_sd) ) mymcmc = mcmc(mymodel, monitors, moves, nruns=2, combine="mixed") mymcmc.run(generations=500000, tuningInterval=200) #Code for ERD analysis between mass extinction and reef crises, same as above but with the following intervals NUM_INTERVALS = 10 interval_times <- v(55.8, 65.5, 183 , 199, 251, 374, 444, 488, 542) #Code for Branch Rate Diversification Analysis -no shift in extinction rate- with pruned tips to have 1 species per genus observed_phylogeny=readTrees("MCC.pruned.final.nex")[1] taxa <- observed_phylogeny.taxa() root <- observed_phylogeny.rootAge() tree_length <- observed_phylogeny.treeLength() moves = VectorMoves() monitors = VectorMonitors() NUM_RATE_CATEGORIES = 8 NUM_TOTAL_SPECIES = 7500 H = 0.587405 speciation_mean ~ dnLoguniform( 1E-6, 1E2) moves.append( mvScale(speciation_mean, lambda=1, tune=true, weight=2.0) ) speciation_sd ~ dnExponential( 1.0 / H ) moves.append( mvScale(speciation_sd, lambda=1, tune=true, weight=2.0) ) speciation := fnDiscretizeDistribution( dnLognormal(ln(speciation_mean), speciation_sd), NUM_RATE_CATEGORIES ) extinction_mean.setValue( speciation_mean / 2.0 ) moves.append( mvScale(extinction_mean, lambda=1, tune=true, weight=2.0) ) extinction := rep( extinction_mean, NUM_RATE_CATEGORIES ) event_rate ~ dnUniform(0.0, 100.0/tree_length) moves.append( mvScale(event_rate, lambda=1, tune=true, weight=2.0) ) rate_cat_probs <- simplex( rep(1, NUM_RATE_CATEGORIES) ) rho <- observed_phylogeny.ntips() / NUM_TOTAL_SPECIES timetree ~ dnCDBDP( rootAge = root, speciationRates = speciation, extinctionRates = extinction, Q = fnJC(NUM_RATE_CATEGORIES), delta = event_rate, pi = rate_cat_probs, rho = rho, condition = "time" ) timetree.clamp(observed_phylogeny) mymodel6 = model(speciation) monitors.append( mnModel(filename="output_500K_BRDv2/mcc_BDS.log",printgen=1, separator = TAB) ) monitors.append( mnStochasticBranchRate(cdbdp=timetree, printgen=1, filename="output_500K_BRD2/mcc_BDS_rates.log") ) monitors.append( mnScreen(printgen=10, event_rate, speciation_mean, extinction_mean) ) mymcmc = mcmc(mymodel6, monitors, moves, nruns=2, combine="mixed") mymcmc.run(generations=2500,tuning=200) ####AIC TO FIND BEST Episodic Diversification Rate model #Mass extinction intervals xME=read.table("ME.txt") myvec=as.vector(xME$V1) aicm(myvec) [1] 4773.978 #Aragonite-calcite intervals xAC=read.table("AC.txt") myvec=as.vector(xAC$V1) aicm(myvec) [1] 4774.933 names(AIC.scores)=c("ME", "AC") aicw(AIC.scores) aicw(AIC.scores) fit delta w ME 4773.978 0.355 0.3552953 AC 4774.933 1.310 0.2204011 ###Plot Episodic Diversification Rates using R library(ggtree) library(RevGadgets) library(devtools) library(ggplot2) tree=read.nexus("/Users/quattrinia/Manuscripts_and_Reports/UCE_Anthozoa/DraftTrees/MCC.withlabs.final.nex") rev_outAC <- rev.process.div.rates(speciation_times_file = "/Users/quattrinia/Projects/UCEProject_2015/revbayes_output_500k_AC/mcc_EBD_speciation_times.log", speciation_rates_file ="/Users/quattrinia/Projects/UCEProject_2015/revbayes_output_500k_AC/mcc_EBD_speciation_rates.log", extinction_times_file = "/Users/quattrinia/Projects/UCEProject_2015/revbayes_output_500k_AC/mcc_EBD_extinction_times.log", extinction_rates_file = "/Users/quattrinia/Projects/UCEProject_2015/revbayes_output_500k_AC/mcc_EBD_extinction_rates.log", tree=tree, burnin=0.25, numIntervals=100) pdf("EBD_AC.pdf") par(mfrow=c(2,2)) rev.plot.div.rates(rev_outAC,use.geoscale=FALSE) #####Plotting Div Rate tree and density plot library(ape) library(tidyverse) library(BAMMtools) library(coda) library(phytools) library(scales) library(RevGadgets) library(ggtree) library(treeio) library(ggridges) library(scales) # read in tree tr <- read.tree("Coral_plotting/MCC.final.pruned.final.tre") ### Plotting Tip rate density plot### events <- read.table("Coral_plotting/mcc_BDS_rates.log", header = T, stringsAsF = F) lambda <- events[,3:409] mu <- events[,410:816] net_div <- lambda - mu lambda_mean <- colMeans(lambda) mu_mean <- colMeans(mu) net_div_mean <- colMeans(net_div) lambda_tips <- lambda_mean[1:204] mu_tips <- mu_mean[1:204] net_div_tips <- lambda_tips - mu_tips # plot tree my_tree_file = "Coral_plotting/MCC.final.pruned.final.tre" my_branch_rates_file = "Coral_plotting/mcc_BDS_rates.log" tree_plot = plot_branch_rates_tree(tree_file=my_tree_file,branch_rates_file=my_branch_rates_file) # colour Corals = c("#5D3E22", "#4A047B", "#4FBA66", "#008AC5","#B05B9E") # reverse axis tree_plot2 <- revts(tree_plot) # get space in plot tree_plot2 + coord_cartesian(xlim = c(-900,400),ylim = c(-5,Ntip(tr)+2), expand = FALSE) + scale_x_continuous(breaks=seq(-800,0,100), labels=abs(seq(800,0,-100))) + theme_tree2() + theme(legend.position = c(.12, .87), legend.box.background = element_blank(),legend.key = element_blank()) -> tree_plot2 # alter gradient fill tree_plot2 + scale_colour_gradientn(breaks = c(0.010, 0.012, 0.014, 0.016, 0.018), colours = c("#5D3E22", "#4A047B", "#4FBA66", "#008AC5","#B05B9E")) -> tree_plot3 #save ggsave("Rate_plot.pdf", tree_plot3, height = 10, width = 6, useDingbats = FALSE) ####### Plot tip rate density # read in function to plot tip rates (from A Siqueira) source("Coral_plotting/Plotting_functions_Siqueira.R") ### Plotting events <- read.table("Coral_plotting/mcc_BDS_rates.log", header = T, stringsAsF = F) lambda <- events[,3:409] mu <- events[,410:816] net_div <- lambda - mu lambda_mean <- colMeans(lambda) mu_mean <- colMeans(mu) net_div_mean <- colMeans(net_div) lambda_tips <- lambda_mean[1:204] mu_tips <- mu_mean[1:204] net_div_tips <- lambda_tips - mu_tips dens.tips = density(net_div_tips) colorbreaks <- assignColorBreaks(net_div_tips, 64, spex = "netdiv", logcolor = F, "linear", JenksSubset = 20000) colorobj <- colorMap2(net_div_tips, pal = "Corals", colorbreaks, logcolor = F, color.interval = NULL) col.dens = colorobj$colsdensity$coldens pdf("Coral_plotting/Tip_density_corals.pdf", height = 8, width = 10) plot(dens.tips, bty = "n", xlim = c(0.005,0.02), ylim = c(0,500), col = "Black", xlab="", ylab="", yaxt="n", xaxt="n", main="", lwd=0.5, zero.line = F) axis(side = 1, at = seq(0.005,0.02,0.005), lwd = 1.5, cex.axis = 1, tcl = -0.3, mgp = c(3, 0.5, 0)) mtext(side = 1, text = "Net Diversification", line = 1.8, cex = 1) axis(side = 2, at = seq(0,500,100), lwd = 1.5, cex.axis = 1, tcl = -0.3, mgp = c(3, 0.5, 0), las = 1, labels = as.character(seq(0,50,10))) mtext(side = 2, text = "Tip density", line = 1.8, cex = 1) for(i in 1:length(col.dens)){ if(i == 1){ polygon(c(0,dens.tips$x[i],dens.tips$x[i]), c(0,dens.tips$y[i],0), col = col.dens[i], lty = 0) } else if (i == length(col.dens)){ polygon(c(dens.tips$x[i-1],dens.tips$x[i],dens.tips$x[i]), c(0,dens.tips$y[i],0), col = col.dens[i], lty = 0) } else { polygon(c(dens.tips$x[i-1],dens.tips$x[i-1],dens.tips$x[i],dens.tips$x[i+1],dens.tips$x[i+1]), c(0,dens.tips$y[i-1],dens.tips$y[i],dens.tips$y[i+1],0), col = col.dens[i], lty = 0) } } dev.off() =