OSD Chlorophyta - Sci Rep 2018

1 Aim

  • Analysis of OSD Chlorophyta data for Sci Report paper 2018

2 Initialize.

This file defines all the necessary libraries and variables

  source('OSD Chlorophyta_init.R', echo=FALSE)

3 BLAST of Chlorophyta OTUs

3.1 Run BLAST on server

Using a qsub run the following code


# submitted with
# qsub -q short.q qsub_blast_osd.sh

DIR_PROJECT="/projet/umr7144/dipo/vaulot/metabarcodes/OSD/chlorophyta_margot_2018/"

cd $DIR_PROJECT

FILE=OSD_LGC_Chlorophyta

FASTA=$DIR_PROJECT$FILE".fasta"
BLAST_XLM=$DIR_PROJECT$FILE".xml"
BLAST_TSV=$DIR_PROJECT$FILE".tsv"
OUT_FMT="6 qseqid sseqid sacc stitle sscinames staxids sskingdoms sblastnames pident slen length mismatch gapopen qstart qend sstart send evalue bitscore"

blastn -max_target_seqs 100 -evalue 1.00e-10 -query $FASTA -out $BLAST_TSV -db /db/blast/all/nt -outfmt "$OUT_FMT"

3.2 Process BLAST output

We call the blast_18S function to create two files

  • a modified BLAST output file which includes additional coolumns
    • kingdom -> species : PR2 taxonomy for those accession numbers that are present in PR2
    • hit_rank : the rank of the hit based on decreasing % identity and decreasing bit scores
    • uncultured : TRUE if the hit corresponds to an uncultured item
    • hit_lineage : GenBank taxonomy of the hit
  • a summary file with one line per query that contains three sets of columns
    • The top hit (column with prefix hit_top_)
    • The top hit for which a PR2 sequence is available (columns starting with hit_pr2_).
    • The top hit corresponding to a culture or an isolate (columns starting with hit_cult_).
blast_18S_reformat("../blast/OSD_LGC_Chlorophyta.tsv")

4 Read data

4.1 Read excel file

otu <- read_excel(path = "../supplementary data/Supplementary_dataS2 version 5.0.xlsx", 
    sheet = "otus")
samples <- read_excel(path = "../supplementary data/Supplementary_dataS2 version 5.0.xlsx", 
    sheet = "samples")
metadata <- read_excel(path = "../supplementary data/Supplementary_dataS2 version 5.0.xlsx", 
    sheet = "metadata")

4.2 Merge otu table with BLAST file

blast_summary_file <- "../blast/OSD_LGC_Chlorophyta.blast.summary.tsv"
blast_summary <- read_tsv(blast_summary_file, guess_max = 10000) %>% mutate(OTUNumber = str_sub(query_id, 
    start = 1, end = 8))

otu <- left_join(otu, blast_summary)

5 Summarize at class level

5.1 Compute different dataframes

samples_surface <- samples %>% filter(Surface_sample == 1)
sample_number <- nrow(samples_surface)
glue::glue("Number of surface samples: {sample_number}")
Number of surface samples: 141
otu_long <- otu %>% select(OTUNumber, Class:Species, contains("OSD")) %>% gather(station, 
    n_seq, contains("OSD")) %>% filter(station %in% samples_surface$Sample)

otu_seq <- otu_long %>% group_by(OTUNumber) %>% summarise(n_seq_otu = sum(n_seq)) %>% 
    filter(n_seq_otu > 0)

glue::glue("Number of OTUs for surface samples: {nrow(otu_seq)}")
Number of OTUs for surface samples: 749
glue::glue("Number of reads for surface samples: {sum(otu_seq$n_seq_otu)}")
Number of reads for surface samples: 313240
class_seq <- otu_long %>% group_by(Class) %>% summarise(n_seq_class = sum(n_seq))

# write_tsv(class_seq, 'OSD Chlorophyta sequences.tsv')

station_seq <- otu_long %>% group_by(station) %>% summarise(n_seq_station = sum(n_seq))
sample_number_above100 <- nrow(filter(station_seq, n_seq_station >= 100))
glue::glue("Number of surface samples with more than 100 Chlorophyta reads: {sample_number_above100}")
Number of surface samples with more than 100 Chlorophyta reads: 122
otu_long <- otu_long %>% left_join(otu_seq) %>% left_join(class_seq) %>% left_join(station_seq)

class_stations <- otu_long %>% group_by(Class, station, n_seq_station) %>% summarise(n_seq = sum(n_seq)) %>% 
    mutate(pct_seq = n_seq/n_seq_station * 100)

class_stations_metadata <- class_stations %>% mutate(OSD_station = as.numeric(str_extract(station, 
    "[0-9]{1,3}"))) %>% left_join(metadata) %>% mutate(Latitude = as.numeric(Latitude), 
    Longitude = as.numeric(Longitude))

# Summaries at class level

class_pct <- class_stations %>% group_by(Class) %>% summarise(mean_pct_seq_class = mean(pct_seq))

class_presence <- class_stations %>% filter(n_seq_station >= 100) %>% filter(n_seq > 
    0) %>% group_by(Class) %>% summarise(n_stations_present = n(), pct_stations_present = 100 * 
    n()/sample_number_above100)

class_more_1pct <- class_stations %>% filter(n_seq_station >= 100) %>% filter(pct_seq > 
    1) %>% group_by(Class) %>% summarise(n_stations_more_1pct = n(), pct_stations_more_1pct = 100 * 
    n()/sample_number_above100)

class_summary <- class_seq %>% left_join(class_pct) %>% left_join(class_presence) %>% 
    left_join(class_more_1pct)

5.2 Graphics at class levels

Histograms and treemaps

theme_chlorophyta_map <- theme(legend.position = c(0.1, 0.2), legend.background = element_rect(fill = "transparent"), 
    legend.text = element_text(size = 7), legend.title = element_text(size = 9), 
    plot.tag.position = "topright", plot.tag = element_text(size = 24, face = "bold"), 
    plot.title = element_text(size = 14))
# Number of sequences per class
ggplot(class_summary, aes(x = reorder(Class, n_seq_class), y = n_seq_class)) + 
    geom_col() + coord_flip() + xlab("Class")

treemap_dv(class_summary, "Class", "n_seq_class", "OSD - number of sequences")

# Mean pct of sequences per class
treemap_dv(class_summary, "Class", "mean_pct_seq_class", "OSD - mean of contribution")

# Fig 5

graph_stations_present <- ggplot(class_summary, aes(x = reorder(Class, pct_stations_present), 
    y = pct_stations_present)) + geom_col() + geom_text(aes(label = n_stations_present), 
    hjust = -0.25) + coord_flip() + xlab("Class") + ylab("% of stations where class detected") + 
    ylim(0, 100) + theme_dviz_grid() + theme_chlorophyta_map + labs(title = "", 
    tag = "A")

graph_stations_present

graph_stations_more_1pct <- ggplot(class_summary, aes(x = reorder(Class, pct_stations_present), 
    y = pct_stations_more_1pct)) + geom_col() + geom_text(aes(label = n_stations_more_1pct), 
    hjust = -0.25) + coord_flip() + xlab("Class") + ylab("% of stations where class represesents more than 1% of Chlorophyta") + 
    ylim(0, 100) + theme_dviz_grid() + theme_chlorophyta_map + labs(title = "", 
    tag = "B")

graph_stations_more_1pct

grid_graphs <- gridExtra::grid.arrange(grobs = list(graph_stations_present, 
    graph_stations_more_1pct), ncol = 1, nrow = 2, clip = FALSE, padding = unit(0, 
    "line"))

ggsave(plot = grid_graphs, filename = "pdf/Fig 5 new Class and stations.pdf", 
    width = 15, height = 20, scale = 1.8, units = "cm", useDingbats = FALSE)

# Relation between stations present and stations more than 1 pct
ggplot(class_summary, aes(x = pct_stations_more_1pct, y = pct_stations_present, 
    label = Class)) + geom_point() + geom_text(size = 2, check_overlap = TRUE, 
    vjust = 1) + xlim(0, 100)

# Relation between sequence per class and stations present
ggplot(class_summary, aes(x = mean_pct_seq_class, y = pct_stations_present, 
    label = Class)) + geom_point() + geom_text(size = 2, check_overlap = TRUE, 
    vjust = 1)

6 Metadata

6.1 Statistics

metadata_summarized <- metadata %>% transmute(Temperature = as.numeric(Temperature), 
    Salinity = as.numeric(Salinity), Nitrates = as.numeric(Nitrates), Phosphates = as.numeric(Phosphates), 
    Chlorophylle_a = as.numeric(Chlorophylle_a)) %>% summarise_all(funs(min, 
    max, mean, median, sd), na.rm = TRUE)
knitr::kable(metadata_summarized)
Temperature_min Salinity_min Nitrates_min Phosphates_min Chlorophylle_a_min Temperature_max Salinity_max Nitrates_max Phosphates_max Chlorophylle_a_max Temperature_mean Salinity_mean Nitrates_mean Phosphates_mean Chlorophylle_a_mean Temperature_median Salinity_median Nitrates_median Phosphates_median Chlorophylle_a_median Temperature_sd Salinity_sd Nitrates_sd Phosphates_sd Chlorophylle_a_sd
-1.6 0.14 0 0 0.07 31 100 12 1.6 75 20 32 2.3 0.23 7.4 20 34 0.77 0.16 2.6 6.3 10 3.3 0.23 13

6.2 Graph

# metadata <- metadata %>% filter(Chlorophyta_reads > 100)

# Histogram of Chlorophyta %
ggplot(metadata, aes(x = Chlorophyta_pct_reads)) + geom_histogram(binwidth = 5, 
    center = 2.5) + ggtitle("Chllorophyta %")

# Chlorophyta % vs Chla
ggplot(metadata, aes(x = Chlorophylle_a, y = Chlorophyta_pct_reads)) + geom_point() + 
    scale_x_log10()

# Chlorophyta % vs Chlorophyta OTUs
ggplot(metadata, aes(x = Chlorophyta_OTUs, y = Chlorophyta_pct_reads)) + geom_point() + 
    geom_smooth(method = "lm", formula = y ~ x)

summary(lm(metadata$Chlorophyta_OTUs ~ metadata$Chlorophyta_pct_reads))

Call:
lm(formula = metadata$Chlorophyta_OTUs ~ metadata$Chlorophyta_pct_reads)

Residuals:
   Min     1Q Median     3Q    Max 
-42.18 -12.70  -2.48  11.11  52.54 

Coefficients:
                               Estimate Std. Error t value Pr(>|t|)    
(Intercept)                     29.4453     2.4930   11.81  < 2e-16 ***
metadata$Chlorophyta_pct_reads   0.2955     0.0653    4.53  1.3e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 19 on 139 degrees of freedom
Multiple R-squared:  0.128, Adjusted R-squared:  0.122 
F-statistic: 20.5 on 1 and 139 DF,  p-value: 1.28e-05
# Chlorophyta % vs Latitude
ggplot(metadata, aes(x = as.numeric(Latitude), y = Chlorophyta_pct_reads)) + 
    geom_point() + geom_smooth(method = lm, formula = y ~ splines::ns(x, df = 4), 
    se = FALSE) + xlim(-90, 90)

# Chlorophyta vs range of latitudes
ggplot(metadata, aes(x = as.numeric(Latitude), y = Chlorophyta_pct_reads)) + 
    geom_boxplot(aes(group = cut_width(as.numeric(Latitude), 10)))

# Chlorophyta % vs Temp
ggplot(metadata, aes(x = as.numeric(Temperature), y = Chlorophyta_pct_reads)) + 
    geom_point() + geom_smooth(method = "lm", formula = y ~ x)

# Chlorophyta % vs Nitrates
ggplot(metadata, aes(x = as.numeric(Nitrates), y = Chlorophyta_pct_reads)) + 
    geom_point() + geom_smooth(method = "lm", formula = y ~ x)

# Chlorophyta % vs Phosphates
ggplot(metadata, aes(x = as.numeric(Phosphates), y = Chlorophyta_pct_reads)) + 
    geom_point() + geom_smooth(method = "lm", formula = y ~ x)

7 Similarity of OTUs to culture sequences

7.1 Plot histograms of % identity

ggplot(otu, aes(x = hit_cult_pct_identity)) + geom_histogram(binwidth = 0.5, 
    boundary = 0) + xlim(100, 85) + ggtitle("Top hit culture")

ggplot(otu, aes(x = hit_top_pct_identity)) + geom_histogram(binwidth = 0.5, 
    boundary = 0) + xlim(100, 85) + ggtitle("Top hit GenBank")

7.2 Filters

Filter for top hit culture < 0.98 and OTUs > 10 reads compute number of OTUs uncultured Also compute mean % to cultured organism (if no hit to cultured, assume 80% similarity)

otu_10 <- otu %>% filter(Size > 10) %>% select(OTUNumber:Size, hit_cult_pct_identity, 
    hit_cult_hit_title)

otu_10$hit_cult_pct_identity <- otu_10$hit_cult_pct_identity %>% replace_na(80)

class_otu_10 <- otu_10 %>% group_by(Class) %>% summarize(n_otu_10 = n(), mean_hit_cult_pct_identity = mean(hit_cult_pct_identity, 
    na.rm = TRUE))

class_summary <- class_summary %>% left_join(class_otu_10)

otu_uncult <- otu_10 %>% filter(hit_cult_pct_identity < 98)


class_otu_uncult <- otu_uncult %>% group_by(Class) %>% summarize(n_otu_uncult = n())

class_summary <- class_summary %>% left_join(class_otu_uncult)

class_summary <- class_summary %>% mutate(pct_otu_uncult = 100 * n_otu_uncult/n_otu_10)

class_summary$pct_otu_uncult <- class_summary$pct_otu_uncult %>% replace_na(0)

7.3 Plot bargraphs

ggplot(class_summary, aes(x = reorder(Class, pct_otu_uncult), y = pct_otu_uncult)) + 
    geom_col() + geom_text(aes(label = n_otu_uncult), hjust = -0.7) + coord_flip() + 
    xlab("Class") + ylab("% of uncultured OTUs") + theme_dviz_grid()

ggsave("pdf/Fig S2 uncultured scale1.pdf", scale = 1, width = 22, height = 13, 
    units = "cm")

ggplot(class_summary, aes(x = reorder(Class, n_otu_uncult), y = n_otu_uncult)) + 
    geom_col() + coord_flip() + xlab("Class") + ylab("Number of uncultured OTUs") + 
    theme_dviz_grid()

ggplot(class_summary, aes(x = reorder(Class, pct_otu_uncult), y = mean_hit_cult_pct_identity)) + 
    geom_point() + coord_flip() + xlab("Class") + ylab("Mean percent identity to culture (more than 10 seq)") + 
    ylim(100, 80) + cowplot::theme_cowplot()

8 World Maps for each class

# Define constants for the map
size_factor = 1
size_points <- 2.5
size_cross <- 1
color_ice <- "lightslateblue"
color_water <- "red"
color_cultures <- "green"
color_not_present <- "blue"
color_morpho <- "brown"

classes_main_fig <- c("Mamiellophyceae", "Chlorodendrophyceae", "Chlorophyceae", 
    "Pyramimonadales", "Trebouxiophyceae", "Ulvophyceae")
classes_abundant <- c(classes_main_fig, "Chloropicophyceae", "Palmophyllophyceae", 
    "Prasinophytes clade IX")


map_array_main_fig <- list()  # to store the maps to do tiled graph
map_array_supp_fig <- list()

# Map zoomed for EU
map_array_main_fig_eu <- list()  # to store the maps to do tiled graph
map_array_supp_fig_eu <- list()

for (one_class in class_summary$Class) {
    
    if (one_class %in% classes_abundant) {
        class_limits = c(0, 100)
        class_breaks = c(10, 25, 50, 75, 100)
    } else {
        class_limits = c(0, 20)
        class_breaks = c(1, 2.5, 5, 10, 20)
    }
    
    class.absent <- class_stations_metadata %>% filter(n_seq_station >= 100 & 
        pct_seq < 1 & Class == one_class)
    
    class.present <- class_stations_metadata %>% filter(n_seq_station >= 100 & 
        pct_seq >= 1 & Class == one_class)
    
    one_class_map <- map_world() + geom_point(data = class.absent, aes(x = Longitude, 
        y = Latitude), color = color_not_present, size = size_cross, shape = 3) + 
        geom_point(data = class.present, aes(x = Longitude, y = Latitude, size = pct_seq, 
            color = pct_seq, alpha = pct_seq)) + theme(legend.position = c(0.1, 
        0.2), legend.background = element_rect(fill = "transparent"), legend.text = element_text(size = 7), 
        legend.title = element_text(size = 9)) + scale_size(name = "% of Chlorophyta", 
        range = c(0, 5), limits = class_limits, breaks = class_breaks) + scale_alpha_continuous(name = "% of Chlorophyta", 
        range = c(0.5, 0.9), limits = class_limits, breaks = class_breaks) + 
        viridis::scale_color_viridis(option = "magma", name = "% of Chlorophyta", 
            limits = class_limits, breaks = class_breaks) + guides(colour = guide_legend()) + 
        theme(plot.title = element_text(margin = margin(t = 10, b = 5))) + ggtitle(one_class)
    
    # range gives maximum and minimum size of symbols, limits the extent of the
    # scale replace guide = 'legend' by guide=FALSE to remove the legend....
    # NOT USED : guides(color = guide_legend(override.aes = list(size=5))) +
    
    one_class_map_eu <- one_class_map + coord_fixed(1.3, xlim = c(-40, 40), 
        ylim = c(30, 70)) + scale_x_continuous(breaks = (-4:4) * 10) + scale_y_continuous(breaks = (3:7) * 
        10)
    
    print(one_class_map)
    print(one_class_map_eu)
    
    if (one_class %in% classes_main_fig) {
        map_array_main_fig[[one_class]] <- one_class_map
    } else {
        map_array_supp_fig[[one_class]] <- one_class_map
    }
}

grid_map_main_fig <- gridExtra::grid.arrange(grobs = map_array_main_fig, ncol = 2, 
    nrow = 3, clip = FALSE, padding = unit(0, "line"))

grid_map_supp_fig <- gridExtra::grid.arrange(grobs = map_array_supp_fig, ncol = 2, 
    nrow = 4, clip = FALSE, padding = unit(0, "line"))

# Save pdf
ggsave(plot = grid_map_main_fig, filename = "pdf/Fig 4 Map Classes main.pdf", 
    width = 20, height = 20, scale = 1.8, units = "cm", useDingbats = FALSE)

ggsave(plot = grid_map_supp_fig, filename = "pdf/Fig S3 Map Classes supp.pdf", 
    width = 20, height = 26, scale = 1.8, units = "cm", useDingbats = FALSE)

9 Maps for % of Chlorophyta and OTU

9.1 Prepare the data

Chlorophyta_surface <- metadata %>% select(OSD_station, Chlorophyta_reads, Chlorophyta_pct_reads, 
    Chlorophyta_OTUs, Latitude, Longitude) %>% mutate(Latitude = as.numeric(Latitude), 
    Longitude = as.numeric(Longitude))

9.2 Statistics

Chlorophyta_surface_summary <- Chlorophyta_surface %>% select(Chlorophyta_reads, 
    Chlorophyta_pct_reads, Chlorophyta_OTUs) %>% summarise_all(funs(min, max, 
    mean, median, sd), na.rm = TRUE)
knitr::kable(Chlorophyta_surface_summary)
Chlorophyta_reads_min Chlorophyta_pct_reads_min Chlorophyta_OTUs_min Chlorophyta_reads_max Chlorophyta_pct_reads_max Chlorophyta_OTUs_max Chlorophyta_reads_mean Chlorophyta_pct_reads_mean Chlorophyta_OTUs_mean Chlorophyta_reads_median Chlorophyta_pct_reads_median Chlorophyta_OTUs_median Chlorophyta_reads_sd Chlorophyta_pct_reads_sd Chlorophyta_OTUs_sd
9 0.21 4 18569 94 98 2217 29 38 937 19 37 3172 25 20

9.3 Map for Chlorophyta %

pct_limits = c(0, 100)
pct_breaks = c(10, 25, 50, 75, 100)
theme_chlorophyta_map <- theme(legend.position = c(0.1, 0.2), legend.background = element_rect(fill = "transparent"), 
    legend.text = element_text(size = 7), legend.title = element_text(size = 9), 
    plot.tag.position = "topright", plot.tag = element_text(size = 18, face = "bold"), 
    plot.title = element_text(size = 14))

Chlorophyta_pct_map <- map_world() + # coord_map(xlim = c(-180, 180),ylim = c(-90, 90)) +
# scale_x_continuous(breaks = (-4:4) * 45) + scale_y_continuous(breaks =
# (-2:2) * 30) +
geom_point(data = filter(Chlorophyta_surface, Chlorophyta_pct_reads < 1), aes(x = Longitude, 
    y = Latitude), color = color_not_present, size = size_cross, shape = 3) + 
    geom_point(data = filter(Chlorophyta_surface, Chlorophyta_pct_reads >= 1), 
        aes(x = Longitude, y = Latitude, size = Chlorophyta_pct_reads, color = Chlorophyta_pct_reads, 
            alpha = Chlorophyta_pct_reads)) + scale_size(name = "%", range = c(0, 
    5), limits = pct_limits, breaks = pct_breaks) + scale_alpha_continuous(name = "%", 
    range = c(0.5, 0.9), limits = pct_limits, breaks = pct_breaks) + viridis::scale_color_viridis(option = "magma", 
    name = "%", limits = pct_limits, breaks = pct_breaks) + guides(colour = guide_legend()) + 
    labs(title = "Chlorophyta - % of Photosynthetic reads", tag = "A") + theme_chlorophyta_map

print(Chlorophyta_pct_map)

# Zoom on Europe

Chlorophyta_pct_europe <- Chlorophyta_pct_map + # coord_map(xlim = c(-30, 40),ylim = c(30, 70)) +
coord_fixed(1.3, xlim = c(-40, 40), ylim = c(30, 70)) + scale_x_continuous(breaks = (-4:4) * 
    10) + scale_y_continuous(breaks = (3:7) * 10) + labs(title = "", tag = "B")

print(Chlorophyta_pct_europe)

9.4 Map for OTUs

otu_limits = c(0, 100)
otu_breaks = c(10, 25, 50, 75, 100)

Chlorophyta_otu_map <- map_world() + # coord_map(xlim = c(-180, 180),ylim = c(-90, 90)) +
# scale_x_continuous(breaks = (-4:4) * 45) + scale_y_continuous(breaks =
# (-2:2) * 30) +
geom_point(data = Chlorophyta_surface, aes(x = Longitude, y = Latitude, size = Chlorophyta_OTUs, 
    color = Chlorophyta_OTUs, alpha = Chlorophyta_OTUs)) + scale_size(name = "# OTUs", 
    range = c(0, 5), limits = otu_limits, breaks = otu_breaks) + scale_alpha_continuous(name = "# OTUs", 
    range = c(0.5, 0.9), limits = otu_limits, breaks = otu_breaks) + viridis::scale_color_viridis(option = "magma", 
    name = "# OTUs", limits = otu_limits, breaks = otu_breaks) + guides(colour = guide_legend()) + 
    # theme(plot.title = element_text(margin = margin(t = 10, b = 5))) +
labs(title = "Chlorophyta - Number of OTUs", tag = "C") + theme_chlorophyta_map

print(Chlorophyta_otu_map)

# Zoom on Europe

Chlorophyta_otu_europe <- Chlorophyta_otu_map + # coord_map(xlim = c(-30, 40),ylim = c(30, 70)) +
coord_fixed(1.3, xlim = c(-40, 40), ylim = c(30, 70)) + scale_x_continuous(breaks = (-4:4) * 
    10) + scale_y_continuous(breaks = (3:7) * 10) + labs(title = "", tag = "D")

print(Chlorophyta_otu_europe)

9.5 Arrange the files into the final figures with 4 panels

map_array_chloro <- list(Chlorophyta_pct_map, Chlorophyta_pct_europe, Chlorophyta_otu_map, 
    Chlorophyta_otu_europe)
grid_map_chloro <- gridExtra::grid.arrange(grobs = map_array_chloro, ncol = 2, 
    nrow = 2, widths = c(1, 0.95), clip = FALSE, padding = unit(0, "line"))

ggsave(plot = grid_map_chloro, filename = "pdf/Fig 2 Map Chlorophyta.pdf", width = 20, 
    height = 15, scale = 1.8, units = "cm", useDingbats = FALSE)

10 Heatmaps

10.1 Filter the data to remove the class that have less than 1% contribution

class_summary_abundant <- class_summary %>% filter(mean_pct_seq_class >= 1)
class_abundant <- class_summary_abundant$Class
class_heatmap_data <- class_stations_metadata %>% filter((Class %in% class_abundant) & 
    (Chlorophyta_reads >= 100)) %>% ungroup() %>% mutate(OSD_station_label = sprintf("%s %03d", 
    Ocean_code, OSD_station)) %>% select(OSD_station_label, Class, pct_seq) %>% 
    spread(Class, pct_seq) %>% tibble::column_to_rownames(var = "OSD_station_label") %>% 
    t()

class_heatmap_data_vertical <- class_stations_metadata %>% filter((Class %in% 
    class_abundant) & (Chlorophyta_reads >= 100)) %>% ungroup() %>% mutate(OSD_station_label = sprintf("%03d %s", 
    OSD_station, Ocean_code)) %>% select(OSD_station_label, Class, pct_seq) %>% 
    spread(Class, pct_seq) %>% tibble::column_to_rownames(var = "OSD_station_label")

10.2 Use ComplexHeatmap

See Web site

library(ComplexHeatmap)

# Palette de couleurs

reds = colorRampPalette(c("grey95", "orange", "red3"))
couleurs = reds(10)



pdf(file = "pdf/Fig 6 heatmap horizontal.pdf", width = 12, height = 6)
Heatmap(as.matrix(class_heatmap_data), col = couleurs, clustering_distance_columns = function(m) vegan::vegdist(m), 
    cluster_rows = TRUE, cluster_columns = TRUE, show_column_dend = TRUE, show_row_dend = FALSE, 
    column_dend_height = unit(30, "mm"), column_title = "Station", row_title = "", 
    column_title_side = "bottom", row_title_side = "left", row_title_gp = gpar(fontsize = 12, 
        fontface = "plain"), column_title_gp = gpar(fontsize = 12, fontface = "plain"), 
    column_names_gp = gpar(fontsize = 6, fontface = "plain"), name = "%", heatmap_legend_param = list(title_gp = gpar(fontface = "plain")))
dev.off
function (which = dev.cur()) 
{
    if (which == 1) 
        stop("cannot shut down device 1 (the null device)")
    .External(C_devoff, as.integer(which))
    dev.cur()
}
<bytecode: 0x0000000008fc3c80>
<environment: namespace:grDevices>
pdf(file = "pdf/Fig 6 heatmap vertical.pdf", width = 6, height = 12)
Heatmap(as.matrix(class_heatmap_data_vertical), col = couleurs, split = 8, combined_name_fun = NULL, 
    km_title = "", clustering_distance_rows = function(m) vegan::vegdist(m), 
    clustering_distance_columns = function(m) vegan::vegdist(m), cluster_rows = TRUE, 
    cluster_columns = TRUE, show_column_dend = FALSE, show_row_dend = FALSE, 
    row_dend_width = unit(30, "mm"), column_title = "", row_title = "Station", 
    column_title_side = "bottom", row_title_side = "right", row_title_gp = gpar(fontsize = 0, 
        fontface = "plain"), column_title_gp = gpar(fontsize = 15, fontface = "plain"), 
    row_names_gp = gpar(fontsize = 6, fontface = "plain"), name = "%", heatmap_legend_param = list(title_gp = gpar(fontface = "plain")))
dev.off
function (which = dev.cur()) 
{
    if (which == 1) 
        stop("cannot shut down device 1 (the null device)")
    .External(C_devoff, as.integer(which))
    dev.cur()
}
<bytecode: 0x0000000008fc3c80>
<environment: namespace:grDevices>

10.3 Use heatmap.2 (Not used)

# pdf(file='Fig heatmap.pdf', width =15, height = 12)
gplots::heatmap.2(as.matrix(class_heatmap_data), col = couleurs, breaks = c(1, 
    5, 1:9 * 10), colsep = T, rowsep = T, trace = "none", margins = c(10, 10), 
    keysize = 0.8, key.title = "% of Chlorophyta", key.xlab = "", density.info = "none", 
    cexCol = 0.8, cexRow = 1.3, dendrogram = "column", Rowv = TRUE)
# dev.off

gplots::heatmap.2(as.matrix(class_heatmap_data), col = rev(terrain.colors(10)), 
    breaks = 11, colsep = T, rowsep = T, trace = "none", margins = c(5, 10), 
    keysize = 1, cexCol = 0.4, cexRow = 0.8, density.info = "none", dendrogram = "column", 
    Rowv = TRUE)



# colsep, rowsep, sepcolor (optional) vector of integers indicating which
# columns or rows should be separated from the preceding columns or rows by
# a narrow space of color sepcolor

# trace character string indicating whether a solid 'trace' line should be
# drawn across 'row's or down 'column's, 'both' or 'none'.  The distance of
# the line from the center of each color-cell is proportional to the size of
# the measurement. Defaults to 'column'.

Daniel Vaulot

22 07 2018