### Scripts relative to: ### Macrophage expression and prognostic significance of the long pentraxin PTX3 in COVID-19 ### #Fig1a #************************************************************************************************ # In vitro SARS-CoV-2 infected respiratory epithelial cells RNA-Seq analysis #************************************************************************************************ #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° # Download and extract fastq files #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° mkdir sra for i in $(cat sample | cut -f1); do echo $i; prefetch -O sra $i; fastq-dump.2.10.0 "sra/""$i; done fastq-dump.2.10.0 sra/* #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° # Align reads with star #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° mkdir star for file in $(ls *.fastq | sed -e "s/.fastq//g" ) ; do STAR --runThreadN 16 --outSAMtype BAM SortedByCoordinate --outBAMsortingThreadN 16 --outMultimapperOrder Random --genomeDir star_genome_dir --quantMode GeneCounts --readFilesIn $file".fastq" --outFileNamePrefix "star/star_"$file > "star/star_"$file".log" 2>&1; done #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° # Perform differential expression and plots #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° cd star R # IMPORT RAW COUNTS # targets = dir(pattern = "ReadsPerGene.out.tab") temp = read.table(targets[1], header = F, row.names=1, stringsAsFactors = F) nGenes = nrow(temp) nSamples = length(targets) mRNA.rawcounts = matrix(data = NA, ncol = nSamples, nrow = nGenes) colnames(mRNA.rawcounts)=targets #rownames(mRNA.rawcounts)= make.names(rownames(temp), unique=TRUE) rownames(mRNA.rawcounts)=rownames(temp) pb <- txtProgressBar(min = 0, max = length(targets), initial = 0, char = "=",width = NA, style = 3) for (i in 1:length(targets)) { setTxtProgressBar(pb, i) mRNA.rawcounts[,i] = read.table(targets[i], header = F, row.names=1, stringsAsFactors = F)[,1] } dim(mRNA.rawcounts) #mRNA.rawcounts = mRNA.rawcounts[grep("^ENSG",rownames(mRNA.rawcounts)),] keep_names = sapply(strsplit(rownames(mRNA.rawcounts), split=".",fixed=TRUE), `[`, 1) rownames(mRNA.rawcounts)=keep_names colnames(mRNA.rawcounts) = gsub(colnames(mRNA.rawcounts), pattern="ReadsPerGene.out.tab",replacement = "") colnames(mRNA.rawcounts) = gsub(colnames(mRNA.rawcounts), pattern="star_",replacement = "") mRNA.rawcounts = mRNA.rawcounts[which(!duplicated(rownames(mRNA.rawcounts))),] head(mRNA.rawcounts) dim(mRNA.rawcounts) mRNA.rawcounts<-mRNA.rawcounts[-c(1:4),] # ANNOTATE SAMPLES AND GENES # #samples # "sample_tab_mod" file derived from sra with sample annotations ann<-read.table(file="sample_tab_mod", header=F, row.names=1) exp<-mRNA.rawcounts colnames(exp)<-ann[match(colnames(exp), rownames(ann)),1] annotations = data.frame(GROUP = colnames(exp), SHORT_ID=colnames(exp)) rownames(annotations) = colnames(mRNA.rawcounts) #genes library(org.Hs.eg.db) symbols <- mapIds(org.Hs.eg.db, keys = row.names(exp), column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first" ) entrez <- mapIds(org.Hs.eg.db, keys = row.names(exp), column = "ENTREZID", keytype = "ENSEMBL", multiVals = "first") gene_name <- mapIds(org.Hs.eg.db, keys = row.names(exp), column = "GENENAME", keytype = "ENSEMBL", multiVals = "first" ) columns(org.Hs.eg.db) gene_annot <- data.frame(SYMBOL=symbols, ENTREZ=entrez, GENE_NAME=gene_name ) # NORMALIZE DATA AND PLOT PCA # library(DESeq2) library(ggplot2) library(ggrepel) dds <- DESeqDataSetFromMatrix(countData = mRNA.rawcounts, colData = annotations, design = ~ 0 + GROUP ) # + EXPERIMENT) total_genes = nrow(dds) dds <- dds[ (rowSums(counts(dds)) > 2), ] total_genes = nrow(dds) rld <- vst(dds, blind = TRUE) ematrix = assay(rld) symbol<-gene_annot[match(rownames(ematrix),rownames(gene_annot)),1] mat<-as.data.frame(cbind(as.character(symbol), ematrix)) mat<-mat[which (!is.na(mat[,1])),] save(ematrix, file="ematrix.RData") colnames(mat)<-ann[match(colnames(mat), rownames(ann)),1] write.table(mat, file="mat.txt", sep="\t", row.names=T,col.names=T, quote=F) setEPS() postscript(file="PCA.eps",width=20, height=20) (z.polyA <- plotPCA(rld, intgroup = c("GROUP"), ntop = 1000) + theme(panel.background = element_rect(fill = "white", colour = "grey50"))+ geom_point(size=0.2, color = "black")) dev.off() # PTX3 EXPRESSION AND BOXPLOT REPRESENTATION # colnames(ematrix)<-ann[match(colnames(ematrix), rownames(ann)),1] library(ggplot2) library(svglite) PTX3<-ematrix[grep("PTX3", symbol),] dat<-cbind(as.double(PTX3),colnames(ematrix)) colnames(dat)<- c("PTX3","infection_type") rownames(dat)<-colnames(ematrix) dat<-data.frame(dat) ord<-unique(dat$infection_type) dat$infection_type<-factor(dat$infection_type, levels=ord , ordered = TRUE) dat$PTX3<-as.double(PTX3) g<-ggplot(dat, aes(x=infection_type, y=PTX3, fill=infection_type, alpha=25)) + ylim(0,8) + guides(fill=FALSE) + geom_jitter( size=2.5,position = position_jitter(width = .25)) + geom_boxplot(outlier.shape=NA) + theme_bw() + theme(legend.position="none") + theme(axis.text.x = element_text(angle = 45, hjust = 1, size=14)) + theme(plot.margin = unit(c(3,3,3,3), "cm")) ggsave(g, file="PTX3_boxplot_covid19.svg") # FIND DIFFERENTIALLY EXPRESSED GENES # library(limma) my_design <- model.matrix(~ 0 + GROUP, data = annotations) sars_cov2_NHBE <- makeContrasts(GROUPSARSCoV2infectedNHBEcells_24hr - GROUPMockNHBEcells_24hr, levels=my_design) dds = DESeq(dds, betaPrior = F) res <- results(dds, contrast = sars_cov2_NHBE, independentFiltering = TRUE, alpha = 0.1) length(which(res$padj<0.05)) # total_sig (3101) length(which( (res$padj<0.05) & (res$log2FoldChange>0) ) ) # total_sig_up length(which( (res$padj<0.05) & (res$log2FoldChange<0) ) ) # total_sig_dn res2<-cbind(res,symbol) res2[which(res2$symbol=="PTX3"),] save(res2, file="res_sars_cov2_NHBE.RData") write.table(res2, file="res_sars_cov2_NHBE_gene_name.txt", sep="\t", quote=F) sars_cov2_A549 <- makeContrasts(GROUPSARSCoV2infectedA549cells_24hr - GROUPMockA549cells1_24hr, levels=my_design) dds = DESeq(dds, betaPrior = F) res <- results(dds, contrast = sars_cov2_A549, independentFiltering = TRUE, alpha = 0.1) length(which(res$padj<0.05)) # total_sig (3101) length(which( (res$padj<0.05) & (res$log2FoldChange>0) ) ) # total_sig_up length(which( (res$padj<0.05) & (res$log2FoldChange<0) ) ) # total_sig_dn res2<-cbind(res,symbol) res2[which(res2$symbol=="PTX3"),] save(res2, file="res_sars_cov2_A549.RData") write.table(res2, file="res_sars_cov2_A549_gene_name.txt", sep="\t", quote=F) sars_cov2_Calu3 <- makeContrasts(GROUPSARSCoV2Calu3 - GROUPMockCalu3, levels=my_design) dds = DESeq(dds, betaPrior = F) res <- results(dds, contrast = sars_cov2_Calu3, independentFiltering = TRUE, alpha = 0.1) length(which(res$padj<0.05)) # total_sig (3101) length(which( (res$padj<0.05) & (res$log2FoldChange>0) ) ) # total_sig_up length(which( (res$padj<0.05) & (res$log2FoldChange<0) ) ) # total_sig_dn res2<-cbind(res,symbol) res2[which(res2$symbol=="PTX3"),] save(res2, file="res_sars_cov2_Calu3.RData") write.table(res2, file="res_sars_cov2_Calu3_gene_name.txt", sep="\t", quote=F) sars_cov2_A549_MOI <- makeContrasts(GROUPSARSCoV2A549_MOI2 - GROUPMockA549_MOI2, levels=my_design) dds = DESeq(dds, betaPrior = F) res <- results(dds, contrast = sars_cov2_A549_MOI, independentFiltering = TRUE, alpha = 0.1) length(which(res$padj<0.05)) # total_sig (3101) length(which( (res$padj<0.05) & (res$log2FoldChange>0) ) ) # total_sig_up length(which( (res$padj<0.05) & (res$log2FoldChange<0) ) ) # total_sig_dn res2<-cbind(res,symbol) res2[which(res2$symbol=="PTX3"),] save(res2, file="res_sars_cov2_A549_MOI.RData") write.table(res2, file="res_sars_cov2_A549_MOI_gene_name.txt", sep="\t", quote=F) ## The end #Fig1b #************************************************************************************************ # Peripheral monocytes from COVID19 hospitalized patients and healthy donors RNA-Seq #************************************************************************************************ #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° # Reads quality control with fastc #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° mkdir fastqc fastqc -o fastqc *gz #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° # Align reads with star #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° mkdir star for file in $(ls *_R1.fq.gz | sed -e "s/_R1.fq.gz//g" ) ; do STAR --runThreadN 36 --outSAMtype BAM SortedByCoordinate --outBAMsortingThreadN 36 --outMultimapperOrder Random --genomeDir star_genome_dir --quantMode GeneCounts --readFilesCommand zcat --readFilesIn $file"_R1.fq.gz" $file"_R2.fq.gz" --outFileNamePrefix "star/star_"$file > "star/star_"$file".log" 2>&1; done #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° # Perform differential expression and plots #°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°° cd star for i in $(ls *ReadsPerGene.out.tab); do var=$(echo $i| cut -d"_" -f1,2,3 | awk '{print $0".tab"}' ); echo $var; mv $i $var; done R targets = dir(pattern = ".tab") temp = read.table(targets[1], header = F, row.names=1, stringsAsFactors = F) nGenes = nrow(temp) nSamples = length(targets) mRNA.rawcounts = matrix(data = NA, ncol = nSamples, nrow = nGenes) colnames(mRNA.rawcounts)=targets #rownames(mRNA.rawcounts)= make.names(rownames(temp), unique=TRUE) rownames(mRNA.rawcounts)=rownames(temp) pb <- txtProgressBar(min = 0, max = length(targets), initial = 0, char = "=",width = NA, style = 3) for (i in 1:length(targets)) { setTxtProgressBar(pb, i) mRNA.rawcounts[,i] = read.table(targets[i], header = F, row.names=1, stringsAsFactors = F)[,3] } dim(mRNA.rawcounts) keep_names = sapply(strsplit(rownames(mRNA.rawcounts), split=".",fixed=TRUE), `[`, 1) rownames(mRNA.rawcounts)=keep_names colnames(mRNA.rawcounts) = gsub(colnames(mRNA.rawcounts), pattern=".tab",replacement = "") colnames(mRNA.rawcounts) = gsub(colnames(mRNA.rawcounts), pattern="star_",replacement = "") mRNA.rawcounts = mRNA.rawcounts[which(!duplicated(rownames(mRNA.rawcounts))),] head(mRNA.rawcounts) dim(mRNA.rawcounts) mRNA.rawcounts<-mRNA.rawcounts[-c(1:4),] # ANNOTATE SAMPLES AND GENES # #samples ann<-read.table(file="sample_annotations_nat_imm.txt", header=T, sep="\t") rownames(ann)<-ann$SHORT_ID mRNA.rawcounts<-mRNA.rawcounts[,match(rownames(ann),colnames(mRNA.rawcounts))] annotations = ann #genes library(org.Hs.eg.db) symbols <- mapIds(org.Hs.eg.db, keys = row.names(mRNA.rawcounts), column = "SYMBOL", keytype = "ENSEMBL", multiVals = "first" ) entrez <- mapIds(org.Hs.eg.db, keys = row.names(mRNA.rawcounts), column = "ENTREZID", keytype = "ENSEMBL", multiVals = "first") gene_name <- mapIds(org.Hs.eg.db, keys = row.names(mRNA.rawcounts), column = "GENENAME", keytype = "ENSEMBL", multiVals = "first" ) columns(org.Hs.eg.db) gene_annot <- data.frame(SYMBOL=symbols, ENTREZ=entrez, GENE_NAME=gene_name ) # NORMALIZE DATA AND PLOT PCA # library(DESeq2) library(ggplot2) library(ggrepel) dds <- DESeqDataSetFromMatrix(countData = mRNA.rawcounts, colData = annotations, design = ~ 0 + GROUP ) total_genes = nrow(dds) dds <- dds[ (rowSums(counts(dds)) > 2), ] total_genes = nrow(dds) rld <- vst(dds, blind = TRUE) ematrix = assay(rld) symbol<-gene_annot[match(rownames(ematrix),rownames(gene_annot)),1] mat<-as.data.frame(cbind(as.character(symbol), ematrix)) mat<-mat[which (!is.na(mat[,1])),] save(ematrix, file="ematrix.RData") colnames(mat)<-ann[match(colnames(mat), rownames(ann)),1] write.table(mat, file="mat.txt", sep="\t", row.names=T,col.names=T, quote=F) setEPS() postscript(file="PCA.eps",width=20, height=20) (z.polyA <- plotPCA(rld, intgroup = c("GROUP"), ntop = 1000) + theme(panel.background = element_rect(fill = "white", colour = "grey50"))+ geom_point(size=0.2, color = "black")) dev.off() # PTX3 EXPRESSION AND BOXPLOT REPRESENTATION # colnames(ematrix)<-ann[match(colnames(ematrix), rownames(ann)),1] library(ggplot2) library(svglite) PTX3<-ematrix[grep("PTX3", symbol),] dat<-cbind(as.double(PTX3),colnames(ematrix)) colnames(dat)<- c("PTX3","infection_type") rownames(dat)<-colnames(ematrix) dat<-data.frame(dat) dat$PTX3<-as.double(PTX3) g<-ggplot(dat, aes(x=infection_type, y=PTX3, fill=infection_type, alpha=25)) + ylim(0,10) + guides(fill=FALSE) + geom_jitter( size=2.5,position = position_jitter(width = .25)) + geom_boxplot(outlier.shape=NA) + theme_bw() + theme(legend.position="none") + theme(axis.text.x = element_text(angle = 45, hjust = 1, size=14)) + theme(plot.margin = unit(c(3,3,3,3), "cm")) ggsave(g, file="PTX3_boxplot_covid19_vs_control.svg", width=5, height=7 ) # FIND DIFFERENTIALLY EXPRESSED GENES # library(limma) my_design <- model.matrix(~ 0 + GROUP, data = as.data.frame(annotations)) sars_cov2_control <- makeContrasts(GROUPDEG - GROUPHealthyDonor, levels=my_design) dds = DESeq(dds, betaPrior = F) res <- results(dds, contrast = sars_cov2_control, independentFiltering = TRUE, alpha = 0.1) res2<-cbind(res,symbol) ord_res<-res2[order(res2$padj),] ord_res<-res2[order(res2$padj),] res2[which(res2$symbol=="PTX3"),] save(ord_res, file="res_DEGvsHealthy.RData") write.table(res2, file="res_GROUPDEG_GROUPHealthy_Donor.txt", sep="\t", quote=F) ## The end #Fig1c ####################################### # scRNAseq PBMC # Seurat object derived from the authors ####################################### R library("Seurat") covid<-readRDS("blish_covid.seu.rds") library("reticulate") library("Rmagic") library("SingleR") # CELL TYPE ANNOTATION # results_singleR <- SingleR(sc_data = GetAssayData(covid), ref_data=blueprint_encode$data, types=blueprint_encode$main_types, method="cluster", clusters=as.factor(covid@meta.data$cell.type.fine)) save(results_singleR, file="results_singleR__blueprint_last.RData") covid<-AddMetaData(object = covid ,metadata = results_singleR$labels, col.name = 'singleR_blueprint') for (i in 1:length(results_singleR$labels)){ covid@meta.data$singleR_blueprint[which(covid@meta.data$celltype_sub==rownames(results_singleR$labels)[i])]<-results_singleR$labels[i,1] } # REMOVE MIXED AND UNDERREPRESENTED CELL POPULATIONS # tr<-WhichCells(object = covid, expression=singleR_blueprint != "HSC") tr_o<-subset(covid, cells = tr ) covid<-tr_o # IMPUTATION # cov_magic<-magic(covid, n.jobs=8) cov_magic@active.assay <- 'MAGIC_SCT' a<-covid$singleR_blueprint Idents(cov_magic) <- covid$singleR_blueprint # UMAP AND VIOLIN PLOTS # png("clustering_populations_cov_magic_singler_PTX3_last.png",width = 3000, height = 3000) FeaturePlot(object = cov_magic, features = "PTX3", cols = c("grey", "blue"), reduction = "umap", pt.size=5) dev.off() png("VlnPlot_cov_magic_PTX3_last.png",width = 3000, height = 3000) VlnPlot(object = cov_magic, features ="PTX3", pt.size=1) dev.off() png("clustering_populations_cov_magic_last.png",width = 3000, height = 3000) DimPlot(cov_magic, reduction = "umap", cols=1:30, pt.size=5) dev.off() setEPS() postscript(file="clustering_populations_PTX3.eps") FeaturePlot(object = cov_magic, features = "PTX3", cols = c("grey", "blue"), reduction = "umap" , pt.size=5) dev.off() setEPS() postscript(file="VlnPlot_PTX3.eps") VlnPlot(object = cov_magic, features ="PTX3",pt.size=1) dev.off() setEPS() postscript(file="clustering_populations.eps") DimPlot(cov_magic, reduction = "umap",pt.size=5) dev.off() ## The end #Fig1d #************************************************************************************************ # scRNA-Seq #************************************************************************************************ ####################################### # scRNAseq BALF # Seurat object derived from the authors ####################################### R library("Seurat") covid<-readRDS("covid_nbt_loc.rds") library("reticulate") library("Rmagic") library("SingleR") # SELECT BALF # tr<-WhichCells(covid, expression=location == "BL") tr_o<-subset(covid, cells = tr ) covid<-tr_o # CELL TYPE ANNOTATION # results_singleR <- SingleR(sc_data = GetAssayData(covid), ref_data=blueprint_encode$data, types=blueprint_encode$main_types, method="cluster", clusters=as.factor(covid@meta.data$celltype_sub)) save(results_singleR, file="results_singleR_blueprint.RData") covid<-AddMetaData(object = covid ,metadata = results_singleR$labels, col.name = 'singleR_blueprint') for (i in 1:length(results_singleR$labels)){ covid@meta.data$singleR_blueprint[which(covid@meta.data$celltype_sub==rownames(results_singleR$labels)[i])]<-results_singleR$labels[i,1] } # REMOVE MIXED AND UNDERREPRESENTED CELL POPULATIONS # #Idents(covid)<-covid@meta.data$singleR_blueprint tr<-WhichCells(object = covid, expression=singleR_blueprint != "HSC") tr_o<-subset(covid, cells = tr ) covid<-tr_o # IMPUTATION # cov_magic<-magic(covid, n.jobs=8) cov_magic@active.assay <- 'MAGIC_RNA' a<-covid$singleR_blueprint Idents(cov_magic) <- covid$singleR_blueprint # UMAP AND VIOLIN PLOTS # png("clustering_populations_cov_magic_singler_PTX3_last.png",width = 3000, height = 3000) FeaturePlot(object = cov_magic, features = "PTX3", cols = c("grey", "blue"), reduction = "umap", pt.size=5) dev.off() png("VlnPlot_cov_magic_PTX3_last.png",width = 3000, height = 3000) VlnPlot(object = cov_magic, features ="PTX3", pt.size=1) dev.off() png("clustering_populations_cov_magic_last.png",width = 3000, height = 3000) DimPlot(cov_magic, reduction = "umap", cols=1:30, pt.size=5) dev.off() setEPS() postscript(file="clustering_populations_PTX3.eps") FeaturePlot(object = cov_magic, features = "PTX3", cols = c("grey", "blue"), reduction = "umap" , pt.size=5) dev.off() setEPS() postscript(file="VlnPlot_PTX3.eps") VlnPlot(object = cov_magic, features ="PTX3",pt.size=1) dev.off() setEPS() postscript(file="clustering_populations.eps") DimPlot(cov_magic, reduction = "umap",pt.size=5) dev.off() ## The end