Files for ttrees

library(bigsnpr)
## Loading required package: bigstatsr
celiac2 <- snp_attach("backingfiles/celiacQC_sub1.rds")
G <- celiac2$genotypes
# jdb
file.jdb <- "backingfiles/ttrees.jdb"
cat(rep(";", 5), sep = "\n", file = file.jdb)
big_apply(G, a.FUN = function(X, ind) {
  df <- cbind(paste0("ind_", ind), rep(3, length(ind)), as.data.frame(X[ind, ]))
  data.table::fwrite(df, file = file.jdb, sep = " ", append = TRUE,
                     quote = FALSE, col.names = FALSE)
  NULL
}, a.combine = "c", ind = rows_along(G), block.size = 1e3)
## NULL
# bloc
bigsnpr:::write.table2(
  bigstatsr:::CutBySize(ncol(G), block.size = 10)[, 1:2] - 1,
  file = file.bloc <- sub("\\.jdb$", ".bloc", file.jdb)
)
readLines(file.bloc, 5)
## [1] "0\t9"   "10\t19" "20\t29" "30\t39" "40\t49"

File with recessive/dominant information

// [[Rcpp::depends(bigstatsr, BH)]]
#include <bigstatsr/BMCodeAcc.h>

// [[Rcpp::export]]
void tripleBM(Environment BM, Environment BM2) {
  
  XPtr<FBM> xpMat = BM["address"];
  int n = xpMat->nrow();
  int m = xpMat->ncol();
  SubBMCode256Acc macc(xpMat, seq_len(n)-1, seq_len(m)-1, BM["code256"]);
  
  XPtr<FBM> xpMat2 = BM2["address"];
  BMAcc<unsigned char> macc2(xpMat2);
  
  int i, j, j2;
  
  for (j = j2 = 0; j < m; j++, j2 += 3) {
    for (i = 0; i < n; i++) {
      macc2(i, j2)   = macc(i, j);
      macc2(i, j2+1) = macc(i, j) >= 0.5;
      macc2(i, j2+2) = macc(i, j) >  1.5;
    }
  }
}
snp_triple <- function(x) {
  
  G0 <- x$genotypes
  G <- G0$copy(code = round(G0$code256))
  
  newfile <- bigsnpr:::getNewFile(x, "tripled")
  G2 <- FBM.code256(nrow(G), 3 * ncol(G), code = bigsnpr:::CODE_012, 
                    backingfile = newfile, save = TRUE)
  
  tripleBM(G, G2)
  
  paste0(newfile, ".rds")
}
celiac <- snp_attach("backingfiles/celiacQC.rds")
snp_triple(celiac)
## [1] "/home/privef/Bureau/paper2-PRS/backingfiles/celiacQC_tripled1.rds"
snp_triple(celiac2)
## [1] "/home/privef/Bureau/paper2-PRS/backingfiles/celiacQC_sub1_tripled1.rds"
# List all the files with their size in GB
round(sapply(list.files("backingfiles", full.names = TRUE), file.size) / 1024^3, 1)
##                backingfiles/celiacQC.bk               backingfiles/celiacQC.rds 
##                                     4.0                                     0.0 
##           backingfiles/celiacQC_sub1.bk          backingfiles/celiacQC_sub1.rds 
##                                     1.9                                     0.0 
##  backingfiles/celiacQC_sub1_tripled1.bk backingfiles/celiacQC_sub1_tripled1.rds 
##                                     5.6                                     0.0 
##       backingfiles/celiacQC_tripled1.bk      backingfiles/celiacQC_tripled1.rds 
##                                    11.9                                     0.0 
##            backingfiles/cluster_Finn_v2                 backingfiles/cluster_IT 
##                                     0.0                                     0.0 
##                 backingfiles/cluster_NL             backingfiles/cluster_UK1_v2 
##                                     0.0                                     0.0 
##             backingfiles/cluster_UK3_v2                   backingfiles/PCA2.rds 
##                                     0.0                                     0.0 
##                backingfiles/ttrees.bloc                 backingfiles/ttrees.jdb 
##                                     0.0                                     3.7