Skip to contents
Load the package and data
# Attach DURIAN and load the example objects used below: C (passed later as
# `scdata`), B (passed as `bulkdata`) and pDataC (passed as `metadata`).
library(DURIAN)

# data() does not evaluate its unnamed arguments; it treats them as literal
# dataset names. A character vector of names therefore has to be supplied
# through `list =`, otherwise the whole `c(...)` expression is looked up as a
# single (non-existent) dataset name.
data(list = c("C", "B", "pDataC"))

# Fix the RNG seed so that the steps below that draw random numbers are
# repeatable between runs.
set.seed(42)
Preprocess the data
# Row names shared by the single-cell matrix C and the bulk matrix B.
comgenes <- intersect(rownames(C), rownames(B))

# Filter C with scremoutlier(), then restrict it to the shared row names.
# NOTE(review): the exact filtering rules of scremoutlier()/subsetsc() and the
# meaning of `nsd` are defined in DURIAN, not here -- confirm in its docs.
C <- DURIAN::subsetsc(
  scremoutlier(C),
  geneids = comgenes,
  return_obj = TRUE,
  nsd = 3
)

# Keep the metadata rows that match the columns remaining in C, in C's order.
pDataC <- pDataC[colnames(C), ]
Run imputation on the single-cell data, using the bulk data for supervision.
# Run DURIAN on the single-cell data C (with its metadata pDataC), supplying
# the bulk data B alongside it. All tuning values are kept exactly as given;
# NOTE(review): their precise semantics are defined by run_durian() -- consult
# the DURIAN documentation before changing any of them.
impresult_list <- run_durian(
  scrabble_parameters = c(1, 1e-6, 1e-4),
  nEM = 5,
  scdata = C,
  metadata = pDataC,
  bulkdata = B,
  deconv_method = "MuSiC",
  nIter_outer = 10,
  nIter_inner = 10,
  nSDCIters = 500000,
  DECONVGENETHRESH = -0.01,
  SCRGENETHRESH = -0.01,
  outerStats = FALSE,
  durianEps = 1e-3,
  saveImputationLog = FALSE,
  saveDeconvolutionLog = FALSE,
  saveImputedStep = FALSE
)

# Extract the element named "C" from the returned list; it is transposed and
# embedded with umap() further down (presumably the imputed counterpart of the
# input matrix C -- confirm against run_durian()'s return value).
impresult <- impresult_list[["C"]]
Construct a low-dimensional UMAP embedding from imputed and original data
library(umap)
library(ggplot2)
library(reshape2)

# Embed both matrices with umap(). Each matrix is transposed first, so the
# columns of impresult / C become the rows handed to umap(). The imputed
# matrix is embedded first, then the original one.
umap_imputed <- umap(t(impresult))
umap_orig <- umap(t(as.matrix(C)))

# Attach the metadata to each 2-column layout and tag where it came from.
imputed_df <- cbind(umap_imputed$layout, pDataC)
imputed_df$status <- "Imputed"

orig_df <- cbind(umap_orig$layout, pDataC)
orig_df$status <- "Unimputed"

# Stack the two tables and give the columns plotting-friendly names.
# This naming relies on pDataC contributing exactly three columns, in the
# order cellID, cellType, sampleID.
df <- rbind(imputed_df, orig_df)
names(df) <- c("UMAP1", "UMAP2", "cellID", "cellType", "sampleID", "status")

# Explicit level order so "Unimputed" is ordered before "Imputed".
df$status <- factor(df$status, levels = c("Unimputed", "Imputed"))
Plot the original and imputed data
# Scatter plot of the two UMAP coordinates, coloured by cellType, with one
# facet column per status level and independent axis scales per facet.
ggplot(data = df, mapping = aes(UMAP1, UMAP2, colour = cellType)) +
  geom_point(size = 1) +
  facet_grid(~status, scales = "free") +
  theme_bw()