Run imputation on the single-cell data, using the bulk data for supervision.
# Call run_durian() on the single-cell data `C` (with its metadata `pDataC`)
# and the bulk data `B`, selecting "MuSiC" as the deconvolution method.
# All logging/saving switches are turned off for this run.
# NOTE(review): the meaning of the numeric tuning values is not visible here;
# check the run_durian() documentation before changing them.
impresult_list <- run_durian(
  scrabble_parameters = c(1, 1e-6, 1e-4),
  nEM = 5,
  scdata = C,
  metadata = pDataC,
  bulkdata = B,
  deconv_method = "MuSiC",
  nIter_outer = 10,
  nIter_inner = 10,
  nSDCIters = 500000,
  DECONVGENETHRESH = -0.01,
  SCRGENETHRESH = -0.01,
  outerStats = FALSE,
  durianEps = 1e-3,
  saveImputationLog = FALSE,
  saveDeconvolutionLog = FALSE,
  saveImputedStep = FALSE
)

# Keep only the "C" element of the returned list (presumably the imputed
# single-cell matrix -- confirm against the run_durian() return value).
impresult <- impresult_list[["C"]]
Construct a low-dimensional UMAP embedding from the imputed and the original data.
library(umap)
library(ggplot2)
library(reshape2)
# Embed both data sets with umap(). The matrices are transposed first,
# presumably so that each cell (a column of the input) becomes one embedded
# point -- TODO confirm the orientation of `C` and `impresult`.
umap_imputed <- umap(t(impresult))
umap_orig <- umap(t(as.matrix(C)))

# Attach the cell metadata to each embedding and tag its origin.
# NOTE(review): cbind() pairs rows by position, so this assumes the rows of
# `pDataC` are in the same order as the embedded cells -- verify.
imputed_df <- cbind(umap_imputed[["layout"]], pDataC)
imputed_df[["status"]] <- "Imputed"

orig_df <- cbind(umap_orig[["layout"]], pDataC)
orig_df[["status"]] <- "Unimputed"

# Stack both tables into one long data frame for plotting.
df <- rbind(imputed_df, orig_df)

# The six hard-coded names assume two embedding columns followed by exactly
# three metadata columns from `pDataC`, then `status`.
colnames(df) <- c(
  "UMAP1", "UMAP2",
  "cellID", "cellType", "sampleID",
  "status"
)

# Fix the level order so that "Unimputed" comes before "Imputed".
df[["status"]] <- factor(df[["status"]], levels = c("Unimputed", "Imputed"))
Plot the original and imputed data in side-by-side panels, colored by cell type.
# Scatter plot of the two UMAP coordinates, colored by cell type, with one
# panel per `status` level; scales = "free" lets each panel use its own axes.
ggplot(df, aes(x = UMAP1, y = UMAP2, color = cellType)) +
  geom_point(size = 1) +
  facet_grid(. ~ status, scales = "free") +
  theme_bw()