Seurat 原理和使用实例

目录

要点: 单细胞实验最大的目的是:划分细胞亚群,寻找分化及发育轨迹。Seurat能完成第一个目标,其原理参考官网,实例看文章及解读。

Seurat官网

SATIJA LAB: seurat首页, seurat实例,

单细胞分析教程: Analysis of single cell RNA-seq data

Seurat4 源码解析: Seurat 4 源码解析

Seurat分析实例(IF>5)

发育过程中

六个样本的单细胞数据如何发一篇10分以上的文章[cellranger+Seurat]: 主要研究对象是人类肠道组织,我们知道肠道主要分为回肠、结肠及直肠,所以这篇文章就采集了每个部位2个样本,来自于6个不同个人的手术中切除肠道组织。采用10XGenomics技术检测到了14537个肠道细胞,其中2个人类回肠6167个细胞,2个结肠4472个细胞,以及两个直肠3898个细胞。



免疫过程中

衰老过程中

Seurat 可视化及自定义函数

常用的ggplot2修饰函数

# large dot in legend
LargeLegend = function(size=5){
  guides(colour = guide_legend(override.aes = list(alpha = 1,size=size)))
}

# add panel box for ggplot
AddBox = function(...){
  theme(
    panel.border = element_rect(color = "black", size = 1),
    validate = TRUE, 
    ...
  )
}

FeaturePlot 及修饰

csdn 效果图
# feature plot with no axis
FeaturePlot(scObj, features =c("CD3D", "CD8A", "GZMB",   "CCR7", "CCL5", 
                               "CD5","CD7", 
                               
                               "HLA-DRA",
                               "LYZ", "CD33", "ANPEP", "FCGR1A", "IL3RA",
                               
                               "CD79A","CD79B","VPREB1", "IGJ",
                               "MME","CD38",
                               "HBA1", "GATA1", "TFRC",#CD71
                               "CD34", "SOCS2", 
                               "GAPDH"), 
            cols = c("lightgrey", 'red'),
            ncol = 5 ) & NoLegend() & NoAxes() & theme(
              panel.border = element_rect(color = "black", size = 1)
            )

DotPlot 及修饰

barplot 自定义函数

要点:1. table的2个参数是对细胞的2种分类指标,比如 样品处理时间 vs 亚群;2. 注意对原始样品归一化为1万个细胞,消除样品间测序细胞数差异,见底部的示例;3. colors=颜色是为table的第一个参数指定;

#' draw barplot, from a data.frame generated by table(para1, para2), colored by 1st parameter of table()
#' v2.1
#' v2.2 第一参数的空值跳过
#'
#' @param tbl1 data.frame, draw by each column
#' @param colors colors of each row(default NULL, auto-color)
#' @param scale whether scale to 1 or not(default T)
#' @param title the main title of the figure,(is main in the function)
#' @param legendY the position y of legend, adjust as needed, default -0.25
#' @param omit whether to omit some columns
#' @param ... 
#'
#' @return NULL
#' @export
#'
#' @examples
#' 
table2barplot = function(tbl1, colors=NULL, levels=NULL, scale=T, title="", 
                       omit=NULL, xlab="", ylab="", legendTitle=NULL){
  tbl1= tbl1[, which(colSums(tbl1)>0)] #remove all 0 columns
  tbl1= tbl1[which(rowSums(tbl1)>0), ] #remove all 0 rows
  # remove some columns by column names
  if(!is.null(omit)){
    tbl1=tbl1[, setdiff( colnames(tbl1), as.character( omit ) ) ]
  }
  # table to data.frame(wide to long)
  df2=as.data.frame(tbl1)
  if(!is.null(levels)){
    df2$Var1=factor(df2$Var1, levels =levels ) #change order
  }
  if(""==ylab){ ylab=ifelse(scale, "Freq", "Count") }
  if(""==xlab){ xlab="Index" }
  
  # draw
  g1=ggplot(df2, aes(x=Var2, y=Freq, fill=Var1))+
    geom_bar(stat="identity", position=ifelse(scale, "fill","stack") )+
    labs(x=xlab, y=ylab, title=title)+
    theme_classic(base_size = 14)+
    theme(axis.text.x=element_text(angle=60, hjust=1,size=rel(1.2)) )
  if(is.null(colors)){
    return (g1 + scale_fill_discrete( ifelse(is.null(legendTitle), "", legendTitle) ) )
  }else{
    return( g1+scale_fill_manual(legendTitle, values = colors) )
  }
}

if(0){
  table2barplot(
    as.table(t(
      apply(
        table(scObj$time, scObj$seurat_clusters), 
        1, 
        function(x){ x/sum(x) * 1e4})
    )),
    colors = c("0h"="#FFA500", "18h"="#FF1493", "48h"="#4876FF"),
    title="time"
  )
}

VolcanoPlot 经典火山图

csdn 效果图
#' Volcano plot
#'
#' @param dif a data.frame, must provide at leat 3 columns: log2FoldChange, padj, symbol
#' @param log2FC the threashold of DEG, default log2(1.5)
#' @param padj the threashold of DEG, default 0.05
#' @param label.symbols genes symbols to label, optional, if not provied, select by log2FC in DEG;
#' @param label.max if not provide label.symbols, automaticly select DEG number.
#' @param cols the dot colors of down and up genees.
#' @param title title of the plot
#'
#' @return a ggplot obj
#' @export
#'
#' @examples
#' if(0){
#' VolcanoPlot(dif, padj=0.05, title="DSS vs WT", label.max = 50)
#' VolcanoPlot(dif, padj=1e-10, title="DSS vs WT -2", 
#'     label.symbols=dif[ ((abs(dif$log2FoldChange) > 2) & (dif$padj < 1e-50) ) | 
#'                        abs(dif$log2FoldChange) > 4,]$symbol )
#' }
VolcanoPlot=function(dif, log2FC=log2(1.5), padj=0.05, 
                     label.symbols=NULL, label.max=30,
                     cols=c("#497aa2", "#ae3137"), title=""){
  if( all(!c("log2FoldChange", "padj", "symbol") %in% colnames(dif)) ){
    stop("Colnames must include: log2FoldChange, padj, symbol")
  }
  rownames(dif)=dif$symbol
  
  # (1) define up and down
  dif$threshold="ns";
  dif[which(dif$log2FoldChange > log2FC & dif$padj  2) & (dif$padj < 1e-50) ) | 
                                      abs(dif$log2FoldChange) > 4,]$symbol )

multiVolcanoPlot

csdn 效果图
# > head(DEG)
#              p_val avg_log2FC pct.1 pct.2     p_val_adj     cluster  gene label
#RPS12 1.273332e-143  0.7298951 1.000 0.991 1.746248e-139 Naive CD4 T RPS12    Up
#RPS6  6.817653e-143  0.6870694 1.000 0.995 9.349729e-139 Naive CD4 T  RPS6    Up
#


color.pals = c("#DC143C","#0000FF","#20B2AA","#FFA500","#9370DB","#98FB98","#F08080","#1E90FF","#7CFC00","#FFFF00",
               "#808000","#FF00FF","#FA8072","#7B68EE","#9400D3","#800080","#A0522D","#D2B48C","#D2691E","#87CEEB","#40E0D0","#5F9EA0",
               "#FF1493","#0000CD","#008B8B","#FFE4B5","#8A2BE2","#228B22","#E9967A","#4682B4","#32CD32","#F0E68C","#FFFFE0","#EE82EE",
               "#FF6347","#6A5ACD","#9932CC","#8B008B","#8B4513","#DEB887")

#
#' multi volcano plot for scRNA-seq
#' @version 0.2 change legend order
#' @version 0.3 add max_overlaps for annotation
#'
#' @param dat Seurat FindAllMarkers returns, must set only.pos = F;
#' @param color.arr color list, default same as Seurat
#' @param onlyAnnotateUp only annote gene symbols for up genes
#' @param log2Foldchang threshold for annotation
#' @param adjp  threshold for annotation
#' @param top_marker gene number for annotation
#' @param max_overlaps annotation label overlapping
#'
#' @return ggplot2 obj
#' @export
#'
#' @examples
multiVolcanoPlot = function(dat, color.arr=NULL, onlyAnnotateUp=T,
                            log2Foldchang=0.58, adjp=0.05, top_marker=5, 
                            max_overlaps=10, width=0.9){
  library(dplyr)
  library(ggrepel)
  # set default color list
  if(is.null(color.arr)){
    len = length(unique(dat$cluster))
    color.arr=scales::hue_pal()(len)
  }
  
  dat.plot <- dat %>% mutate(
    "significance"=case_when(p_val_adj < adjp & avg_log2FC >= log2Foldchang  ~ 'Up',
                             p_val_adj < adjp & avg_log2FC <= -log2Foldchang  ~ 'Down',
                             TRUE ~ 'None'))
  tbl = table(dat.plot$significance)
  print( tbl )
  background.dat <- data.frame(
    dat.plot %>% group_by(cluster) %>% filter(avg_log2FC>0) %>%
      summarise("y.localup"=max(avg_log2FC)),
    dat.plot %>% group_by(cluster) %>% filter(avg_log2FC<=0) %>%
      summarise("y.localdown"=min(avg_log2FC)),
    x.local=seq(1:length(unique(dat.plot$cluster)))
  ) %>% select(-cluster.1)
  #names(background.dat)
  #head(background.dat)
  #dim(background.dat)
  
  #
  x.number <- background.dat %>% select(cluster, x.local)
  dat.plot <- dat.plot%>% left_join(x.number,by = "cluster")
  #names(dat.plot)
  #head(dat.plot)
  
  #selecting top-up and top-down proteins
  dat.marked.up <- dat.plot %>% filter(significance=="Up") %>%
    group_by(cluster) %>% arrange(-avg_log2FC) %>%
    top_n(top_marker,abs(avg_log2FC))
  dat.marked.down <- dat.plot %>% filter(significance=="Down") %>%
    group_by(cluster) %>% arrange(avg_log2FC) %>%
    top_n(top_marker,abs(avg_log2FC))
  dat.marked <- dat.marked.up %>% bind_rows(dat.marked.down)
  #referring group information data
  dat.infor <- background.dat %>%
    mutate("y.infor"=rep(0,length(cluster)))
  #names(dat.infor)
  #dim(dat.infor)
  #head(dat.infor)
  
  ##plotting:
  #setting color by loading local color schemes
  vol.plot <- ggplot()+
    # background
    geom_col(background.dat,mapping=aes(x.local, y.localup),
             fill="grey80", alpha=0.2, width=0.9, just = 0.5)+
    geom_col(background.dat,mapping=aes(x.local,y.localdown),
             fill="grey80", alpha=0.2, width=0.9, just = 0.5)+
    # point plot
    geom_jitter(dat.plot, mapping=aes(x.local, avg_log2FC, #x= should be number, Not string or factor
                                      color=significance),
                size=0.8, width = 0.4, alpha= 1)+
    scale_color_manual(name="significance", 
                       breaks = c('Up', 'None', 'Down'),
                       values = c("#d56e5e","#cccccc", "#5390b5")) + #set color for: Down None   Up
    geom_tile(dat.infor, mapping=aes(x.local, y.infor), #x axis color box
              height = log2Foldchang*1.3,
              fill = color.arr[1:length(unique(dat.plot$cluster))],
              alpha = 0.5,
              width=width) +
    labs(x=NULL,y="log2 Fold change")+
    geom_text(dat.infor, mapping=aes(x.local,y.infor,label=cluster))+
    # Down is not recommend, not meaningful, hard to explain; so prefer dat.marked.up to dat.marked
    ggrepel::geom_label_repel(data=if(onlyAnnotateUp) dat.marked.up else dat.marked, #gene symbol, of up group default
                              mapping=aes(x=x.local, y=avg_log2FC, label=gene),
                              force = 2, #size=2,
                              max.overlaps = max_overlaps,
                              label.size = 0, #no border
                              fill="#00000000", #box fill color
                              seed = 233,
                              min.segment.length = 0,
                              force_pull = 2,
                              box.padding = 0.1,
                              segment.linetype = 3,
                              #segment.color = 'black',
                              #segment.alpha = 0.5,
                              #direction = "x", #line direction
                              hjust = 0.5)+
    annotate("text", x=1.5, y=max(background.dat$y.localup)+1,
             label=paste0("|log2FC|>=", log2Foldchang, " & FDR<", adjp))+
    theme_classic(base_size = 12)+
    
    theme(
      axis.title = element_text(size = 13, color = "black"),
      axis.text = element_text(size = 15, color = "black"),
      axis.line.y = element_line(color = "black", size = 0.8),
      #
      axis.line.x = element_blank(), #no x axis line
      axis.ticks.x = element_blank(), #no x axis ticks
      axis.title.x = element_blank(), #
      axis.text.x = element_blank(),
      #
      legend.spacing.x = unit(0.1,'cm'),
      legend.key.width = unit(0.5,'cm'),
      legend.key.height = unit(0.5,'cm'),
      legend.background = element_blank(),
      legend.box = "horizontal",
      legend.position = c(0.13, 0.77),legend.justification = c(1,0)
    )+
    guides( #color = guide_legend( override.aes = list(size=5) ), #legend circle size
      color=guide_legend( override.aes = list(size=5), title="Change")
    )
  #guides(fill=guide_legend(title="Change"))+ #change legend title
  vol.plot
}
#multiVolcanoPlot(DEG, color.pals)
multiVolcanoPlot(scObj.markers.time)
multiVolcanoPlot(scObj.markers.time, onlyAnnotateUp = F)

单细胞分析展望

整合是单细胞的利器,空间是单细胞的未来。