Description

The protein encoded by this gene is the fourth major glycoprotein of the platelet surface and serves as a receptor for thrombospondin in platelets and various cell lines. Since thrombospondins are widely distributed proteins involved in a variety of adhesive processes, this protein may have important functions as a cell adhesion molecule. It binds to collagen, thrombospondin, anionic phospholipids and oxidized LDL. It directly mediates cytoadherence of Plasmodium falciparum parasitized erythrocytes and it binds long chain fatty acids and may function in the transport and/or as a regulator of fatty acid transport. Mutations in this gene cause platelet glycoprotein deficiency. Multiple alternatively spliced transcript variants have been found for this gene. [provided by RefSeq, Feb 2014]

Isoforms

Stats on raw UMI counts:

id transcript_id biotype cds_length sum total_gene perc is_major is_top
CD36-201 ENST00000309881 protein_coding 1419 327 483 67.701863 TRUE TRUE
CD36-202 ENST00000394788 protein_coding 1419 130 483 26.915114 TRUE FALSE
CD36-214 ENST00000447544 protein_coding 1419 26 483 5.383023 FALSE FALSE

Cell-type expression:

Switches

1 switches found for CD36

geneId t1 c1 p1 log2fc1 t2 c2 p2 log2fc2
CD36 ENST00000309881 Mid cap C3 1.55e-23 -1.8 ENST00000394788 Lymphatic endo C3 5.25e-10 1.1
---
title: "`r params$gene` gene report"
params:
  seurat : "a"
  markers : "b" 
  gene : "c" 
output:
  html_notebook: 
    css: style.css
    code_folding: none
    toc: yes
    toc_float: yes
  html_document:
    df_print: paged
    css: "style.css"
    code_folding: none
    theme: journal
    toc: yes
    toc_depth: 3
    toc_float: yes
  pdf_document:
    toc: yes
    toc_depth: '3'
---




```{r setup, echo=FALSE, warnings=FALSE}


knitr::opts_chunk$set(
	echo = FALSE,
	message = FALSE,
	cache = FALSE,
	cache.lazy = FALSE,
	tidy = TRUE
)

seurat <- params$seurat
setwd("/data/10x_data/10x_5psanger/")
source("./isoswitch/00.common.import.R")
source("./isoswitch/00.visual.R")
library(rentrez)
library(tidyr)
library(tibble)
library(forcats)
library(ggtranscript)

library(rmarkdown)
#rmarkdown::paged_table(params$markers)
library(knitr)
#knitr::kable()
```


## Description

```{r echo=FALSE}

if(!exists('gene_meta_df'))
  gene_meta_df <- readRDS("/data/10x_data/10x_5psanger/isoswitch/data/adult_gen_metadata.rds") 

# retrieve gene description
gene_meta <- gene_meta_df %>%
  filter(external_gene_name == params$gene)

if(nrow(gene_meta)==0) {
  description <- ""
} else {
  entrez_id <- gene_meta$entrezgene_id[[1]]
  
  if(is.na(entrez_id))
    description <- gene_meta$description[[1]]
  else {
    entrez_data <- entrez_summary(db="gene", id=entrez_id)
    description <- entrez_data$summary
  }
}

```

`r description`

```{r echo=FALSE, fig.width=6, fig.height=4}
DefaultAssay(seurat) <- "RNA"
FeaturePlot(seurat, features=params$gene, label=TRUE, label.size=2.5, repel=TRUE, raster=TRUE)
```


```{r include=FALSE} 

# ugly temporary hack; export this properly from isoswitch

._report.locus <- function(gtf_df, gene, meta, legend=TRUE){
   
   # get exons for this gene (only transcripts shown)
   exons <- gtf_df %>% 
     filter(gene_name==gene, 
            transcript_id %in% meta$transcript_id,
            type=="exon") %>%
     left_join(meta, by="transcript_id")
   
   # force color mapping for each feature
   manual_colors <- deframe(dplyr::select(meta, external_transcript_name, color))
   
   p1 <- ggplot(exons, aes(xstart = start, xend = end, y = factor(external_transcript_name, levels=meta$external_transcript_name))) +
     geom_range(aes(fill = external_transcript_name) ) +
     scale_fill_manual(values = manual_colors) + 
     scale_y_discrete(limits=rev) +
     geom_intron(data = to_intron(exons, "transcript_id"), aes(strand = strand)) +
     labs(y=NULL, fill="Transcript") + 
     theme_minimal() 
   
   if(legend)
     p1 <- p1 +  theme(legend.key.height= unit(0.15, 'cm'),
                       legend.key.width= unit(0.40, 'cm'))
   else
     p1 <- p1 + theme(legend.position = "none")
   
   return(p1)
}



 ._isoswitch_report.junctions <- function(obj, assay, gtf_df, gene, meta){
   
   junction_counts <- obj@assays[[assay]]@counts
   idx <- grep(paste("^", gene,"\\.", sep=""), rownames(junction_counts), value=TRUE)
   gene_junctions <- junction_counts[idx, ]
   
   # chr/strand lookup
   gene_location <- gtf_df %>% 
     dplyr::select(gene_name, seqnames, strand) %>%
     filter(gene_name==gene) %>% head(1)  
   
   # get junction data (start, end, quantif), keep only jcts over 0.1 UMI/cell
   jct_df <- data.frame(junction = rownames(gene_junctions)) %>%
     separate(junction, into=c("gene_id","coords"), sep="\\.\\.") %>%
     separate(coords, into=c("start","end"), sep="-", convert=TRUE) %>%
     mutate(avg = rowSums(gene_junctions)/ncol(gene_junctions)) %>% 
     left_join(gene_location, by=c("gene_id"="gene_name")) %>%
     mutate(transcript_id = "ALL_EXONS")%>%
     filter(avg >= 0.1)
   
   # collapse exons for this gene (only transcripts shown) into a fake single id
   exons <- gtf_df %>%
     filter(gene_name==gene, 
            type=="exon", 
            transcript_id %in% meta$transcript_id) %>%
     mutate(transcript_id = "ALL_EXONS")
   
   ggplot(exons, aes(xstart = start, xend = end, y = transcript_id)) +
     geom_range() +
     geom_intron(data = to_intron(exons, "transcript_name"), aes(strand = strand)) +
     geom_junction(data = jct_df, junction.y.max = 0.55) +
     geom_junction_label_repel(data = jct_df, aes(label = round(avg, 1)), force=3, junction.y.max = 1, label.size=0.12, size=2.4) +
     theme_minimal() + 
     labs(y=NULL)
 }

 ._report.umi_counts <- function(obj, gene, meta, legend=FALSE) { 
   
   # get celltype_features data for each cell type
   df <- map_df(levels(obj@active.ident), 
                celltype_features, obj=obj, count_matrix=obj@assays$multi@counts, features=meta$feature)
   
   # sort cell types by expresion of major isoform - reuse sorting in meta
   celltype_order <- filter(df, transcript_id == meta$transcript_id[[1]]) %>% 
     arrange(avg) %>% 
     pull(cell_type) 
   
   # relevel factors, isoforms by average total UMI sum
   df <- mutate(df, 
                transcript_id = forcats::fct_reorder(transcript_id, as.numeric(avg), .fun = mean),
                cell_type = fct_relevel(cell_type, levels = celltype_order)) 
   
   # force color mapping for each feature
   color_mapping <- dplyr::select(meta, transcript_id, color) %>% deframe()
   
   p1<- ggplot(df, aes(y=cell_type, x=avg, fill=transcript_id)) +
     geom_bar(position="stack", stat="identity") +
     scale_fill_manual(values = color_mapping, labels=meta$external_transcript_name) + 
     theme_minimal() +
     theme(axis.text.x = element_text(angle=45, hjust=1, size=7),
           axis.text.y = element_text(size=7),
           axis.title.x = element_text(size=7)) + 
     guides(fill = guide_legend(nrow = 1, reverse = TRUE)) +
     labs(x="Avg UMI per cell", y=NULL, fill="") + 
     theme(legend.position = "none")

   return(p1)
}

 
._report.dotpot <- function(obj, obj_assay, meta, celltype_order=NULL, switch=NULL) {

   normal_data <- obj@assays[[obj_assay]]@data
   scaled_data <- obj@assays[[obj_assay]]@scale.data

   df <- data.frame()
   for(cell_type in levels(obj@active.ident)) {


     cell_list <- WhichCells(obj, ident = cell_type)
     n_cells = length(cell_list)
     isofs <- meta$feature

     normal_counts <- normal_data[isofs, cell_list, drop=FALSE]

     cell_df <- data.frame(feature = rownames(normal_counts)) %>%
       separate(feature, into=c("gene_id", "transcript_id"), sep="\\.\\.", remove=FALSE) %>%
       mutate(avg_scaled_value = rowSums(scaled_data[isofs, cell_list, drop=FALSE]) / n_cells,
              cell_expr  = rowSums(normal_counts > 0),
              cell_count = n_cells,
              type = cell_type)

     df <- rbind(df, cell_df)
   }

   # Apply same logic on which cell_types to show as ._isoswitch_report.umi_counts
   # -> cell_types with NO counts are removed
   # -> For cts with counts < 1%, keep them but set to 0 to avoid drawing dot on dotplot
   d2 <- df %>%
     filter(cell_expr > 0 ) %>%
     mutate(perc_expr = (cell_expr/cell_count)*100) %>%
     mutate(perc_expr = if_else(perc_expr < 1, 0, perc_expr)) %>%
     left_join(meta, by="transcript_id")

   # limits for the expression scale, ensures it's symmetric around zero
   top_value <- max(abs(df$avg_scaled_value))

   # main dotplot figure
   p2 <- ggplot(d2,
                aes(x=factor(external_transcript_name, levels=meta$external_transcript_name),
                    y=factor(type, levels=celltype_order),
                    fill = avg_scaled_value,
                    size = perc_expr)) +
     geom_point(shape=21, color="#333333") +
     scale_fill_distiller(palette="RdBu", limits=c(-top_value, top_value)) +
     theme_minimal() +
     theme(axis.text.x = element_text(angle=0, hjust=0.5, size=8),
           axis.text.y = element_text(size=8),
           legend.title = element_text(size=9),
           panel.grid.major = element_blank(),
           panel.grid.minor = element_blank(),
           axis.line = element_blank()) +
     labs(x=NULL, y=NULL, fill="Expression", size="% cells")

   return(p2)
 }

```

# Isoforms
```{r echo=FALSE, fig.width=10, fig.height=1} 
 DefaultAssay(seurat) <- "multi"
 normal_data <- seurat@assays$multi@data
 scaled_data <- seurat@assays$multi@scale.data
 
 # extract isoforms for this gene from matrix count, order by expression
 isofs <- grep(paste("^", params$gene,"\\.", sep=""), rownames(normal_data), value=TRUE)  
 ordered_isofs <- data.frame(isofs = isofs, expr = rowSums(normal_data[isofs, ])) %>%
   arrange(desc(expr)) %>%
   pull(isofs)
 
 # custom scale, brewer.pal(n=10, name="Set3") reordered
 custom_colors = c("#FB8072","#80B1D3","#8DD3C7","#FFFFB3","#BEBADA",
                   "#FDB462","#B3DE69","#FCCDE5","#D9D9D9","#BC80BD")
   
 # build feature metadata (feature => short_name, color, order) shared by all panels 
 meta <- data.frame( feature = ordered_isofs ) %>%
   separate(feature, into=c("gene_id", "transcript_id"), sep="\\.\\.", remove=FALSE) %>%
   left_join(transcript_metadata, by=c("transcript_id"="ensembl_transcript_id")) %>%
   mutate(color = custom_colors[1:length(isofs)])
 
 
 loc_plot <- ._report.locus(gtf_df, params$gene, meta)
 print(loc_plot)
```

### Stats on raw UMI counts:
```{r fig.width=10, fig.height=1}
stats <- iso_compute_stats(seurat@assays$multi@counts) %>%
  filter(gene_id == params$gene) %>%
  dplyr::select(-c(gene_id, feature,max_sum, n_isofs)) %>%
  left_join(dplyr::select(meta, transcript_id, external_transcript_name, transcript_biotype, cds_length), by="transcript_id") %>%
  rename(id = external_transcript_name) %>%
  rename(biotype = transcript_biotype) %>%
  relocate(c(biotype,cds_length), .after=transcript_id) %>%
  relocate(id)

knitr::kable(stats)
                         
```

### Cell-type expression:
```{r fig.height=3.5, fig.width=12, warning=FALSE}
umi_plot <- ._report.umi_counts(seurat, params$gene, meta)
dot_plot <- ._report.dotpot(seurat, "multi", meta, celltype_order=levels(umi_plot$data$cell_type))
print(umi_plot | dot_plot)
```


```{r fig.width=12, fig.height=4}
DefaultAssay(seurat) <- "multi"
print(FeaturePlot(seurat, features = meta$feature, ncol=4, raster=TRUE))
```


# Switches
   
```{r }
gene_switches <- compute_switches(params$markers, gene=params$gene)
```

`r nrow(gene_switches)` switches found for `r params$gene`
```{r}
knitr::kable(format_switch_table(gene_switches))
```

