Immune predictions correlated with metaviper protein activity

First we get the metaviper predictions and deconvolution predictions from Synapse

# Fetch the immune deconvolution predictions from the Synapse table named by
# `deconv_scores`; the `score` column is renamed to `immScore` so it cannot be
# confused with the protein scores loaded afterwards.
dtab <- synapser::synTableQuery(
  paste('select * from', deconv_scores)
)$asDataFrame() %>%
  rename(immScore = 'score')
## 
 [####################]100.00%   1/1   Done...    
Downloading  [####################]100.00%   1.1MB/1.1MB (2.3MB/s) Job-9912014614742096023982503.csv Done...
## Read the metaviper scores (tab-delimited file fetched from Synapse) and
## rename its columns so they line up with the immune table:
## `sample` -> `specimenID`, `counts` -> `protScore`.
mtab <- read.csv(synapser::synGet(metaviper_scores)$path, sep = '\t') %>%
  rename(specimenID = 'sample', protScore = 'counts')

## Discard the xcell predictions before joining.
dtab <- subset(dtab, method != 'xcell')

## Attach the protein scores to every immune prediction by specimen, then
## remove the high- and low-grade glioma samples.
combined <- dtab %>%
  select(cell_type, method, specimenID, immScore) %>%
  left_join(mtab, by = 'specimenID') %>%
  subset(!conditions %in% c('High Grade Glioma', 'Low Grade Glioma'))

Then we can compute the correlation of each cell type measurement with each protein

# One correlation per (cell type, gene, method): immune score against protein
# score across specimens, using only the pairs where both values are present.
corVals <- combined %>%
  group_by(cell_type, gene, method) %>%
  summarize(
    corVal = cor(immScore, protScore, use = 'pairwise.complete.obs')
  )

corVals
## # A tibble: 195,264 x 4
## # Groups:   cell_type, gene [176,958]
##    cell_type gene   method       corVal
##    <chr>     <fct>  <chr>         <dbl>
##  1 B cell    AATF   mcp_counter  0.182 
##  2 B cell    ABCA1  mcp_counter  0.156 
##  3 B cell    ABCC8  mcp_counter  0.0404
##  4 B cell    ABCC9  mcp_counter -0.125 
##  5 B cell    ABCG1  mcp_counter  0.0283
##  6 B cell    ABCG4  mcp_counter -0.361 
##  7 B cell    ABI1   mcp_counter -0.0101
##  8 B cell    ABL1   mcp_counter  0.131 
##  9 B cell    ABL2   mcp_counter  0.300 
## 10 B cell    ABLIM3 mcp_counter -0.182 
## # … with 195,254 more rows

Plotting protein correlations

We are interested in particular protein signaling activity that might give rise to specific immune phenotypes.

##now how do we bracket them?
## Plot the distribution of protein correlations for each cell type, with the
## boxes filled by prediction method.
# library() stops with an error when ggplot2 is not installed; require() would
# only return FALSE and let the script fail later with a less obvious message.
library(ggplot2)
p <- ggplot(corVals) +
  geom_boxplot(aes(x = cell_type, y = corVal, fill = method)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Correlation of metaviper proteins with predicted cell type")
print(p)

There are some proteins that show up as highly correlated. By choosing a threshold, we can evaluate what they are in more detail.

# Minimum correlation for a protein to count as "highly correlated".
corthresh <- 0.65

## Keep the distinct (cell type, method) pairs that have at least one protein
## whose correlation exceeds the threshold.
cor_cell_types <- subset(corVals, corVal > corthresh) %>%
  ungroup() %>%
  select(cell_type, method) %>%
  unique()

print(paste(
  'we found', nrow(cor_cell_types),
  'cell types with some protein correlation greater than', corthresh
))
## [1] "we found 12 cell types with some protein correlation greater than 0.65"
## For every (cell type, method) pair in `cor_cell_types`, draw a scatter plot
## of immune score against protein score for the proteins whose correlation
## exceeds `corthresh`, keeping at most the `max_genes` best-correlated ones.
##
## Iterating over row indices (instead of apply() on the table) means nothing
## is run when no pair passed the threshold, and invisible() stops the returned
## list of plots from being auto-printed, which rendered every plot twice.
invisible(lapply(seq_len(nrow(cor_cell_types)), function(i) {
  ct <- cor_cell_types$cell_type[[i]]
  m <- cor_cell_types$method[[i]]
  max_genes <- 12

  # proteins above the threshold for this cell type and method, best first
  genes <- subset(corVals, cell_type == ct & method == m & corVal > corthresh) %>%
    arrange(desc(corVal)) %>%
    ungroup()

  if (nrow(genes) > max_genes) {
    # Too many hits to plot legibly: keep the top `max_genes` and report the
    # correlation of the last protein kept as the effective threshold, so the
    # title always matches what is actually shown.
    genes <- genes[seq_len(max_genes), ]
    new.corthresh <- format(genes$corVal[max_genes], digits = 3)
  } else {
    new.corthresh <- corthresh
  }

  # per-specimen scores for the selected proteins in this cell type and method
  scores <- subset(combined, gene %in% genes$gene & cell_type == ct & method == m)

  p2 <- ggplot(scores) +
    geom_point(aes(x = immScore, y = protScore, col = gene, shape = conditions)) +
    scale_x_log10() +
    ggtitle(paste(m, 'predictions of', ct, 'correlation >', new.corthresh))
  print(p2)
  # ggsave(paste0(m,'predictions of',gsub(" ","",gsub("/","",ct)),'cor',new.corthresh,'.pdf'))
}))

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

#parentid='syn20710537'
#for(fi in list.files('.')[grep('tions',list.files('.'))])
#  synapser::synStore(synapser::File(fi,parentId=parentid,annotations=list(resourceType='analysis',isMultiSpecimen='TRUE',isMultiIndividual='TRUE')),used=c(deconv_scores,metaviper_scores),executed=this.script)