Latent Variables correlated with metaviper protein activity

First, we retrieve the metaviper protein activity predictions and the latent variable (LV) scores from Synapse.

# Query the Synapse table identified by `mp_scores` and keep only the rows
# whose isCellLine flag is not 'TRUE' (subset() also drops rows where the
# flag is NA).
lv_query <- paste('select * from', mp_scores)
dtab <- subset(
  synapser::synTableQuery(lv_query)$asDataFrame(),
  isCellLine != 'TRUE'
)
## 
Create CSV FileHandle [##########----------]50.38%   79481/157768       
Create CSV FileHandle [####################]100.00%   157768/157768   Done...    
Downloading  [#-------------------]7.43%   2.0MB/26.9MB (3.4MB/s) Job-99866576920768443768110964.csv     
Downloading  [###-----------------]14.85%   4.0MB/26.9MB (5.2MB/s) Job-99866576920768443768110964.csv     
Downloading  [####----------------]22.28%   6.0MB/26.9MB (6.3MB/s) Job-99866576920768443768110964.csv     
Downloading  [######--------------]29.71%   8.0MB/26.9MB (6.9MB/s) Job-99866576920768443768110964.csv     
Downloading  [#######-------------]37.13%   10.0MB/26.9MB (7.1MB/s) Job-99866576920768443768110964.csv     
Downloading  [#########-----------]44.56%   12.0MB/26.9MB (7.5MB/s) Job-99866576920768443768110964.csv     
Downloading  [##########----------]51.99%   14.0MB/26.9MB (7.8MB/s) Job-99866576920768443768110964.csv     
Downloading  [############--------]59.41%   16.0MB/26.9MB (8.0MB/s) Job-99866576920768443768110964.csv     
Downloading  [#############-------]66.84%   18.0MB/26.9MB (7.3MB/s) Job-99866576920768443768110964.csv     
Downloading  [###############-----]74.27%   20.0MB/26.9MB (7.3MB/s) Job-99866576920768443768110964.csv     
Downloading  [################----]81.69%   22.0MB/26.9MB (7.4MB/s) Job-99866576920768443768110964.csv     
Downloading  [##################--]89.12%   24.0MB/26.9MB (7.4MB/s) Job-99866576920768443768110964.csv     
Downloading  [###################-]96.54%   26.0MB/26.9MB (7.5MB/s) Job-99866576920768443768110964.csv     
Downloading  [####################]100.00%   26.9MB/26.9MB (7.5MB/s) Job-99866576920768443768110964.csv Done...
# Fetch the metaviper score file from Synapse, read it as a tab-delimited
# table, and rename `sample` -> `specimenID` and `counts` -> `protScore` so the
# column names line up with the latent-variable table.
metaviper_path <- synapser::synGet(metaviper_scores)$path
mtab <- read.delim(metaviper_path) %>%
  rename(specimenID = 'sample', protScore = 'counts')


# Specimens present in both the latent-variable table and the metaviper table.
samps <- intersect(dtab$specimenID, mtab$specimenID)

# Restrict to the shared specimens, then keep only latent variables whose
# values vary across those specimens (per-variable standard deviation > 0.05).
shared_scores <- subset(dtab, specimenID %in% samps)
mp_res <- shared_scores %>%
  group_by(latent_var) %>%
  mutate(sd_value = sd(value)) %>%
  ungroup() %>%
  filter(sd_value > 0.05) %>%
  select(latent_var, value, tumorType, specimenID, sd_value, diagnosis)

# One row per (latent variable value, protein score) pair for each specimen.
combined <- inner_join(mp_res, mtab, by = 'specimenID')

Then we can compute the correlation of each latent variable's values with each protein's metaviper activity score.

# Correlation between latent-variable value and protein score, computed
# separately for every (latent variable, gene) pair. The result stays grouped
# by latent_var after summarise().
corVals <- combined %>%
  group_by(latent_var, gene) %>%
  summarise(
    corVal = cor(value, protScore, use = 'pairwise.complete.obs')
  )

corVals
## # A tibble: 604,098 x 3
## # Groups:   latent_var [99]
##    latent_var               gene    corVal
##    <chr>                    <fct>    <dbl>
##  1 1,REACTOME_MRNA_SPLICING AATF    0.561 
##  2 1,REACTOME_MRNA_SPLICING ABCA1  -0.506 
##  3 1,REACTOME_MRNA_SPLICING ABCC8   0.0123
##  4 1,REACTOME_MRNA_SPLICING ABCC9  -0.557 
##  5 1,REACTOME_MRNA_SPLICING ABCG1  -0.646 
##  6 1,REACTOME_MRNA_SPLICING ABCG4   0.232 
##  7 1,REACTOME_MRNA_SPLICING ABI1   -0.770 
##  8 1,REACTOME_MRNA_SPLICING ABL1    0.254 
##  9 1,REACTOME_MRNA_SPLICING ABL2   -0.0351
## 10 1,REACTOME_MRNA_SPLICING ABLIM3 -0.676 
## # … with 604,088 more rows

Plotting protein correlations

We are interested in particular protein signaling activity that might give rise to specific immune phenotypes.

## Plot the distribution of protein correlations for each latent variable.
# library() rather than require(): a missing ggplot2 should stop the script
# here with a clear error instead of returning FALSE and failing at ggplot().
library(ggplot2)

# One box per latent variable; x labels are rotated because the names are long.
# (If shorter labels are wanted, stringr::str_trunc() shortens strings to a
# fixed width; str_trim() only strips whitespace.)
p <- corVals %>%
  ungroup() %>%
  ggplot() +
  geom_boxplot(aes(x = latent_var, y = corVal)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Correlation of metaviper proteins with lv")
print(p)

There are some proteins that show up as highly correlated. By choosing a threshold, we can evaluate what they are in more detail.

# Minimum correlation for a protein to be considered associated with a
# latent variable.
corthresh <- 0.65

## Latent variables that have at least one protein above the threshold
## (one row per distinct latent_var).
above_thresh <- ungroup(subset(corVals, corVal > corthresh))
cor_cell_types <- unique(select(above_thresh, latent_var))
n_found <- nrow(cor_cell_types)
print(paste('we found', n_found, 'cell types with some protein correlation greater than', corthresh))
## [1] "we found 95 cell types with some protein correlation greater than 0.65"
# For every latent variable with at least one protein above `corthresh`, draw a
# scatter plot of latent-variable value (log10 x axis) against protein score
# for its most strongly correlated proteins.
#
# Iterating over the latent_var column with lapply() avoids apply() coercing
# the data frame to a matrix and behaves sensibly when no latent variable
# passed the threshold (nothing is plotted).
# NOTE(review): as before, the list returned here is auto-printed at top level
# (the `[[n]]` output); wrap the call in invisible() if that is not wanted.
max_genes <- 12  # upper bound on the number of proteins drawn per plot

lapply(cor_cell_types$latent_var, function(ct) {
  # Proteins above the threshold for this latent variable, strongest first.
  genes <- subset(corVals, latent_var == ct) %>%
    subset(corVal > corthresh) %>%
    arrange(desc(corVal)) %>%
    ungroup()

  if (nrow(genes) > max_genes) {
    # The title reports the effective cutoff: the correlation of the first
    # protein that is NOT shown. It always exists in this branch. (The previous
    # code read element 15, which is NA when only 13 or 14 proteins pass.)
    new.corthresh <- format(genes$corVal[max_genes + 1], digits = 3)
    genes <- head(genes, max_genes)
  } else {
    new.corthresh <- corthresh
  }

  # Per-specimen values for the selected proteins and this latent variable.
  scores <- subset(combined, gene %in% genes$gene) %>%
    subset(latent_var == ct)

  # `conditions` is presumably a column contributed by the metaviper table;
  # verify it exists in `combined`.
  p2 <- ggplot(scores) +
    geom_point(aes(x = value, y = protScore, col = gene, shape = conditions)) +
    scale_x_log10() +
    ggtitle(paste(ct, 'correlation >', new.corthresh))
  print(p2)
  # ggsave(paste0('predictions of',gsub(" ","",gsub("/","",ct)),'cor',new.corthresh,'.pdf'))
})

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

## 
## [[28]]

## 
## [[29]]

## 
## [[30]]

## 
## [[31]]

## 
## [[32]]

## 
## [[33]]

## 
## [[34]]

## 
## [[35]]

## 
## [[36]]

## 
## [[37]]

## 
## [[38]]

## 
## [[39]]

## 
## [[40]]

## 
## [[41]]

## 
## [[42]]

## 
## [[43]]

## 
## [[44]]

## 
## [[45]]

## 
## [[46]]

## 
## [[47]]

## 
## [[48]]

## 
## [[49]]

## 
## [[50]]

## 
## [[51]]

## 
## [[52]]

## 
## [[53]]

## 
## [[54]]

## 
## [[55]]

## 
## [[56]]

## 
## [[57]]

## 
## [[58]]

## 
## [[59]]

## 
## [[60]]

## 
## [[61]]

## 
## [[62]]

## 
## [[63]]

## 
## [[64]]

## 
## [[65]]

## 
## [[66]]

## 
## [[67]]

## 
## [[68]]

## 
## [[69]]

## 
## [[70]]

## 
## [[71]]

## 
## [[72]]

## 
## [[73]]

## 
## [[74]]

## 
## [[75]]

## 
## [[76]]

## 
## [[77]]

## 
## [[78]]

## 
## [[79]]

## 
## [[80]]

## 
## [[81]]

## 
## [[82]]

## 
## [[83]]

## 
## [[84]]

## 
## [[85]]

## 
## [[86]]

## 
## [[87]]

## 
## [[88]]

## 
## [[89]]

## 
## [[90]]

## 
## [[91]]

## 
## [[92]]

## 
## [[93]]

## 
## [[94]]

## 
## [[95]]

#parentid='syn20710537'
#for(fi in list.files('.')[grep('tions',list.files('.'))])
#  synapser::synStore(synapser::File(fi,parentId=parentid,annotations=list(resourceType='analysis',isMultiSpecimen='TRUE',isMultiIndividual='TRUE')),used=c(deconv_scores,metaviper_scores),executed=this.script)