Immune predictions correlated with MultiPLIER latent variable expression

First we get the MultiPLIER latent variable (LV) values from the nf-lv-viz repo and the deconvolution predictions from Synapse.

# Immune predictions: query the Synapse table named by `deconv_scores`,
# convert the result to a data frame, and rename its `score` column to
# `immScore`.
dtab <- rename(
  synapser::synTableQuery(paste('select * from', deconv_scores))$asDataFrame(),
  immScore = 'score'
)
## 
 [####################]100.00%   1/1   Done...    
Downloading  [####################]100.00%   1.1MB/1.1MB (2.3MB/s) Job-99654986140571315169200139.csv Done...
## Get latent variable scores from the Synapse table syn21046991.
## The query result is downloaded as a CSV and read with readr. Rows are
## kept only if they have a tumorType, their specimenID does not mention
## "xenograft" (case-insensitive), and the specimen is not one of the five
## explicitly excluded IDs below (the reason for excluding them is not
## recorded in this document).
mp_res <- synapser::synTableQuery("SELECT * FROM syn21046991")$filepath %>%
  readr::read_csv() %>%
  filter(!is.na(tumorType),
         # TRUE rather than T: T is an ordinary variable and can be reassigned
         !grepl('xenograft', specimenID, ignore.case = TRUE),
         !specimenID %in% c("BI386-004", "CW225-001", "DW356-002",
                            "JK368-003", "SK436-005"))
## 
Create CSV FileHandle [##########----------]50.09%   79022/157768       
Create CSV FileHandle [####################]100.00%   157768/157768   Done...    
Downloading  [#-------------------]7.43%   2.0MB/26.9MB (3.6MB/s) Job-9965499168163671731018352.csv     
Downloading  [###-----------------]14.85%   4.0MB/26.9MB (4.9MB/s) Job-9965499168163671731018352.csv     
Downloading  [####----------------]22.28%   6.0MB/26.9MB (5.7MB/s) Job-9965499168163671731018352.csv     
Downloading  [######--------------]29.71%   8.0MB/26.9MB (6.5MB/s) Job-9965499168163671731018352.csv     
Downloading  [#######-------------]37.13%   10.0MB/26.9MB (6.7MB/s) Job-9965499168163671731018352.csv     
Downloading  [#########-----------]44.56%   12.0MB/26.9MB (6.9MB/s) Job-9965499168163671731018352.csv     
Downloading  [##########----------]51.99%   14.0MB/26.9MB (7.0MB/s) Job-9965499168163671731018352.csv     
Downloading  [############--------]59.41%   16.0MB/26.9MB (7.2MB/s) Job-9965499168163671731018352.csv     
Downloading  [#############-------]66.84%   18.0MB/26.9MB (7.2MB/s) Job-9965499168163671731018352.csv     
Downloading  [###############-----]74.27%   20.0MB/26.9MB (7.3MB/s) Job-9965499168163671731018352.csv     
Downloading  [################----]81.69%   22.0MB/26.9MB (7.4MB/s) Job-9965499168163671731018352.csv     
Downloading  [##################--]89.12%   24.0MB/26.9MB (7.4MB/s) Job-9965499168163671731018352.csv     
Downloading  [###################-]96.54%   26.0MB/26.9MB (7.6MB/s) Job-9965499168163671731018352.csv     
Downloading  [####################]100.00%   26.9MB/26.9MB (7.5MB/s) Job-9965499168163671731018352.csv Done...
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   ROW_ID = col_double(),
##   ROW_VERSION = col_double(),
##   value = col_double(),
##   isCellLine = col_logical()
## )
## See spec(...) for full column specifications.
# Discard the xcell predictions.
dtab <- subset(dtab, method != 'xcell')

# Attach the latent variable rows to each immune prediction, matching on
# specimenID; predictions with no matching specimen are kept (left join).
combined <- left_join(
  select(dtab, cell_type, method, specimenID, immScore),
  mp_res,
  by = 'specimenID'
)

Then we can compute the Spearman correlation of each cell type measurement with each latent variable.

# One Spearman correlation between immune score and latent variable value
# per (cell type, latent variable, method) combination.
corVals <- summarize(
  group_by(combined, cell_type, latent_var, method),
  corVal = cor(immScore, value, method = "spearman")
)

# Browsable table of all correlations.
DT::datatable(corVals)

Plotting LV correlations

We are interested in particular protein signaling activity that might give rise to specific immune phenotypes.

## Open question: how do we bracket them?
## Plot the distribution of LV correlations by cell type and method.
# library() stops with an error if ggplot2 is not installed; require() would
# only return FALSE and let the script fail later at the ggplot() call.
library(ggplot2)
p <- ggplot(corVals) +
  geom_boxplot(aes(x = cell_type, y = corVal, fill = method)) +
  # angled labels keep the cell type names on the x axis readable
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Correlation of MultiPLIER Latent Variables with predicted cell type")
print(p)

There are some latent variables that show up as highly correlated. By choosing a threshold, we can evaluate what they are in more detail.

# Absolute correlation a latent variable must exceed to be looked at further.
corthresh <- 0.6

## Distinct (cell type, method) pairs that have at least one latent variable
## correlated or anti-correlated beyond the threshold.
cor_cell_types <- unique(
  select(
    ungroup(subset(corVals, abs(corVal) > corthresh)),
    cell_type, method
  )
)
print(paste('we found',nrow(cor_cell_types),'cell types with some latent variable correlation greater than',corthresh))
## [1] "we found 13 cell types with some latent variable correlation greater than 0.6"
# Scatter-plot immune score against latent variable value for the latent
# variables most correlated with one (cell type, method) pair.
#
# ct       cell type to plot (character)
# m        deconvolution method to plot (character)
# max_lvs  maximum number of latent variables drawn in one plot
#
# Reads corVals, combined and corthresh from the enclosing environment.
# Prints the plot and returns it invisibly.
plot_correlated_lvs <- function(ct, m, max_lvs = 12) {
  # latent variables beyond the threshold for this pair, highest first
  lvs <- corVals %>%
    ungroup() %>%
    subset(cell_type == ct) %>%
    subset(abs(corVal) > corthresh) %>%
    subset(method == m) %>%
    arrange(desc(corVal))

  if (nrow(lvs) > max_lvs) {
    # Report the correlation of the first latent variable that is left out,
    # so the "correlation >" title describes the ones that are drawn. This
    # element always exists inside this branch; the previous hard-coded
    # index 15 was NA whenever there were only 13 or 14 candidates.
    new.corthresh <- format(lvs$corVal[max_lvs + 1], digits = 3)
    lvs <- lvs[seq_len(max_lvs), ]
  } else {
    new.corthresh <- corthresh
  }

  scores <- combined %>%
    subset(latent_var %in% lvs$latent_var) %>%
    subset(cell_type == ct) %>%
    subset(method == m)

  # long latent variable names are truncated so the legend stays readable
  p2 <- ggplot(scores %>% mutate(latent_var = stringr::str_trunc(latent_var, 30))) +
    geom_point(aes(x = immScore, y = value,
                   col = latent_var, shape = tumorType)) +
    scale_x_log10() +
    ggtitle(paste(m,'predictions of',ct,'correlation >',new.corthresh))
  print(p2)
  # ggsave(paste0(m,'predictions of',gsub(" ","",gsub("/","",ct)),'cor',new.corthresh,'.pdf'))
  invisible(p2)
}

# One plot per (cell type, method) pair. A loop over row indices replaces
# apply(): apply() coerces the data frame to a character matrix and returns
# the list of plots, which was then auto-printed at top level, drawing every
# plot a second time.
for (i in seq_len(nrow(cor_cell_types))) {
  plot_correlated_lvs(
    as.character(cor_cell_types$cell_type[[i]]),
    as.character(cor_cell_types$method[[i]])
  )
}

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

#parentid='syn20710537'
#for(fi in list.files('.')[grep('tions',list.files('.'))])
#  synapser::synStore(synapser::File(fi,parentId=parentid,annotations=list(resourceType='analysis',isMultiSpecimen='TRUE',isMultiIndividual='TRUE')),used=c(deconv_scores,metaviper_scores),executed=this.script)