Immune predictions correlated with MultiPLIER latent variable expression
First we get the MultiPLIER latent variable (LV) values from the nf-lv-viz repo and the deconvolution predictions from Synapse.
# Get immune predictions: query the Synapse table whose id is held in
# `deconv_scores`, then expose its `score` column under the name `immScore`.
dtab <- synapser::synTableQuery(
  paste('select * from', deconv_scores)
)$asDataFrame()
dtab <- rename(dtab, immScore = 'score')
##
[####################]100.00% 1/1 Done...
Downloading [####################]100.00% 1.1MB/1.1MB (2.3MB/s) Job-99654986140571315169200139.csv Done...
## Get latent variable scores from the Synapse table syn21046991.
## Rows are kept only when they have a tumor type, are not xenografts
## (case-insensitive match on the specimen id), and are not one of the
## explicitly listed specimens.
mp_res <- synapser::synTableQuery("SELECT * FROM syn21046991")$filepath %>%
  readr::read_csv() %>%
  filter(
    !is.na(tumorType),
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    !grepl('xenograft', specimenID, ignore.case = TRUE),
    # NOTE(review): the reason these specimens are excluded is not recorded
    # here -- confirm and document.
    !specimenID %in% c(
      "BI386-004", "CW225-001", "DW356-002",
      "JK368-003", "SK436-005"
    )
  )
##
Create CSV FileHandle [##########----------]50.09% 79022/157768
Create CSV FileHandle [####################]100.00% 157768/157768 Done...
Downloading [#-------------------]7.43% 2.0MB/26.9MB (3.6MB/s) Job-9965499168163671731018352.csv
Downloading [###-----------------]14.85% 4.0MB/26.9MB (4.9MB/s) Job-9965499168163671731018352.csv
Downloading [####----------------]22.28% 6.0MB/26.9MB (5.7MB/s) Job-9965499168163671731018352.csv
Downloading [######--------------]29.71% 8.0MB/26.9MB (6.5MB/s) Job-9965499168163671731018352.csv
Downloading [#######-------------]37.13% 10.0MB/26.9MB (6.7MB/s) Job-9965499168163671731018352.csv
Downloading [#########-----------]44.56% 12.0MB/26.9MB (6.9MB/s) Job-9965499168163671731018352.csv
Downloading [##########----------]51.99% 14.0MB/26.9MB (7.0MB/s) Job-9965499168163671731018352.csv
Downloading [############--------]59.41% 16.0MB/26.9MB (7.2MB/s) Job-9965499168163671731018352.csv
Downloading [#############-------]66.84% 18.0MB/26.9MB (7.2MB/s) Job-9965499168163671731018352.csv
Downloading [###############-----]74.27% 20.0MB/26.9MB (7.3MB/s) Job-9965499168163671731018352.csv
Downloading [################----]81.69% 22.0MB/26.9MB (7.4MB/s) Job-9965499168163671731018352.csv
Downloading [##################--]89.12% 24.0MB/26.9MB (7.4MB/s) Job-9965499168163671731018352.csv
Downloading [###################-]96.54% 26.0MB/26.9MB (7.6MB/s) Job-9965499168163671731018352.csv
Downloading [####################]100.00% 26.9MB/26.9MB (7.5MB/s) Job-9965499168163671731018352.csv Done...
## Parsed with column specification:
## cols(
## .default = col_character(),
## ROW_ID = col_double(),
## ROW_VERSION = col_double(),
## value = col_double(),
## isCellLine = col_logical()
## )
## See spec(...) for full column specifications.
# Discard the xCell predictions, then attach the latent variable table to the
# remaining deconvolution scores, matching rows on the specimen id.
dtab <- dtab %>%
  subset(method != 'xcell')
combined <- dtab %>%
  select(cell_type, method, specimenID, immScore) %>%
  left_join(mp_res, by = 'specimenID')
Then we can compute the correlation of each cell type measurement with each latent variable.
# Spearman correlation between the immune score and the latent variable value,
# computed separately for every (cell type, latent variable, method) group.
corVals <- combined %>%
  group_by(cell_type, latent_var, method) %>%
  summarize(corVal = cor(immScore, value, method = "spearman"))
# Show the correlations as an interactive table.
DT::datatable(corVals)
Plotting LV correlations
We are interested in particular protein signaling activity that might give rise to specific immune phenotypes.
## now how do we bracket them?
## Plot correlation distributions by cell type and method.
# library() stops with an error when ggplot2 is not installed; require() would
# only return FALSE and let the failure surface later at the ggplot() call.
library(ggplot2)
p <- ggplot(corVals) +
  geom_boxplot(aes(x = cell_type, y = corVal, fill = method)) +
  # Angle the cell type labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Correlation of MultiPLIER Latent Variables with predicted cell type")
print(p)

There are some latent variables that show up as highly correlated. By choosing a threshold, we can evaluate what they are in more detail.
# Absolute correlation a latent variable must exceed to count as correlated.
corthresh <- 0.6
## Keep the distinct (cell type, method) pairs that have at least one latent
## variable correlated or anticorrelated beyond the threshold.
cor_cell_types <- corVals %>%
  subset(abs(corVal) > corthresh) %>%
  ungroup() %>%
  select(cell_type, method) %>%
  unique()
print(
  paste(
    'we found',
    nrow(cor_cell_types),
    'cell types with some latent variable correlation greater than',
    corthresh
  )
)
## [1] "we found 13 cell types with some latent variable correlation greater than 0.6"
# For every (cell type, method) pair in `cor_cell_types`, scatter the immune
# score against the values of the latent variables whose correlation with that
# cell type exceeds `corthresh` in absolute value. One plot is printed per pair.
#
# Iterating over row indices (instead of apply() on the data frame) avoids the
# matrix coercion and does nothing when `cor_cell_types` has no rows, whereas
# apply() still calls its function once on a zero-extent input.
# invisible() stops the returned list of plots from being auto-printed, which
# previously rendered every figure a second time.
invisible(lapply(seq_len(nrow(cor_cell_types)), function(i) {
  # as.character() mirrors the character coercion apply() used to perform.
  ct <- as.character(cor_cell_types$cell_type[[i]])
  m <- as.character(cor_cell_types$method[[i]])
  # Maximum number of latent variables drawn in a single plot.
  max_lvs <- 12

  # Latent variables for this cell type and method beyond the threshold,
  # ordered from most positive correlation downwards.
  lvs <- subset(corVals, cell_type == ct) %>%
    subset(abs(corVal) > corthresh) %>%
    subset(method == m) %>%
    arrange(desc(corVal)) %>%
    ungroup()

  if (nrow(lvs) > max_lvs) {
    # Keep only the top rows and report the smallest correlation that is
    # actually plotted. The previous code read row 15 while keeping 12 rows,
    # which gave NA for 13-14 rows and a value for an unplotted row otherwise.
    lvs <- lvs[seq_len(max_lvs), ]
    new.corthresh <- format(lvs$corVal[max_lvs], digits = 3)
  } else {
    new.corthresh <- corthresh
  }

  scores <- subset(combined, latent_var %in% lvs$latent_var) %>%
    subset(cell_type == ct) %>%
    subset(method == m)

  # Latent variable names are truncated to 30 characters for the legend.
  p2 <- ggplot(
    scores %>% mutate(latent_var = stringr::str_trunc(latent_var, 30))
  ) +
    geom_point(aes(
      x = immScore, y = value,
      col = latent_var, shape = tumorType
    )) +
    scale_x_log10() +
    ggtitle(paste(m, 'predictions of', ct, 'correlation >', new.corthresh))
  print(p2)
  # ggsave(paste0(m,'predictions of',gsub(" ","",gsub("/","",ct)),'cor',new.corthresh,'.pdf'))
}))













## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

##
## [[11]]

##
## [[12]]

##
## [[13]]

#parentid='syn20710537'
#for(fi in list.files('.')[grep('tions',list.files('.'))])
# synapser::synStore(synapser::File(fi,parentId=parentid,annotations=list(resourceType='analysis',isMultiSpecimen='TRUE',isMultiIndividual='TRUE')),used=c(deconv_scores,metaviper_scores),executed=this.script)