The data used in this post come from two resources: [1] the Drug Repurposing Hub section of CLUE.IO (https://clue.io/repurposing#download-data) and [2] the cell surface proteins (CSPs) catalogued in SurfaceomeDB.

CLUE.IO: The Drug Repurposing Hub is a curated and annotated collection of FDA-approved drugs, clinical trial drugs, and pre-clinical tool compounds with a companion information resource. The dataset used here was downloaded on May 11, 2020.

# required packages
rm(list = ls())
library(tidyverse)

Out of 6798 total drugs, 2427 are already launched, while 458 are in Phase 3 clinical trials.

# Tally how many drugs sit in each clinical-trial phase
df_phases <- drug_gene %>%
  group_by(clinical_phase) %>%
  summarise(count = n())
head(df_phases, n = 6L)
## # A tibble: 6 x 2
##   clinical_phase  count
##   <fct>           <int>
## 1 Launched         2427
## 2 Phase 1           566
## 3 Phase 1/Phase 2    85
## 4 Phase 2           813
## 5 Phase 2/Phase 3    44
## 6 Phase 3           458

Out of 6798 total drugs, 424 drugs are classified under “infectious disease”, and nearly all of them (423/424) are already launched. A further 4576 drugs have no disease area annotated; only 213 of these are launched, so the remainder are still in earlier (clinical or pre-clinical) phases.

# Count drugs per disease area, then rank the areas from most to fewest drugs
df_disease_area <- drug_gene %>%
  group_by(disease_area) %>%
  summarise(count = n())
df_disease_area_order <- df_disease_area %>%
  arrange(desc(count))
head(df_disease_area_order)
## # A tibble: 6 x 2
##   disease_area           count
##   <fct>                  <int>
## 1 ""                      4576
## 2 "infectious disease"     424
## 3 "neurology/psychiatry"   346
## 4 "cardiology"             205
## 5 "gastroenterology"       124
## 6 "endocrinology"          122
# Same per-area ranking, restricted to drugs whose clinical phase is "Launched"
df_disease_area_launched <- drug_gene %>%
  filter(clinical_phase == "Launched") %>%
  group_by(disease_area) %>%
  summarise(count = n())
df_disease_area_launched_order <- df_disease_area_launched %>%
  arrange(desc(count))
head(df_disease_area_launched_order)
## # A tibble: 6 x 2
##   disease_area           count
##   <fct>                  <int>
## 1 "infectious disease"     423
## 2 "neurology/psychiatry"   346
## 3 ""                       213
## 4 "cardiology"             204
## 5 "gastroenterology"       124
## 6 "endocrinology"          122

Total number of gene targets in the entire list is 2183.

# Break each pipe-delimited target string into individual gene symbols;
# keep the full vector (with repeats) as well as the de-duplicated one
all_genes <- unlist(
  strsplit(as.character(drug_gene[["target"]]), split = "|", fixed = TRUE)
)
unique_genes <- all_genes[!duplicated(all_genes)]

If we focus only on the infectious disease drug targets, there are 149 unique gene targets.

# Gene targets (all and unique) of the drugs annotated as "infectious disease"
drug_gene_inf_dis <- drug_gene[
  which(drug_gene$disease_area == "infectious disease"), ,
  drop = FALSE
]
all_genes_inf_dis <- unlist(
  strsplit(as.character(drug_gene_inf_dis[["target"]]), split = "|", fixed = TRUE)
)
unique_genes_inf_dis <- all_genes_inf_dis[!duplicated(all_genes_inf_dis)]
unique_genes_inf_dis
##   [1] "GABBR1"  "GABBR2"  "MMP12"   "HIF1A"   "PNP"     "TUBA1A"  "TUBB"   
##   [8] "TUBB4B"  "ADRA1A"  "ADRA2A"  "HNMT"    "CYP3A4"  "ATP1A1"  "CYP2B6"
##  [15] "HTR3A"   "HTR3B"   "MLNR"    "IDE"     "SCN10A"  "DAO"     "HRSP12"
##  [22] "PRDX5"   "RAB9A"   "HSPA1A"  "HSPB1"   "CMA1"    "CTSA"    "CTSF"   
##  [29] "CTSK"    "CTSL"    "CTSS"    "SQLE"    "TP53"    "PON1"    "FASN"   
##  [36] "MRGPRX1" "DHFR"    "KCNN4"   "NR1I2"   "NR1I3"   "TRPM2"   "TRPM4"  
##  [43] "TRPM8"   "CYP3A43" "CYP3A5"  "CYP3A7"  "PGR"     "GRIN1"   "PNLIP"  
##  [50] "KCNN1"   "KCNN3"   "ALOX5"   "PTGS1"   "ACHE"    "POU2F2"  "UGT1A1"
##  [57] "DRD2"    "DRD3"    "DPEP1"   "NPY1R"   "NPY2R"   "TRPV5"   "CYP1A2"
##  [64] "CYP2C19" "CYP2C9"  "CYP2D6"  "CYP51A1" "CYP19A1" "CYP2J2"  "PPARA"  
##  [71] "PTGER2"  "TOP2A"   "ABCB1"   "ALB"     "KCNH2"   "SLC47A1" "DNMT1"  
##  [78] "METAP2"  "KRT12"   "GLUD1"   "SDHD"    "TYR"     "TLR7"    "TLR9"   
##  [85] "CYP17A1" "CYP2C8"  "CYP2E1"  "CHRNA7"  "P2RX7"   "GABRB1"  "GLRA1"  
##  [92] "GLRA2"   "GLRA3"   "GLRB"    "MAOA"    "MAOB"    "CA12"    "CA14"   
##  [99] "CA2"     "CA4"     "CA6"     "CA9"     "CCR5"    "PLA2G1B" "SLC22A6"
## [106] "CYP2A6"  "STAT3"   "CES1"    "NEU1"    "NEU2"    "TRDMT1"  "SCN1A"  
## [113] "GABRB3"  "IGF1R"   "TUBA4A"  "CCR2"    "AR"      "GP9"     "KCNB2"  
## [120] "SLC29A4" "ADK"     "ENPP1"   "IMPDH1"  "IMPDH2"  "NT5C2"   "P4HB"   
## [127] "SLCO1A2" "SLCO1B1" "SLCO1B3" "SLCO2B1" "PTPN6"   "DHPS"    "F2"     
## [134] "FSHR"    "P2RY1"   "P2RY11"  "P2RY13"  "P2RY2"   "PLA2G2A" "RYR1"   
## [141] "RYR2"    "SIRT5"   "ALPPL2"  "CHRNA3"  "OXCT1"   "TRPA1"   "TYMS"   
## [148] "ADA"     "TERT"

I used the UniProt ID mapping tool to map the UniProt IDs in SurfaceomeDB to gene names. In total, there are 1247 unique cell surface receptors that were identified by mass spectrometry and other proteomics-based assays.

# Load the tab-separated UniProt-ID -> gene-name mapping of cell surface
# receptors and upper-case the gene names for comparison with drug targets
csr <- read.delim("C:\\Users\\Viswa\\Downloads\\GSE148729\\csr_proteinID_geneName.txt")
csr_genenames <- toupper(csr[["To"]])
head(csr)
##     From       To
## 1 A2A699 Fam171a2
## 2 A2A863    Itgb4
## 3 A2A8L5    Ptprf
## 4 A2AFS3 Kiaa1324
## 5 A2AJN7  Slc4a11
## 6 A2AJQ3  Dpy19l4

Out of the 1247 cell surface receptors, 20 are targets of drugs in the infectious disease area.

# Keep the infectious-disease gene targets that also appear among the
# cell surface receptor gene names
inf_dis_csrs <- unique_genes_inf_dis[
  match(unique_genes_inf_dis, csr_genenames, nomatch = 0L) > 0L
]
inf_dis_csrs
##  [1] "GABBR1"  "GABBR2"  "ADRA2A"  "ATP1A1"  "CMA1"    "CTSA"    "CTSF"   
##  [8] "CTSL"    "PON1"    "GRIN1"   "DPEP1"   "KCNH2"   "P2RX7"   "CA12"   
## [15] "CA4"     "GABRB3"  "IGF1R"   "GP9"     "SLC29A4" "ENPP1"

Subsetting the infectious disease drug metadata to focus only on these 20 cell surface receptors:

# Return the rows of `df` whose pipe-delimited `target` column lists `gene`.
#
# Arguments:
#   gene  Gene symbol(s) to look up; a character vector or a list of them
#         (it is flattened with unlist()). A row is kept if it lists at
#         least one of the supplied symbols.
#   df    Data frame with a `target` column holding gene symbols separated
#         by "|" (character or factor).
#
# Returns:
#   A base data.frame with the matching rows of `df` (zero rows if none match).
gene2drug <- function(gene, df) {
  gene <- unlist(gene)
  # Split each target string into its individual symbols so that only whole
  # symbols match; a regex/substring search would let "AR" hit "PPARA" or
  # "CA4" hit "ABCA4".
  target_symbols <- strsplit(as.character(df$target), split = "|", fixed = TRUE)
  has_gene <- vapply(
    target_symbols,
    function(symbols) any(gene %in% symbols),
    logical(1)
  )
  data.frame(df[has_gene, , drop = FALSE])
}

# Look up the infectious-disease drugs for every cell surface receptor gene,
# stack the per-gene results, and drop rows that were returned more than once
geneList <- as.list(inf_dis_csrs)
df_csr_inf_dis_subset <- lapply(
  geneList,
  function(one_gene) gene2drug(one_gene, df = drug_gene_inf_dis)
)
cell_surface_infectious_disease_drugs <- do.call(rbind, df_csr_inf_dis_subset)
csr_inf_drugs <- cell_surface_infectious_disease_drugs[
  !duplicated(cell_surface_infectious_disease_drugs), ,
  drop = FALSE
]
# Show columns 1, 3, 4 and 6 of the de-duplicated table
csr_inf_drugs[c(1, 3, 4, 6)]
##                       pert_iname
## 35                     abamectin
## 330                      amitraz
## 5801                  talipexole
## 466                   artemether
## 1387                  ciclopirox
## 3516                lumefantrine
## 934                   boceprevir
## 5903                  telaprevir
## 1203                   cefazolin
## 1648             cycloserine-(D)
## 1981                   doripenem
## 2212       erythromycin-estolate
## 2213 erythromycin-ethylsuccinate
## 3150                  ivermectin
## 3599                    mafenide
## 4761                  piperazine
## 4828             podophyllotoxin
## 5028                     quinine
## 5130                   ribavirin
##                                                         moa
## 35                          benzodiazepine receptor agonist
## 330                             adrenergic receptor agonist
## 5801  adrenergic receptor agonist|dopamine receptor agonist
## 466                                      antimalarial agent
## 1387                           membrane integrity inhibitor
## 3516                                     antimalarial agent
## 934                                           HCV inhibitor
## 5903                                          HCV inhibitor
## 1203                bacterial cell wall synthesis inhibitor
## 1648                bacterial cell wall synthesis inhibitor
## 1981                bacterial cell wall synthesis inhibitor
## 2212              bacterial 50S ribosomal subunit inhibitor
## 2213  cytochrome P450 inhibitor|protein synthesis inhibitor
## 3150                        benzodiazepine receptor agonist
## 3599                           carbonic anhydrase inhibitor
## 4761                        benzodiazepine receptor agonist
## 4828 microtubule inhibitor|tubulin polymerization inhibitor
## 5028                  hemozoin biocrystallization inhibitor
## 5130                                              antiviral
##                                           target
## 35                                 GABBR1|GABBR2
## 330                                ADRA1A|ADRA2A
## 5801                           ADRA2A|DRD2|HTR3A
## 466                                       ATP1A1
## 1387                                      ATP1A1
## 3516                                      ATP1A1
## 934                CMA1|CTSA|CTSF|CTSK|CTSL|CTSS
## 5903                                    CTSA|PGR
## 1203                                        PON1
## 1648                                       GRIN1
## 1981                                       DPEP1
## 2212 ABCB1|ALB|CYP3A4|CYP51A1|KCNH2|MLNR|SLC47A1
## 2213 ABCB1|ALB|CYP3A4|CYP51A1|KCNH2|MLNR|SLC47A1
## 3150                                CHRNA7|P2RX7
## 3599                   CA12|CA14|CA2|CA4|CA6|CA9
## 4761                                      GABRB3
## 4828                     IGF1R|TOP2A|TUBA4A|TUBB
## 5028                     GP9|KCNB2|KCNN4|SLC29A4
## 5130               ADK|ENPP1|IMPDH1|IMPDH2|NT5C2
##                                                                                                                                indication
## 35                                                                                                             gastrointestinal parasites
## 330                                                                                                               generalized demodicosis
## 5801                                                                                                                      genitial herpes
## 466                                                                                                                               malaria
## 1387                                                                                                                        onychomycosis
## 3516                                                                                                                              malaria
## 934                                                                                                                           hepatitis C
## 5903                                                                                                                          hepatitis C
## 1203            urinary tract infections|skin infections|bacterial septicemia|endocarditis|surgical prophylaxis|bone and joint infections
## 1648                                                                                                            tuberculosis|tuberculosis
## 1981                                                                   intra-abdominal infections|urinary tract infections|pyelonephritis
## 2212 listeria|respiratory tract infections|skin infections|syphilis|amebiasis|pelvic inflammatory disease|chlamydia|diphtheria|erythrasma
## 2213 listeria|respiratory tract infections|skin infections|syphilis|amebiasis|pelvic inflammatory disease|chlamydia|diphtheria|erythrasma
## 3150                                                             gastrointestinal roundworms|lungworms|cattle grubs|mites|lice|horn flies
## 3599                                                                                                 first-aid antibiotic|skin infections
## 4761                                                                                                          gastrointestinal roundworms
## 4828                                                                                                                        genital warts
## 5028                                                                                                                              malaria
## 5130                                                                                                                          hepatitis C