---
title: "2. The Serotype Database API: Search Examples"
author: "William Lane MD, PhD, A(ACHI)"
format: 
  html:
    code-fold: false
    toc: true
    theme: cosmo
execute:
  warning: false
  message: false
---

# Various Query Examples

In this document, we’ll demonstrate several ways to query the Serotype Database GraphQL API using the `alleleToSerotype` query:

1.  Returning all data fields for a specific allele.
2.  Getting only certain fields (locus, allele, score, serotype).
3.  Searching by serotype, antigen, and Bw4/Bw6.
4.  Changing resolution from `two_field` to `full_field`.
5.  Using partial allele name matches (`alleleExactMatch = false`).

## 2.1 Setup Packages

```{r}
#| label: setup-packages

# NOTE: the workspace is intentionally not cleared here. Every object used in
# this document is assigned before it is read, and wiping an interactive
# user's workspace would be a destructive side effect.

# Install required packages if not already installed.
# kableExtra is included because it is loaded below for kable_styling() and
# scroll_box().
options(repos = c(CRAN = "https://cloud.r-project.org"))
required_pkgs <- c(
  "httr", "jsonlite", "conflicted", "dplyr", "knitr", "kableExtra", "dotenv"
)
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

# Load packages
library(httr)
library(jsonlite)
library(conflicted)
library(dplyr)
library(knitr)
library(kableExtra)
library(dotenv)

# Load environment variables from a local .env file when one exists.
# load_dot_env() raises an error if the file is missing, which would block
# readers who supply the key through `apiKeyOverride` instead.
if (file.exists(".env")) {
  load_dot_env()
}

# Resolve conflicts
conflict_prefer("filter", "dplyr")
```

## 2.2 Set API Key

To query the Serotype Database API, you will need an API key, which is available for free by signing up for an account at <https://www.serotype.org/user>.

If you are familiar with how to set R environment variables, you can save the key to your .env file as `SEROTYPE_API_KEY=YOUR_API_KEY`, replacing `YOUR_API_KEY` with your actual API key. This is considered best practice because it keeps your private API key separate from your code, enhancing security and making your code easier to share or collaborate on without exposing sensitive information.

Alternatively, you can set the value directly in the code block below by assigning your API key to `apiKeyOverride`. However, be cautious: if you choose to embed your API key in the code, ensure you remove it before sharing the file, as each user must use their own unique API key for security and proper functionality.

```{r}
#| label: set-api-key

# Check for the API key in environment variables (NA when it is not defined)
apiKey <- Sys.getenv("SEROTYPE_API_KEY", unset = NA)

# Allow manual override of the API key by user here
apiKeyOverride <- ""  # Set this to your API key manually if not using environment variables

# Use the override if provided, otherwise use the environment variable value
if (!is.null(apiKeyOverride) && nzchar(apiKeyOverride)) {
  apiKey <- apiKeyOverride
}

# Fail early with an actionable message instead of sending requests with a
# missing key and failing later while parsing the response.
if (is.na(apiKey) || !nzchar(apiKey)) {
  stop(
    "No API key found. Set SEROTYPE_API_KEY in your .env file or assign ",
    "your key to `apiKeyOverride`.",
    call. = FALSE
  )
}
```

## 2.3 Allele: `A*02:01` returning all data fields

```{r}
#| label: allele-all-fields
# GraphQL endpoint; the later chunks in this document reuse `url`.
url <- "https://serotype.org/api/graphql"

# Request every field selected below for a single allele at two-field
# resolution.
query_allele_all_fields <- '
query {
  alleleToSerotype(
    alleles: ["A*02:01"]
    resolution: two_field
  ) {
    locus
    allele
    score
    serotype
    antigen
    broadAntigen
    bw4_bw6
    ciwd3
    cwd2
    eurcwd
  }
}
'

resp_all_fields <- POST(
  url,
  body = list(query = query_allele_all_fields),
  encode = "json",
  add_headers(`x-api-key` = apiKey)
)

# Turn HTTP failures (e.g. a rejected API key) into a clear R error rather
# than an obscure failure while parsing or tabulating the body.
stop_for_status(resp_all_fields)

# Pass the encoding explicitly so httr does not have to guess it.
data_all_fields <- fromJSON(
  content(resp_all_fields, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)
df_all_fields <- data_all_fields$data$alleleToSerotype
kable(df_all_fields, caption = "All fields returned for A*02:01")
```

## 2.4 Just specific fields: `locus`, `allele`, `score`, `serotype`

```{r}
#| label: allele-few-fields
# Same allele lookup as before, but selecting only four fields in the
# GraphQL selection set.
query_few_fields <- '
query {
  alleleToSerotype(
    alleles: ["A*02:01"]
    resolution: two_field
  ) {
    locus
    allele
    score
    serotype
  }
}
'

resp_few_fields <- POST(
  url,
  body = list(query = query_few_fields),
  encode = "json",
  add_headers(`x-api-key` = apiKey)
)

# Stop with a readable error on any HTTP failure status.
stop_for_status(resp_few_fields)

# Pass the encoding explicitly so httr does not have to guess it.
data_few_fields <- fromJSON(
  content(resp_few_fields, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)
df_few_fields <- data_few_fields$data$alleleToSerotype
kable(df_few_fields, caption = "Subset of fields for A*02:01")
```

## 2.5 Searching by Serotype

-   **Single serotype**: `A0201`\
-   **Multiple serotypes**: `A0201`, `A0202`

```{r}
#| label: search-serotype
# Single serotype: filter on `serotypes` instead of `alleles`.
query_serotype <- '
query {
  alleleToSerotype(
    serotypes: ["A0201"]
    resolution: two_field
  ) {
    locus
    allele
    score
    serotype
    antigen
    broadAntigen
    bw4_bw6
  }
}
'
resp_serotype <- POST(
  url,
  body = list(query = query_serotype),
  encode = "json",
  add_headers(`x-api-key` = apiKey)
)

# Stop with a readable error on any HTTP failure status.
stop_for_status(resp_serotype)

# Pass the encoding explicitly so httr does not have to guess it.
df_serotype <- fromJSON(
  content(resp_serotype, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)$data$alleleToSerotype

# Render as a scrollable HTML table.
kable(df_serotype, "html", caption = "All data for serotype A0201") %>%
  kable_styling() %>%
  scroll_box(height = "300px")

# Multiple serotypes
query_serotype_multi <- '
query {
  alleleToSerotype(
    serotypes: ["A0201", "A0202"]
    resolution: two_field
  ) {
    locus
    allele
    score
    serotype
    antigen
    broadAntigen
    bw4_bw6
  }
}
'
resp_serotype_multi <- POST(
  url,
  body = list(query = query_serotype_multi),
  encode = "json",
  add_headers(`x-api-key` = apiKey)
)
stop_for_status(resp_serotype_multi)
df_serotype_multi <- fromJSON(
  content(resp_serotype_multi, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)$data$alleleToSerotype
kable(
  df_serotype_multi,
  "html",
  caption = "All data for serotypes A0201 and A0202"
) %>%
  kable_styling() %>%
  scroll_box(height = "300px")
```

## 2.6 Searching by Antigen

**Example**: `antigens: ["A2"]`

```{r}
#| label: search-antigen
# Filter on `antigens` rather than on alleles or serotypes.
query_antigen <- '
query {
  alleleToSerotype(
    antigens: ["A2"]
    resolution: two_field
  ) {
    locus
    allele
    score
    serotype
    antigen
    broadAntigen
    bw4_bw6
  }
}
'
resp_antigen <- POST(
  url,
  body = list(query = query_antigen),
  encode = "json",
  add_headers(`x-api-key` = apiKey)
)

# Stop with a readable error on any HTTP failure status.
stop_for_status(resp_antigen)

# Pass the encoding explicitly so httr does not have to guess it.
df_antigen <- fromJSON(
  content(resp_antigen, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)$data$alleleToSerotype

# Render as a scrollable HTML table.
kable(df_antigen, "html", caption = "All data for antigen A2") %>%
  kable_styling() %>%
  scroll_box(height = "300px")
```

## 2.7 Searching by Bw4/Bw6

-   **Bw4** only\
-   Then filter results by **locus A** and **locus B**

```{r}
#| label: search-bw4
# Filter on the `bw4_bw6` argument; no allele, serotype or antigen filter is
# supplied.
query_bw4 <- '
query {
  alleleToSerotype(
    bw4_bw6: ["Bw4"]
    resolution: two_field
  ) {
    locus
    allele
    score
    serotype
    antigen
    broadAntigen
    bw4_bw6
  }
}
'
resp_bw4 <- POST(
  url,
  body = list(query = query_bw4),
  encode = "json",
  add_headers(`x-api-key` = apiKey)
)

# Stop with a readable error on any HTTP failure status.
stop_for_status(resp_bw4)

# Pass the encoding explicitly so httr does not have to guess it.
df_bw4 <- fromJSON(
  content(resp_bw4, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)$data$alleleToSerotype

# Split the result client-side by locus.
df_bw4_A <- df_bw4 %>% filter(locus == "A")
df_bw4_B <- df_bw4 %>% filter(locus == "B")

# Report row counts instead of printing the full tables.
cat("Total Bw4 across all loci:", nrow(df_bw4), "\n")
cat("Bw4 at locus A:", nrow(df_bw4_A), "\n")
cat("Bw4 at locus B:", nrow(df_bw4_B), "\n")
```

## 2.8 Partial allele name search: `A*24:172`

At `two_field` vs `full_field`, with `alleleExactMatch = false`

```{r}
#| label: partial-two_field
# two_field resolution, partial match (alleleExactMatch: false)
query_partial_two_field <- '
query {
  alleleToSerotype(
    alleles: ["A*24:172"]
    alleleExactMatch: false
    resolution: two_field
  ) {
    locus
    allele
    score
    serotype
    antigen
    bw4_bw6
  }
}
'
resp_partial_two_field <- POST(
  url,
  body = list(query = query_partial_two_field),
  encode = "json",
  add_headers(`x-api-key` = apiKey)
)

# Stop with a readable error on any HTTP failure status.
stop_for_status(resp_partial_two_field)

# Pass the encoding explicitly so httr does not have to guess it.
df_partial_two_field <- fromJSON(
  content(resp_partial_two_field, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)$data$alleleToSerotype
kable(
  df_partial_two_field,
  caption = "Two-field resolution, partial search for A*24:172"
)

# full_field resolution, partial match
query_partial_full_field <- '
query {
  alleleToSerotype(
    alleles: ["A*24:172"]
    alleleExactMatch: false
    resolution: full_field
  ) {
    locus
    allele
    score
    serotype
    antigen
    bw4_bw6
  }
}
'
resp_partial_full_field <- POST(
  url,
  body = list(query = query_partial_full_field),
  encode = "json",
  add_headers(`x-api-key` = apiKey)
)
stop_for_status(resp_partial_full_field)
df_partial_full_field <- fromJSON(
  content(resp_partial_full_field, as = "text", encoding = "UTF-8"),
  flatten = TRUE
)$data$alleleToSerotype
kable(
  df_partial_full_field,
  caption = "full_field resolution, partial search for A*24:172"
)
```
