Introduction
This vignette contains sample code showing how to use the Good
Statistical Monitoring {gsm}
suite of packages using sample
data from gsm.core. For more information on the
{gsm}
suite of packages see the package
homepage.
Setup and Installation
Run the following:
## Install devtools (provides install_github())
install.packages("devtools")

## Install and load gsm.core
devtools::install_github("Gilead-BioStats/gsm.core", ref = "main")
library(gsm.core)

## Install and load gsm.mapping
devtools::install_github("Gilead-BioStats/gsm.mapping", ref = "main")
library(gsm.mapping)

## Install and load gsm.kri
devtools::install_github("Gilead-BioStats/gsm.kri", ref = "main")
library(gsm.kri)

## Install and load gsm.reporting
devtools::install_github("Gilead-BioStats/gsm.reporting", ref = "main")
# Attach the package that was just installed (previously gsm.kri was attached
# a second time here and gsm.reporting was never loaded).
library(gsm.reporting)
Example 1 - Adverse Events Metric - Scripted
This example uses the standard {gsm} analysis workflows to create scripted site-level Adverse Event metrics. See the Data Analysis Vignette for more detail.
- Example 1.1 calculates the Site-level AE rates.
- Example 1.2 adds a filter to include only Serious Adverse Events (SAEs) and implements pipes to run through the workflow.
- Example 1.3 generates bar charts showing SAE rates and z-scores by site using gsm.kri.
- Example 1.4 generates a scatter plot with confidence bounds for SAE rates using gsm.kri.
#### Example 1.1 - Generate an Adverse Event Metric using the standard {gsm.core} workflow

# Build the analysis input: the numerator uses the "Count" method on Raw_AE,
# the denominator uses the "Sum" method on `timeonstudy`, and rows are grouped
# by the `invid` column.
dfInput <- Input_Rate(
  dfSubjects           = gsm.core::lSource$Raw_SUBJ,
  dfNumerator          = gsm.core::lSource$Raw_AE,
  dfDenominator        = gsm.core::lSource$Raw_SUBJ,
  strSubjectCol        = "subjid",
  strGroupCol          = "invid",
  strNumeratorMethod   = "Count",
  strDenominatorMethod = "Sum",
  strDenominatorCol    = "timeonstudy"
)

# Each remaining step consumes the output of the step before it.
dfTransformed <- Transform_Rate(dfInput = dfInput)
dfAnalyzed <- Analyze_NormalApprox(
  dfTransformed = dfTransformed,
  strType = "rate"
)
dfFlagged <- Flag_NormalApprox(
  dfAnalyzed = dfAnalyzed,
  vThreshold = c(-3, -2, 2, 3)
)
dfSummarized <- Summarize(dfFlagged = dfFlagged)

# Tabulate the number of rows per flag value.
table(dfSummarized$Flag)
#### Example 1.2 - Make an SAE Metric by adding a filter. Also works with pipes.

# `%>%` and `filter()` are used below; attach dplyr here because the visible
# setup for this example does not load it (it is a no-op if already attached).
library(dplyr)

SAE_KRI <- Input_Rate(
  dfSubjects = gsm.core::lSource$Raw_SUBJ,
  # Restrict the numerator to serious adverse events (aeser == "Y").
  dfNumerator = gsm.core::lSource$Raw_AE %>% filter(aeser == "Y"),
  dfDenominator = gsm.core::lSource$Raw_SUBJ,
  strSubjectCol = "subjid",
  strGroupCol = "invid",
  strNumeratorMethod = "Count",
  strDenominatorMethod = "Sum",
  strDenominatorCol = "timeonstudy"
) %>%
  # Same transform -> analyze -> flag -> summarize sequence as Example 1.1,
  # expressed as a single pipeline.
  Transform_Rate() %>%
  Analyze_NormalApprox(strType = "rate") %>%
  Flag_NormalApprox(vThreshold = c(-3, -2, 2, 3)) %>%
  Summarize()

# Tabulate the number of rows per flag value.
table(SAE_KRI$Flag)
### Example 1.3 - Visualize Metric distribution using Bar Charts using provided htmlwidgets
library(gsm.kri)

# Display labels passed to every widget through `lMetric`.
labels <- list(
  Metric      = "Serious Adverse Event Rate",
  Numerator   = "Serious Adverse Events",
  Denominator = "Days on Study"
)

# One bar chart per outcome: "Metric", "Score" and "Numerator".
# The calls stay at top level so each widget is auto-printed.
gsm.kri::Widget_BarChart(
  dfResults  = SAE_KRI,
  lMetric    = labels,
  strOutcome = "Metric"
)
gsm.kri::Widget_BarChart(
  dfResults  = SAE_KRI,
  lMetric    = labels,
  strOutcome = "Score"
)
gsm.kri::Widget_BarChart(
  dfResults  = SAE_KRI,
  lMetric    = labels,
  strOutcome = "Numerator"
)
### Example 1.4 - Create Scatter plot with confidence bounds

# Compute bounds using the same thresholds that were used for flagging.
# NOTE(review): the metric itself was analyzed with strType = "rate"; confirm
# whether Analyze_NormalApprox_PredictBounds() needs a matching type argument.
dfBounds <- Analyze_NormalApprox_PredictBounds(
  SAE_KRI,
  vThreshold = c(-3, -2, 2, 3)
)

# Scatter plot of the SAE results with the bounds passed in as `dfBounds`.
gsm.kri::Widget_ScatterPlot(
  SAE_KRI,
  lMetric  = labels,
  dfBounds = dfBounds
)
Example 2 - Adverse Events Metrics - Workflow
This example introduces YAML workflows to re-generate the same results as in Example 1 via a reusable pipeline. See the Data Model Vignette for more detail.
- Example 2.1 runs the AE KRI workflow.
- Example 2.2 updates the metadata to run country-level metrics.
- Example 2.3 adds a filtering step to the workflow to generate the SAE metric.
library(gsm.mapping)
library(gsm.kri)
#### Example 2.1 - Configurable Adverse Event Workflow
# Define YAML workflow
#
# The YAML has three top-level sections:
#   meta  - metric metadata; step params such as `Threshold`, `GroupLevel`,
#           `AnalysisType` and `ID` below refer to these keys by name
#   spec  - the columns (and types) expected in each mapped input table
#   steps - an ordered list of function calls; each step names its `output`,
#           the function to run (`name`) and that function's `params`
#
# Indentation is significant in YAML: later code reads
# `AE_workflow$meta$GroupLevel` and `AE_workflow$steps[[2]]$params`, which
# only exist when the keys are nested as shown here.
# `read_yaml()` is namespaced because the yaml package is not attached by the
# visible setup code.
AE_workflow <- yaml::read_yaml(text =
'meta:
  Type: Analysis
  ID: kri0001
  GroupLevel: Site
  Abbreviation: AE
  Metric: Adverse Event Rate
  Numerator: Adverse Events
  Denominator: Days on Study
  Model: Normal Approximation
  Score: Adjusted Z-Score
  AnalysisType: rate
  Threshold: -2,-1,2,3
  AccrualThreshold: 30
  AccrualMetric: Denominator
spec:
  Mapped_AE:
    subjid:
      type: character
  Mapped_SUBJ:
    subjid:
      type: character
    invid:
      type: character
    timeonstudy:
      type: integer
steps:
  - output: vThreshold
    name: ParseThreshold
    params:
      strThreshold: Threshold
  - output: Analysis_Input
    name: Input_Rate
    params:
      dfSubjects: Mapped_SUBJ
      dfNumerator: Mapped_AE
      dfDenominator: Mapped_SUBJ
      strSubjectCol: subjid
      strGroupCol: invid
      strGroupLevel: GroupLevel
      strNumeratorMethod: Count
      strDenominatorMethod: Sum
      strDenominatorCol: timeonstudy
  - output: Analysis_Transformed
    name: Transform_Rate
    params:
      dfInput: Analysis_Input
  - output: Analysis_Analyzed
    name: Analyze_NormalApprox
    params:
      dfTransformed: Analysis_Transformed
      strType: AnalysisType
  - output: Analysis_Flagged
    name: Flag_NormalApprox
    params:
      dfAnalyzed: Analysis_Analyzed
      vThreshold: vThreshold
      nAccrualThreshold: AccrualThreshold
      strAccrualMetric: AccrualMetric
  - output: Analysis_Summary
    name: Summarize
    params:
      dfFlagged: Analysis_Flagged
  - output: lAnalysis
    name: list
    params:
      ID: ID
      Analysis_Input: Analysis_Input
      Analysis_Transformed: Analysis_Transformed
      Analysis_Analyzed: Analysis_Analyzed
      Analysis_Flagged: Analysis_Flagged
      Analysis_Summary: Analysis_Summary
')
# Run the workflow

# Load only the AE and SUBJ mapping workflows shipped with gsm.mapping; they
# are used here to derive the combined spec that drives ingestion.
lMappingWorkflows <- MakeWorkflowList(
  strNames = c("AE", "SUBJ"),
  strPath = "workflow/1_mappings",
  strPackage = "gsm.mapping",
  bExact = TRUE
)
mappings_spec <- gsm.mapping::CombineSpecs(lMappingWorkflows)
lRawData <- gsm.mapping::Ingest(gsm.core::lSource, mappings_spec)

# The ingested tables are passed under the `Mapped_*` names that the
# workflow's `spec` and `steps` refer to.
AE_data <- list(
  Mapped_SUBJ = lRawData$Raw_SUBJ,
  Mapped_AE = lRawData$Raw_AE
)
AE_KRI <- RunWorkflow(lWorkflow = AE_workflow, lData = AE_data)

# Create Barchart from workflow
# Pass the workflow metadata as `lMetric`, matching the bar chart calls in
# Examples 2.2 and 2.3, so this chart is built with the same metric metadata.
gsm.kri::Widget_BarChart(
  dfResults = AE_KRI$Analysis_Summary,
  lMetric = AE_workflow$meta
)
#### Example 2.2 - Run Country-Level Metric

# Start from a copy of the site-level workflow and change only the grouping.
AE_country_workflow <- AE_workflow
AE_country_workflow$meta$GroupLevel <- "Country"
# Step 2 of the workflow is the Input_Rate step, whose `strGroupCol` selects
# the grouping column.
AE_country_workflow$steps[[2]]$params$strGroupCol <- "country"

AE_country_KRI <- RunWorkflow(
  lWorkflow = AE_country_workflow,
  lData     = AE_data
)

gsm.kri::Widget_BarChart(
  dfResults = AE_country_KRI$Analysis_Summary,
  lMetric   = AE_country_workflow$meta
)
#### Example 2.3 - Create SAE workflow

# Tweak AE workflow metadata
SAE_workflow <- AE_workflow
SAE_workflow$meta$File <- "SAE_KRI"
SAE_workflow$meta$Metric <- "Serious Adverse Event Rate"
SAE_workflow$meta$Numerator <- "Serious Adverse Events"

# Add a `RunQuery` step that filters out non-serious AEs. Its output is named
# "Mapped_AE", the same name the later steps read as their AE input.
filterStep <- list(
  list(
    name = "RunQuery",
    output = "Mapped_AE",
    params = list(
      df = "Mapped_AE",
      strQuery = "SELECT * FROM df WHERE aeser = 'Y'"
    )
  )
)

# `after = 0` inserts the filter ahead of every existing step.
SAE_workflow$steps <- append(SAE_workflow$steps, filterStep, after = 0)

# Run the updated workflow
SAE_KRI <- RunWorkflow(lWorkflow = SAE_workflow, lData = AE_data)

gsm.kri::Widget_BarChart(
  dfResults = SAE_KRI$Analysis_Summary,
  lMetric   = SAE_workflow$meta
)
Example 3 - Study-Level Reporting Workflows
This example extends the previous examples to generate charts and reports for multiple KRIs. See the Data Reporting Vignette for more detail.
- Example 3.1 steps through several workflows to generate a report for all 12 standard site-level KRIs.
- Example 3.2 automates data ingestion using gsm.mapping::Ingest() and gsm.mapping::CombineSpecs().
- Example 3.3 generates a report using gsm.kri incorporating multiple timepoints using the sample reporting data saved as part of gsm.core.
library(gsm.core)
library(gsm.mapping)
library(gsm.kri)
library(gsm.reporting)
library(dplyr)
#### 3.1 - Create a KRI Report using 12 standard metrics in a step-by-step workflow

# Names of the mapping workflows to load from gsm.mapping.
core_mappings <- c(
  "AE", "COUNTRY", "DATACHG", "DATAENT", "ENROLL", "LB", "PK",
  "PD", "QUERY", "STUDY", "STUDCOMP", "SDRGCOMP", "SITE", "SUBJ"
)

# Step 0 - Create Raw Data from Source Data
# Source tables are copied as-is except where columns are renamed, selected
# or duplicated by hand below.
lRaw <- list(
  Raw_SUBJ = gsm.core::lSource$Raw_SUBJ,
  Raw_AE = gsm.core::lSource$Raw_AE,
  Raw_PD = rename(
    gsm.core::lSource$Raw_PD,
    subjid = subjectenrollmentnumber
  ),
  Raw_PK = rename(
    gsm.core::lSource$Raw_PK,
    visit = foldername
  ),
  Raw_LB = gsm.core::lSource$Raw_LB,
  Raw_STUDCOMP = select(
    gsm.core::lSource$Raw_STUDCOMP,
    subjid, compyn
  ),
  Raw_SDRGCOMP = gsm.core::lSource$Raw_SDRGCOMP,
  Raw_DATACHG = rename(
    gsm.core::lSource$Raw_DATACHG,
    subject_nsv = subjectname
  ),
  Raw_DATAENT = rename(
    gsm.core::lSource$Raw_DATAENT,
    subject_nsv = subjectname
  ),
  Raw_QUERY = rename(
    gsm.core::lSource$Raw_QUERY,
    subject_nsv = subjectname
  ),
  Raw_ENROLL = gsm.core::lSource$Raw_ENROLL,
  # All site-level renames are independent, so they are done in one call.
  Raw_SITE = rename(
    gsm.core::lSource$Raw_SITE,
    studyid = protocol,
    invid = pi_number,
    InvestigatorFirstName = pi_first_name,
    InvestigatorLastName = pi_last_name,
    City = city,
    State = state,
    Country = country,
    Status = site_status
  ),
  Raw_STUDY = rename(
    gsm.core::lSource$Raw_STUDY,
    studyid = protocol_number,
    Status = status
  ),
  # Copy `foldername` into `visit_folder` first, then rename the original
  # column to `visit`, so both columns exist in the result.
  Raw_VISIT = gsm.core::lSource$Raw_VISIT %>%
    mutate(visit_folder = foldername) %>%
    rename(visit = foldername)
)
# Step 1 - Create Mapped Data Layer - filter, aggregate and join raw data to create mapped data layer
mappings_wf <- gsm.core::MakeWorkflowList(
  strNames   = core_mappings,
  strPath    = "workflow/1_mappings",
  strPackage = "gsm.mapping"
)
mapped <- gsm.core::RunWorkflows(mappings_wf, lRaw)

# Step 2 - Create Metrics - calculate metrics using mapped data
metrics_wf <- gsm.core::MakeWorkflowList(
  strPath    = "workflow/2_metrics",
  strPackage = "gsm.kri"
)
analyzed <- gsm.core::RunWorkflows(metrics_wf, mapped)

# Step 3 - Create Reporting Layer - create reports using metrics data
# The reporting workflows receive the mapped tables plus the analysis results
# (`lAnalyzed`) and the metric workflow definitions (`lWorkflows`).
reporting_wf <- gsm.core::MakeWorkflowList(
  strPath    = "workflow/3_reporting",
  strPackage = "gsm.reporting"
)
reporting <- gsm.core::RunWorkflows(
  reporting_wf,
  c(
    mapped,
    list(
      lAnalyzed  = analyzed,
      lWorkflows = metrics_wf
    )
  )
)

# Step 4 - Create KRI Reports - create KRI report using reporting data
module_wf <- gsm.core::MakeWorkflowList(
  strPath    = "workflow/4_modules",
  strPackage = "gsm.kri"
)
lReports <- gsm.core::RunWorkflows(module_wf, reporting)
#### 3.2 - Automate data ingestion using Ingest() and CombineSpecs()

# Step 0 - Data Ingestion - standardize tables/columns names
# The combined spec of the mapping workflows drives Ingest(), replacing the
# hand-written `lRaw` list from Example 3.1.
mappings_wf <- gsm.core::MakeWorkflowList(
  strNames   = core_mappings,
  strPath    = "workflow/1_mappings",
  strPackage = "gsm.mapping"
)
mappings_spec <- gsm.mapping::CombineSpecs(mappings_wf)
lRaw <- gsm.mapping::Ingest(gsm.core::lSource, mappings_spec)

# Step 1 - Create Mapped Data Layer - filter, aggregate and join raw data to create mapped data layer
mapped <- gsm.core::RunWorkflows(mappings_wf, lRaw)

# Step 2 - Create Metrics - calculate metrics using mapped data
metrics_wf <- gsm.core::MakeWorkflowList(
  strPath    = "workflow/2_metrics",
  strPackage = "gsm.kri"
)
analyzed <- gsm.core::RunWorkflows(metrics_wf, mapped)

# Step 3 - Create Reporting Layer - create reports using metrics data
reporting_wf <- gsm.core::MakeWorkflowList(
  strPath    = "workflow/3_reporting",
  strPackage = "gsm.reporting"
)
reporting <- gsm.core::RunWorkflows(
  reporting_wf,
  c(
    mapped,
    list(
      lAnalyzed  = analyzed,
      lWorkflows = metrics_wf
    )
  )
)

# Step 4 - Create KRI Report - create KRI report using reporting data
module_wf <- gsm.core::MakeWorkflowList(
  strPath    = "workflow/4_modules",
  strPackage = "gsm.kri"
)
lReports <- gsm.core::RunWorkflows(module_wf, reporting)
#### 3.3 Site-Level KRI Report with multiple SnapshotDate

# TODO(review): the sample reporting datasets used below are noted as relying
# on clindata; confirm whether they need to be rerun/rewritten.

# Build the charts from the full reporting results, groups, metrics and
# bounds datasets referenced from gsm.core.
lCharts <- gsm.kri::MakeCharts(
  dfResults = gsm.core::reportingResults,
  dfGroups = gsm.core::reportingGroups,
  dfMetrics = gsm.core::reportingMetrics,
  dfBounds = gsm.core::reportingBounds
)

# The report's `dfResults` is first passed through
# FilterByLatestSnapshotDate(). `reportingResults` is namespace-qualified like
# the other sample datasets so the call does not depend on gsm.core being
# attached.
kri_report_path <- gsm.kri::Report_KRI(
  lCharts = lCharts,
  dfResults = gsm.kri::FilterByLatestSnapshotDate(gsm.core::reportingResults),
  dfGroups = gsm.core::reportingGroups,
  dfMetrics = gsm.core::reportingMetrics
)