rsabsence.R

## ----"setup", include = FALSE-------------------------------------------------
## knitr options
library(knitr)
opts_chunk$set(fig.height = 4,
               echo = FALSE,
               warning = FALSE,
               message = FALSE,
               cache = FALSE,
               eval = TRUE)

## should sessionInfo be printed at the end?
Reproducibility <- TRUE

## packages
library(ggplot2) # plotting
library(gridExtra) # combining ggplots
library(dplyr) # data manipulation
library(reporttools) # reporting of p-values

## not show scientific notation for small numbers
options("scipen" = 10)

## the replication Bayes factor under normality
BFr <- function(to, tr, so, sr) {
    bf <- dnorm(x = tr, mean = 0, sd = so) /
        dnorm(x = tr, mean = to, sd = sqrt(so^2 + sr^2))
    return(bf)
}
## function to format Bayes factors
formatBF. <- function(BF) {
    if (is.na(BF)) {
        BFform <- NA
    } else if (BF > 1) {
        if (BF > 1000) {
            BFform <- "> 1000"
        } else {
            BFform <- as.character(signif(BF, 2))
        }
    } else {
        if (BF < 1/1000) {
            BFform <- "< 1/1000"
        } else {
            BFform <- paste0("1/", signif(1/BF, 2))
        }
    }
    if (!is.na(BFform) && BFform == "1/1") {
        return("1")
    } else {
        return(BFform)
    }
}
formatBF <- Vectorize(FUN = formatBF.)

## Bayes factor under normality with unit-information prior under alternative
BF01 <- function(estimate, se, null = 0, unitvar = 4) {
    bf <- dnorm(x = estimate, mean = null, sd = se) /
        dnorm(x = estimate, mean = null, sd = sqrt(se^2 + unitvar))
    return(bf)
}


## ----"data"-------------------------------------------------------------------
## data
rpcbRaw <- read.csv(file = "../data/rpcb-effect-level.csv")
rpcb <- rpcbRaw %>%
    mutate(
        ## recompute one-sided p-values based on normality
        ## (in direction of original effect estimate)
        zo = smdo/so,
        zr = smdr/sr,
        po1 = pnorm(q = abs(zo), lower.tail = FALSE),
        pr1 = pnorm(q = abs(zr), lower.tail = ifelse(sign(zo) < 0, TRUE, FALSE)),
        ## compute some other quantities
        c = so^2/sr^2, # variance ratio
        d = smdr/smdo, # relative effect size
        po2 = 2*(1 - pnorm(q = abs(zo))), # two-sided original p-value
        pr2 = 2*(1 - pnorm(q = abs(zr))), # two-sided replication p-value
        sm = 1/sqrt(1/so^2 + 1/sr^2), # standard error of fixed effect estimate
        smdm = (smdo/so^2 + smdr/sr^2)*sm^2, # fixed effect estimate
        pm2 = 2*(1 - pnorm(q = abs(smdm/sm))), # two-sided fixed effect p-value
        Q = (smdo - smdr)^2/(so^2 + sr^2), # Q-statistic
        pQ = pchisq(q = Q, df = 1, lower.tail = FALSE), # p-value from Q-test
        BForig = BF01(estimate = smdo, se = so), # unit-information BF for original
        BForigformat = formatBF(BF = BForig),
        BFrep = BF01(estimate = smdr, se = sr), # unit-information BF for replication
        BFrepformat = formatBF(BF = BFrep)
    )

rpcbNull <- rpcb %>%
    filter(resulto == "Null")

## 2 examples
study1 <- "(20, 1, 1)" # evidence of absence
study2 <- "(29, 2, 2)" # absence of evidence
plotDF1 <- rpcbNull %>%
    filter(id %in% c(study1, study2)) %>%
    mutate(label = ifelse(id == study1,
                          "Goetz et al. (2011)\nEvidence of absence",
                          "Dawson et al. (2011)\nAbsence of evidence"))
conflevel <- 0.95


## ----"2-example-studies", fig.height = 3--------------------------------------
## create plot showing two example study pairs with null results
ggplot(data = plotDF1) +
    facet_wrap(~ label) +
    geom_hline(yintercept = 0, lty = 2, alpha = 0.3) +
    geom_pointrange(aes(x = "Original", y = smdo,
                        ymin = smdo - qnorm(p = (1 + conflevel)/2)*so,
                        ymax = smdo + qnorm(p = (1 + conflevel)/2)*so), fatten = 3) +
    geom_pointrange(aes(x = "Replication", y = smdr,
                        ymin = smdr - qnorm(p = (1 + conflevel)/2)*sr,
                        ymax = smdr + qnorm(p = (1 + conflevel)/2)*sr), fatten = 3) +
    geom_text(aes(x = 1.05, y = 2.5,
                  label = paste("italic(n) ==", no)), col = "darkblue",
              parse = TRUE, size = 3.8, hjust = 0) +
    geom_text(aes(x = 2.05, y = 2.5,
                  label = paste("italic(n) ==", nr)), col = "darkblue",
              parse = TRUE, size = 3.8, hjust = 0) +
    geom_text(aes(x = 1.05, y = 3,
                  label = paste("italic(p) ==", formatPval(po))), col = "darkblue",
              parse = TRUE, size = 3.8, hjust = 0) +
    geom_text(aes(x = 2.05, y = 3,
                  label = paste("italic(p) ==", formatPval(pr))), col = "darkblue",
              parse = TRUE, size = 3.8, hjust = 0) +
    labs(x = "", y = "Standardized mean difference") +
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major.x = element_blank(),
          strip.text = element_text(size = 12, margin = margin(4), vjust = 1.5),
          strip.background = element_rect(fill = alpha("tan", 0.4)),
          axis.text = element_text(size = 12))


## ----"plot-null-findings-rpcb", fig.height = 8.25, fig.width = "0.95\\linewidth"----
## compute TOST p-values
## Wellek (2010): strict - 0.36 # liberal - .74
# Cohen: small - 0.3 # medium - 0.5 # large - 0.8
## 80-125% convention for AUC and Cmax FDA/EMA
## 1.3 for oncology OR/HR -> log(1.3)*sqrt(3)/pi = 0.1446
margin <- 0.74
conflevel <- 0.9
rpcbNull$ptosto <- with(rpcbNull, pmax(pnorm(q = smdo, mean = margin, sd = so,
                                             lower.tail = TRUE),
                                       pnorm(q = smdo, mean = -margin, sd = so,
                                             lower.tail = FALSE)))
rpcbNull$ptostr <- with(rpcbNull, pmax(pnorm(q = smdr, mean = margin, sd = sr,
                                             lower.tail = TRUE),
                                       pnorm(q = smdr, mean = -margin, sd = sr,
                                             lower.tail = FALSE)))
## highlight the studies from Goetz and Dawson
ex1 <- "(20, 1, 1)"
ind1 <- which(rpcbNull$id == ex1)
ex2 <- "(29, 2, 2)"
ind2 <- which(rpcbNull$id == ex2)
rpcbNull$id <- ifelse(rpcbNull$id == ex1,
                      "(20, 1, 1) - Goetz et al. (2011)", rpcbNull$id)
rpcbNull$id <- ifelse(rpcbNull$id == ex2,
                      "(29, 2, 2) - Dawson et al. (2011)", rpcbNull$id)

## create plots of all study pairs with null results in original study
ggplot(data = rpcbNull) +
    ## order in ascending original paper order and label with id variable
    facet_wrap(~ paper + experiment + effect + id,
               labeller = label_bquote(.(id)), scales = "free", ncol = 3) +
    geom_hline(yintercept = 0, lty = 2, alpha = 0.25) +
    ## equivalence margin
    geom_hline(yintercept = c(-margin, margin), lty = 3, col = 2, alpha = 0.9) +
    ## ## also show the 95% CIs
    ## geom_linerange(aes(x = "Original", y = smdo,
    ##                   ymin = smdo - qnorm(p = (1 + 0.95)/2)*so,
    ##                   ymax = smdo + qnorm(p = (1 + 0.95)/2)*so), size = 0.2, alpha = 0.6) +
    ## geom_linerange(aes(x = "Replication", y = smdr,
    ##                   ymin = smdr - qnorm(p = (1 + 0.95)/2)*sr,
    ##                   ymax = smdr + qnorm(p = (1 + 0.95)/2)*sr), size = 0.2, alpha = 0.6) +
    ## 90% CIs
    geom_pointrange(aes(x = "Original", y = smdo,
                        ymin = smdo - qnorm(p = (1 + conflevel)/2)*so,
                        ymax = smdo + qnorm(p = (1 + conflevel)/2)*so),
                    size = 0.5, fatten = 1.5) +
    geom_pointrange(aes(x = "Replication", y = smdr,
                        ymin = smdr - qnorm(p = (1 + conflevel)/2)*sr,
                        ymax = smdr + qnorm(p = (1 + conflevel)/2)*sr),
                    size = 0.5, fatten = 1.5) +
    annotate(geom = "ribbon", x = seq(0, 3, 0.01), ymin = -margin, ymax = margin,
             alpha = 0.05, fill = 2) +
    labs(x = "", y = "Standardized mean difference") +
    geom_text(aes(x = 1.05, y = pmax(smdo + 2.5*so, smdr + 2.5*sr, 1.1*margin),
                  label = paste("italic(n) ==", no)), col = "darkblue",
              parse = TRUE, size = 2.3, hjust = 0, vjust = 2) +
    geom_text(aes(x = 2.05, y = pmax(smdo + 2.5*so, smdr + 2.5*sr, 1.1*margin),
                  label = paste("italic(n) ==", nr)), col = "darkblue",
              parse = TRUE, size = 2.3, hjust = 0, vjust = 2) +
    geom_text(aes(x = 1.05, y = pmax(smdo + 2.5*so, smdr + 2.5*sr, 1.1*margin),
                  label = paste("italic(p)",
                                ifelse(po < 0.0001, "", "=="),
                                formatPval(po))), col = "darkblue",
              parse = TRUE, size = 2.3, hjust = 0) +
    geom_text(aes(x = 2.05, y = pmax(smdo + 2.5*so, smdr + 2.5*sr, 1.1*margin),
                  label = paste("italic(p)",
                                ifelse(pr < 0.0001, "", "=="),
                                formatPval(pr))), col = "darkblue",
              parse = TRUE, size = 2.3, hjust = 0) +
    geom_text(aes(x = 1.05, y = pmax(smdo + 2.5*so, smdr + 2.5*sr, 1.1*margin),
                  label = paste("italic(p)['TOST']",
                                ifelse(ptosto < 0.0001, "", "=="),
                                formatPval(ptosto))),
              col = "darkblue", parse = TRUE, size = 2.3, hjust = 0, vjust = 3) +
    geom_text(aes(x = 2.05, y = pmax(smdo + 2.5*so, smdr + 2.5*sr, 1.1*margin),
                  label = paste("italic(p)['TOST']",
                                ifelse(ptostr < 0.0001, "", "=="),
                                formatPval(ptostr))),
              col = "darkblue", parse = TRUE, size = 2.3, hjust = 0, vjust = 3) +
    geom_text(aes(x = 1.05, y = pmax(smdo + 2.5*so, smdr + 2.5*sr, 1.1*margin),
                  label = paste("BF['01']", ifelse(BForig <= 1/1000, "", "=="),
                                BForigformat)), col = "darkblue", parse = TRUE,
              size = 2.3, hjust = 0, vjust = 4.5) +
    geom_text(aes(x = 2.05, y = pmax(smdo + 2.5*so, smdr + 2.5*sr, 1.1*margin),
                  label = paste("BF['01']", ifelse(BFrep <= 1/1000, "", "=="),
                                BFrepformat)), col = "darkblue", parse = TRUE,
              size = 2.3, hjust = 0, vjust = 4.5) +
    coord_cartesian(x = c(1.1, 2.4)) +
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          strip.text = element_text(size = 8, margin = margin(3), vjust = 2),
          strip.background = element_rect(fill = alpha("tan", 0.4)),
          axis.text = element_text(size = 8))


## ----"successes-RPCB"---------------------------------------------------------
ntotal <- nrow(rpcbNull)

## successes non-significance criterion
nullSuccesses <- sum(rpcbNull$po > 0.05 & rpcbNull$pr > 0.05)

## success equivalence testing criterion
equivalenceSuccesses <- sum(rpcbNull$ptosto <= 0.05 & rpcbNull$ptostr <= 0.05)
ptosto1 <- rpcbNull$ptosto[ind1]
ptostr1 <- rpcbNull$ptostr[ind1]
ptosto2 <- rpcbNull$ptosto[ind2]
ptostr2 <- rpcbNull$ptostr[ind2]

## success BF criterion
bfSuccesses <- sum(rpcbNull$BForig > 3 & rpcbNull$BFrep > 3)
BForig1 <- rpcbNull$BForig[ind1]
BFrep1 <- rpcbNull$BFrep[ind1]
BForig2 <- rpcbNull$BForig[ind2]
BFrep2 <- rpcbNull$BFrep[ind2]


## ----"sensitivity", fig.height = 6.5------------------------------------------
## compute number of successful replications as a function of the equivalence margin
marginseq <- seq(0.01, 4.5, 0.01)
alphaseq <- c(0.01, 0.05, 0.1)
sensitivityGrid <- expand.grid(m = marginseq, a = alphaseq)
equivalenceDF <- lapply(X = seq(1, nrow(sensitivityGrid)), FUN = function(i) {
    m <- sensitivityGrid$m[i]
    a <- sensitivityGrid$a[i]
    rpcbNull$ptosto <- with(rpcbNull, pmax(pnorm(q = smdo, mean = m, sd = so,
                                                 lower.tail = TRUE),
                                           pnorm(q = smdo, mean = -m, sd = so,
                                                 lower.tail = FALSE)))
    rpcbNull$ptostr <- with(rpcbNull, pmax(pnorm(q = smdr, mean = m, sd = sr,
                                                 lower.tail = TRUE),
                                           pnorm(q = smdr, mean = -m, sd = sr,
                                                 lower.tail = FALSE)))
    successes <- sum(rpcbNull$ptosto <= a & rpcbNull$ptostr <= a)
    data.frame(margin = m, alpha = a,
               successes = successes, proportion = successes/nrow(rpcbNull))
}) %>%
    bind_rows()

## plot number of successes as a function of margin
nmax <- nrow(rpcbNull)
bks <- c(0, 3, 6, 9, 11, 15)
labs <- paste0(bks, " (", round(bks/nmax*100, 0), "%)")
rpcbSuccesses <- 11
marbks <- c(0, margin, 1, 2, 3, 4)
plotA <- ggplot(data = equivalenceDF,
                aes(x = margin, y = successes,
                    color = factor(alpha, ordered = TRUE, levels = rev(alphaseq)))) +
    facet_wrap(~ 'italic("p")["TOST"] <= alpha ~ "in original and replication study"',
               labeller = label_parsed) +
    geom_vline(xintercept = margin, lty = 3, alpha = 0.4) +
    annotate(geom = "segment", x = margin + 0.25, xend = margin + 0.01, y = 2, yend = 2,
             arrow = arrow(type = "closed", length = unit(0.02, "npc")), alpha = 0.9,
             color = "darkgrey") +
    annotate(geom = "text", x = margin + 0.28, y = 2, color = "darkgrey",
             label = "margin used in main analysis",
             size = 3, alpha = 0.9, hjust = 0) +
    geom_hline(yintercept = rpcbSuccesses, lty = 2, alpha = 0.4) +
    annotate(geom = "segment", x = 0.1, xend = 0.1, y = 13, yend = 11.2,
             arrow = arrow(type = "closed", length = unit(0.02, "npc")), alpha = 0.9,
             color = "darkgrey") +
    annotate(geom = "text", x = -0.04, y = 13.5, color = "darkgrey",
             label = "non-significance criterion successes",
             size = 3, alpha = 0.9, hjust = 0) +
    geom_step(alpha = 0.8, linewidth = 0.8) +
    scale_y_continuous(breaks = bks, labels = labs) +
    scale_x_continuous(breaks = marbks) +
    coord_cartesian(xlim = c(0, max(equivalenceDF$margin))) +
    labs(x = bquote("Equivalence margin" ~ Delta),
         y = "Successful replications",
         color = bquote("threshold" ~ alpha)) +
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          strip.background = element_rect(fill = alpha("tan", 0.4)),
          strip.text = element_text(size = 12),
          legend.position = c(0.85, 0.25),
          plot.background = element_rect(fill = "transparent", color = NA),
          legend.box.background = element_rect(fill = "transparent", colour = NA))

## compute number of successful replications as a function of the prior scale
priorsdseq <- seq(0, 40, 0.1)
bfThreshseq <- c(3, 6, 10)
sensitivityGrid2 <- expand.grid(s = priorsdseq, thresh = bfThreshseq)
bfDF <- lapply(X = seq(1, nrow(sensitivityGrid2)), FUN = function(i) {
    priorsd <- sensitivityGrid2$s[i]
    thresh <- sensitivityGrid2$thresh[i]
    rpcbNull$BForig <- with(rpcbNull, BF01(estimate = smdo, se = so, unitvar = priorsd^2))
    rpcbNull$BFrep <- with(rpcbNull, BF01(estimate = smdr, se = sr, unitvar = priorsd^2))
    successes <- sum(rpcbNull$BForig >= thresh & rpcbNull$BFrep >= thresh)
    data.frame(priorsd = priorsd, thresh = thresh,
               successes = successes, proportion = successes/nrow(rpcbNull))
}) %>%
    bind_rows()

## plot number of successes as a function of prior sd
priorbks <- c(0, 2, 10, 20, 30, 40)
plotB <- ggplot(data = bfDF,
                aes(x = priorsd, y = successes, color = factor(thresh, ordered = TRUE))) +
    facet_wrap(~ '"BF"["01"] >= gamma ~ "in original and replication study"',
               labeller = label_parsed) +
    geom_vline(xintercept = 2, lty = 3, alpha = 0.4) +
    geom_hline(yintercept = rpcbSuccesses, lty = 2, alpha = 0.4) +
    annotate(geom = "segment", x = 7, xend = 2 + 0.2, y = 0.5, yend = 0.5,
             arrow = arrow(type = "closed", length = unit(0.02, "npc")), alpha = 0.9,
             color = "darkgrey") +
    annotate(geom = "text", x = 7.5, y = 0.5, color = "darkgrey",
             label = "standard deviation used in main analysis",
             size = 3, alpha = 0.9, hjust = 0) +
    annotate(geom = "segment", x = 0.5, xend = 0.5, y = 13, yend = 11.2,
             arrow = arrow(type = "closed", length = unit(0.02, "npc")), alpha = 0.9,
             color = "darkgrey") +
    annotate(geom = "text", x = 0.05, y = 13.5, color = "darkgrey",
             label = "non-significance criterion successes",
             size = 3, alpha = 0.9, hjust = 0) +
    geom_step(alpha = 0.8, linewidth = 0.8) +
    scale_y_continuous(breaks = bks, labels = labs, limits = c(0, nmax)) +
    scale_x_continuous(breaks = priorbks) +
    coord_cartesian(xlim = c(0, max(bfDF$priorsd))) +
    labs(x = "Prior standard deviation",
         y = "Successful replications ",
         color = bquote("threshold" ~ gamma)) +
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          strip.background = element_rect(fill = alpha("tan", 0.4)),
          strip.text = element_text(size = 12),
          legend.position = c(0.85, 0.25),
          plot.background = element_rect(fill = "transparent", color = NA),
          legend.box.background = element_rect(fill = "transparent", colour = NA))

grid.arrange(plotA, plotB, ncol = 1)


## ----"interesting-study"------------------------------------------------------
studyInteresting <- filter(rpcbNull, id == "(48, 2, 4)")
noInteresting <- studyInteresting$no
nrInteresting <- studyInteresting$nr


## ----"sessionInfo2", echo = Reproducibility, results = Reproducibility--------
cat(paste(Sys.time(), Sys.timezone(), "\n"))
sessionInfo()