rsabsence.Rnw


\subsection*{Sensitivity analysis}

As discussed before, the post-hoc specification of equivalence margins and the
choice of the prior distribution for the SMD under the alternative $H_{1}$ is 
debatable and controversial. Commonly used margins in clinical research are much
more stringent; for instance, in oncology, a margin of $\Delta = \log(1.3)$ is 
commonly used for log odds/hazard ratios, whereas in bioequivalence studies a
margin of \mbox{$\Delta = \log(1.25) % = \Sexpr{round(log(1.25), 2)}
  $} is the convention \citep[chapter 22]{Senn2008}. These margins would
translate into margins of $\Delta
= % \log(1.3)\sqrt{3}/\pi =
\Sexpr{round(log(1.3)*sqrt(3)/pi, 2)}$ and $\Delta = % \log(1.25)\sqrt{3}/\pi =
\Sexpr{round(log(1.25)*sqrt(3)/pi, 2)}$ on the SMD scale, respectively, using
the $\text{SMD} = (\surd{3} / \pi) \log\text{OR}$ conversion \citep[p.
233]{Cooper2019}. As for the Bayesian hypothesis testing we specified a normal
unit-information prior under the alternative while other normal priors with
smaller/larger standard deviations could have been considered.
In the sensitivity analysis in the top plot of Figure~\ref{fig:sensitivity} we 
show the number of successful replications as a function of the margin $\Delta$ 
and for different TOST \textit{p}-value thresholds. Such an ``equivalence 
curve'' approach was first proposed by \citet{Hauck1986}. We see that for
realistic margins between $0$ and $1$, the proportion of replication successes 
remains below $50\%$ for the conventional $\alpha = 0.05$ level. To achieve a 
success rate of $11/15 = \Sexpr{round(11/15*100, 0)}\%$, as was achieved with 
the non-significance criterion from the RPCB, unrealistic margins of $\Delta > 
2$ are required, highlighting the paucity of evidence provided by these studies.
Changing the success criterion to a more lenient level ($\alpha = 0.1$) or a
more stringent level ($\alpha = 0.01$) hardly changes this conclusion.

In the bottom plot of Figure~\ref{fig:sensitivity} a sensitivity analysis is 
reported with respect to the choice of the prior standard deviation and the 
Bayes factor threshold. It is uncommon to specify prior standard
deviations larger than the unit-information standard deviation of $2$, as this
corresponds to the assumption of very large effect sizes under the alternatives.
However, to achieve replication success for a larger proportion of replications
than the observed $\Sexpr{bfSuccesses}/\Sexpr{ntotal} = 
\Sexpr{round(bfSuccesses/ntotal*100, 0)}\%$, unreasonably large prior standard
deviations have to be specified. For instance, a standard deviation of roughly
$5$ is required to achieve replication success in $50\%$ of the replications at 
a lenient Bayes factor threshold of $\gamma = 3$. The standard deviation needs 
to be almost $20$ so that the same success rate $11/15 = \Sexpr{round(11/15*100,
0)}\%$ as with the non-significance criterion is achieved. The necessary 
standard deviations are even higher for stricter Bayes factor threshold,
such as $\gamma = 6$ or $\gamma = 10$.

\begin{figure}[!htb]
<< "sensitivity", fig.height = 6.5 >>=
## compute number of successful replications as a function of the equivalence margin
marginseq <- seq(0.01, 4.5, 0.01)
alphaseq <- c(0.01, 0.05, 0.1)
sensitivityGrid <- expand.grid(m = marginseq, a = alphaseq)
equivalenceDF <- lapply(X = seq(1, nrow(sensitivityGrid)), FUN = function(i) {
    m <- sensitivityGrid$m[i]
    a <- sensitivityGrid$a[i]
    rpcbNull$ptosto <- with(rpcbNull, pmax(pnorm(q = smdo, mean = m, sd = so,
                                                 lower.tail = TRUE),
                                           pnorm(q = smdo, mean = -m, sd = so,
                                                 lower.tail = FALSE)))
    rpcbNull$ptostr <- with(rpcbNull, pmax(pnorm(q = smdr, mean = m, sd = sr,
                                                 lower.tail = TRUE),
                                           pnorm(q = smdr, mean = -m, sd = sr,
                                                 lower.tail = FALSE)))
    successes <- sum(rpcbNull$ptosto <= a & rpcbNull$ptostr <= a)
    data.frame(margin = m, alpha = a,
               successes = successes, proportion = successes/nrow(rpcbNull))
}) %>%
    bind_rows()

## plot number of successes as a function of margin
nmax <- nrow(rpcbNull)
bks <- c(0, 3, 6, 9, 11, 15)
labs <- paste0(bks, " (", round(bks/nmax*100, 0), "%)")
rpcbSuccesses <- 11
marbks <- c(0, margin, 1, 2, 3, 4)
plotA <- ggplot(data = equivalenceDF,
                aes(x = margin, y = successes,
                    color = factor(alpha, ordered = TRUE, levels = rev(alphaseq)))) +
    facet_wrap(~ 'italic("p")["TOST"] <= alpha ~ "in original and replication study"',
               labeller = label_parsed) +
    geom_vline(xintercept = margin, lty = 3, alpha = 0.4) +
    annotate(geom = "segment", x = margin + 0.25, xend = margin + 0.01, y = 2, yend = 2,
             arrow = arrow(type = "closed", length = unit(0.02, "npc")), alpha = 0.9,
             color = "darkgrey") +
    annotate(geom = "text", x = margin + 0.28, y = 2, color = "darkgrey",
             label = "margin used in main analysis",
             size = 3, alpha = 0.9, hjust = 0) +
    geom_hline(yintercept = rpcbSuccesses, lty = 2, alpha = 0.4) +
    annotate(geom = "segment", x = 0.1, xend = 0.1, y = 13, yend = 11.2,
             arrow = arrow(type = "closed", length = unit(0.02, "npc")), alpha = 0.9,
             color = "darkgrey") +
    annotate(geom = "text", x = -0.04, y = 13.5, color = "darkgrey",
             label = "non-significance criterion successes",
             size = 3, alpha = 0.9, hjust = 0) +
    geom_step(alpha = 0.8, linewidth = 0.8) +
    scale_y_continuous(breaks = bks, labels = labs) +
    scale_x_continuous(breaks = marbks) +
    coord_cartesian(xlim = c(0, max(equivalenceDF$margin))) +
    labs(x = bquote("Equivalence margin" ~ Delta),
         y = "Successful replications",
         color = bquote("threshold" ~ alpha)) +
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          strip.background = element_rect(fill = alpha("tan", 0.4)),
          strip.text = element_text(size = 12),
          legend.position = c(0.85, 0.25),
          plot.background = element_rect(fill = "transparent", color = NA),
          legend.box.background = element_rect(fill = "transparent", colour = NA))

## compute number of successful replications as a function of the prior scale
priorsdseq <- seq(0, 40, 0.1)
bfThreshseq <- c(3, 6, 10)
sensitivityGrid2 <- expand.grid(s = priorsdseq, thresh = bfThreshseq)
bfDF <- lapply(X = seq(1, nrow(sensitivityGrid2)), FUN = function(i) {
    priorsd <- sensitivityGrid2$s[i]
    thresh <- sensitivityGrid2$thresh[i]
    rpcbNull$BForig <- with(rpcbNull, BF01(estimate = smdo, se = so, unitvar = priorsd^2))
    rpcbNull$BFrep <- with(rpcbNull, BF01(estimate = smdr, se = sr, unitvar = priorsd^2))
    successes <- sum(rpcbNull$BForig >= thresh & rpcbNull$BFrep >= thresh)
    data.frame(priorsd = priorsd, thresh = thresh,
               successes = successes, proportion = successes/nrow(rpcbNull))
}) %>%
    bind_rows()

## plot number of successes as a function of prior sd
priorbks <- c(0, 2, 10, 20, 30, 40)
plotB <- ggplot(data = bfDF,
                aes(x = priorsd, y = successes, color = factor(thresh, ordered = TRUE))) +
    facet_wrap(~ '"BF"["01"] >= gamma ~ "in original and replication study"',
               labeller = label_parsed) +
    geom_vline(xintercept = 2, lty = 3, alpha = 0.4) +
    geom_hline(yintercept = rpcbSuccesses, lty = 2, alpha = 0.4) +
    annotate(geom = "segment", x = 7, xend = 2 + 0.2, y = 0.5, yend = 0.5,
             arrow = arrow(type = "closed", length = unit(0.02, "npc")), alpha = 0.9,
             color = "darkgrey") +
    annotate(geom = "text", x = 7.5, y = 0.5, color = "darkgrey",
             label = "standard deviation used in main analysis",
             size = 3, alpha = 0.9, hjust = 0) +
    annotate(geom = "segment", x = 0.5, xend = 0.5, y = 13, yend = 11.2,
             arrow = arrow(type = "closed", length = unit(0.02, "npc")), alpha = 0.9,
             color = "darkgrey") +
    annotate(geom = "text", x = 0.05, y = 13.5, color = "darkgrey",
             label = "non-significance criterion successes",
             size = 3, alpha = 0.9, hjust = 0) +
    geom_step(alpha = 0.8, linewidth = 0.8) +
    scale_y_continuous(breaks = bks, labels = labs, limits = c(0, nmax)) +
    scale_x_continuous(breaks = priorbks) +
    coord_cartesian(xlim = c(0, max(bfDF$priorsd))) +
    labs(x = "Prior standard deviation",
         y = "Successful replications ",
         color = bquote("threshold" ~ gamma)) +
    theme_bw() +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          strip.background = element_rect(fill = alpha("tan", 0.4)),
          strip.text = element_text(size = 12),
          legend.position = c(0.85, 0.25),
          plot.background = element_rect(fill = "transparent", color = NA),
          legend.box.background = element_rect(fill = "transparent", colour = NA))

grid.arrange(plotA, plotB, ncol = 1)
@
\caption{Number of successful replications of original null results in the RPCB
  as a function of the margin $\Delta$ of the equivalence test
  ($p_{\text{TOST}} \leq \alpha$ in both studies for
  $\alpha = \Sexpr{rev(alphaseq)}$) or the standard deviation of the zero-mean
  normal prior distribution for the SMD effect size under the alternative
  $H_{1}$ of the Bayes factor test ($\BF_{01} \geq \gamma$ in both studies for
  $\gamma = \Sexpr{bfThreshseq}$).}
\label{fig:sensitivity}
\end{figure}

<< "sessionInfo1", eval = Reproducibility, results = "asis" >>=
## print R sessionInfo to see system information and package versions
## used to compile the manuscript (set Reproducibility = FALSE, to not do that)
cat("\\newpage \\section*{Computational details}")
@

<< "sessionInfo2", echo = Reproducibility, results = Reproducibility >>=
cat(paste(Sys.time(), Sys.timezone(), "\n"))
sessionInfo()
@


\end{document}