9  Simulating error in Frequentist and Bayesian clinical trial designs

Published

January 22, 2026

Abstract

Simulations of error under Frequentist and Bayesian frameworks. Technically, in Bayesian framework, there is no type I error, instead, the error is the probability of making a mistake.

10 Load libraries

library(tidyverse)
library(brms)
library(gt)
library(gtsummary)
brms_summary <- function(x) {
  posterior::summarise_draws(x, "mean", "sd",  ~quantile(.x, probs = c(.025,0.975)))
}

11 Introduction

Type I error in a Frequentist framework is rejecting the null hypothesis when the null hypothesis is true. Type I error does not apply in Bayesian framework, and instead, the focus on making an error when acting. This is making making a mistake in decision making, and can be thought of as 1-pr(benefit) (as an example).

I’ll apply simulations to assessing error rate under frequentistism (type I error) as well as under a Bayesian framework (probability of making a mistake). I want to compare 2 priors: one with large standard deviation (\(Normal(0,100)\)) vs one with a narrower standard deviation (\(Normal(0,5)\)).

Probability of benefit is defined as \(Pr(\beta)>0.95\)

Frequentist approach steps:

  1. Simulate 1000 datasets with a mean difference of 0, sd of 1 with difference sample sizes (50,100,200) 2.Approach for Frequentist type I error: For each dataset, Fit Bayesian model with two priors (N(0,5) and N(0,100))
  2. Obtain 95% credible interval and median
  3. Calculate how many times Pr(Benefit>0.95)

Bayesian approach steps:

  1. Simulate 1000 datasets with a mean difference of 4, sd of 6 with difference sample sizes (50,100,200)
  2. Approach for Bayesian type I error: For each dataset, fit bayesian model with two priors (N(0,5) and N(0,100))
  3. Obtain 95% credible interval and median
  4. Calculate how many times 1-Pr(Benefit>0.95) or Pr(harm)>0.05

Then, compare error for each approach by sample size and prior.

12 Frequentist simulation code

#fit initial model to update
n=100
df<-tibble(treatment=rbinom(n,1,.5))|>
    rowwise()|>
    mutate(y=ifelse(treatment ==0,rnorm(n=1,0,6),
                            rnorm(n=1,0,6)))

ffitf<-brm(y~treatment,data=df,prior=prior(normal(0,5)))# prior with N(0,5)
ffit1000f<-brm(y~treatment,data=df,prior=prior(normal(0,1000)))# prior with N(0,1000)


#simulation function  for N(0,5 ) and N(0,1000) prior
simfit3 <- function(seed, n) {
  
  set.seed(seed)
  
  df<-tibble(treatment=rbinom(n,1,.5))|>
    rowwise()|>
    mutate(y=ifelse(treatment ==0,rnorm(n=1,0,6),
                            rnorm(n=1,0,6)))
  outp<-update(ffitf,
         newdata = df,
         seed = seed,cores=4,recompile=FALSE,refresh=0,prior=prior(normal(0,5))) 
  ot<-outp|>
    brms_summary()|>
  data.frame()|>
  filter(variable=="b_treatment")
  benefit<-hypothesis(outp,"treatment >0",alpha=0.05)
  ot$benefit<-benefit$hypothesis$Post.Prob
  ot$prior<-5
  ###
  outp2<-update(ffit1000f,
         newdata = df,
         seed = seed,cores=4,recompile=FALSE,refresh=0,prior=prior(normal(0,1000))) 
  ot2<-outp2|>
    brms_summary()|>
    data.frame()|>
    filter(variable=="b_treatment")
  benefit2<-hypothesis(outp2,"treatment >0",alpha=0.05)
  ot2$benefit<-benefit2$hypothesis$Post.Prob
  ot2$prior<-100
  
  ot3<-rbind(ot,ot2)
  
  return(ot3)
}

12.1 Frequentist type I error Simulation

Many don’t agree with this, but we’re simulating outcomes under the null hypothesis with Bayesian models. Then, we can identify how often positive results are observed over the simulation.

###--Actual simulation--###
#not run because it takes a very long time
##ss is set to different sample sizes 
#n_sim<-100 # number of simulations
#sim3<-tibble(expand.grid(seed=1:n_sim,ss=c(25,50,100,200,500)))|>
#  group_by(seed,ss)|>
#  do(out=simfit3(seed=.$seed,n=.$ss))|>
#  unnest(out)

#head(sim3)
#saveRDS(sim3,"Power_analysis_simulations_outputs/Type_1_error_Frequentist_framework_priorN05_N01000.rds")
sim3<-readRDS("Power_analysis_simulations_outputs/Type_1_error_Frequentist_framework_priorN05_N01000.rds")

sim3.parse<-sim3|>
  group_by(seed,ss,prior)|>
  mutate(benefit_yesno=if_else(X2.5.>0,1,0))|>
  group_by(ss,prior)|>
  dplyr::summarize(mean=mean(benefit_yesno))
`summarise()` has grouped output by 'ss'. You can override using the `.groups`
argument.
sim3.parse
# A tibble: 10 × 3
# Groups:   ss [5]
      ss prior  mean
   <dbl> <dbl> <dbl>
 1    25     5  0.02
 2    25   100  0.02
 3    50     5  0.04
 4    50   100  0.05
 5   100     5  0.04
 6   100   100  0.04
 7   200     5  0.02
 8   200   100  0.03
 9   500     5  0.04
10   500   100  0.04
ggplot(sim3.parse,aes(x=ss,y=mean,colour=factor(prior)))+geom_point(size=5)+geom_line()+scale_y_continuous(limits=c(0,.06),breaks=seq(0,0.06,.01),labels=seq(0,.06,.01))+geom_hline(lty="dotdash",yintercept = 0.025)+ylab("Type I Error")+xlab("Sample size")+scale_x_continuous(limits=c(0,500),breaks=seq(0,500,50),labels=seq(0,500,50))

13 Bayesian

13.1 The simulation function

#fit initial model to update
n=100
d<-tibble(treatment=rbinom(n,1,.5))|>
    rowwise()|>
    mutate(y=ifelse(treatment ==0,rnorm(n=1,0,6),
                            rnorm(n=1,4,6)))

ffit<-brm(y~treatment,data=d,prior=prior(normal(0,5)))# prior with N(0,5)
ffit1000<-brm(y~treatment,data=d,prior=prior(normal(0,1000)))# prior with N(0,1000)
#hypothesis(ffit,"treatment>0",alpha=0.05)



#simulation function  for N(0,5 ) and N(0,1000) prior
simfit <- function(seed, n) {
  
  set.seed(seed)
  
  d<-tibble(treatment=rbinom(n,1,.5))|>
    rowwise()|>
    mutate(y=ifelse(treatment ==0,rnorm(n=1,0,6),
                            rnorm(n=1,4,6)))
  outp<-update(ffit,
         newdata = d,
         seed = seed,cores=4,recompile=FALSE,refresh=0,prior=prior(normal(0,5))) 
  ot<-outp|>
    brms_summary()|>
  data.frame()|>
  filter(variable=="b_treatment")
  benefit<-hypothesis(outp,"treatment >0",alpha=0.05)
  ot$benefit<-benefit$hypothesis$Post.Prob
  ot$prior<-5
  ###
  outp2<-update(ffit1000,
         newdata = d,
         seed = seed,cores=4,recompile=FALSE,refresh=0,prior=prior(normal(0,1000))) 
  ot2<-outp2|>
    brms_summary()|>
    data.frame()|>
    filter(variable=="b_treatment")
  benefit2<-hypothesis(outp2,"treatment >0",alpha=0.05)
  ot2$benefit<-benefit2$hypothesis$Post.Prob
  ot2$prior<-100
  
  ot3<-rbind(ot,ot2)
  
  return(ot3)
}

13.2 Bayesian simulation

#n_sim<-100 # number of simulations
###--##comenting out simulation code - takes very long time
#sim2<-tibble(expand.grid(seed=1:n_sim,ss=c(25,50,100,200,500)))|>
#  group_by(seed,ss)|>
#  do(out=simfit(seed=.$seed,n=.$ss))|>
#  unnest(out)


#saveRDS(sim2,"Power_analysis_simulations_outputs/Type_1_error_Bayesian_framework_priorN05_N01000.rds")
sim2<-readRDS("Power_analysis_simulations_outputs/Type_1_error_Bayesian_framework_priorN05_N01000.rds")|>
  mutate(prior=if_else(prior==100,1000,prior))
head(sim2)
# A tibble: 6 × 9
   seed    ss variable     mean    sd    X2.5. X97.5. benefit prior
  <int> <dbl> <chr>       <dbl> <dbl>    <dbl>  <dbl>   <dbl> <dbl>
1     1    25 b_treatment  3.14  1.61 -0.0541    6.33   0.973     5
2     1    25 b_treatment  3.50  1.68  0.220     6.85   0.983  1000
3     1    50 b_treatment  3.07  1.55 -0.00978   6.10   0.974     5
4     1    50 b_treatment  3.38  1.60  0.251     6.49   0.982  1000
5     1   100 b_treatment  4.46  1.10  2.27      6.60   1         5
6     1   100 b_treatment  4.69  1.12  2.52      6.91   1      1000

13.2.1 Side quest- Generate a function that can determine power for a given sample size

Grab data and run on your own.

#Get power 
sim2.parse<-sim2|>
  group_by(seed,ss)|>
  mutate(benefit_yesno=if_else(X2.5.>0,1,0))|>
  group_by(ss,prior)|>
  dplyr::summarize(mean=mean(benefit_yesno))

sim2.parse

##plot out the power by 
ggplot(sim2.parse,aes(x=ss,y=mean,colour=factor(prior)))+geom_line()+geom_point()+scale_x_continuous(limits=c(0,500),breaks=seq(0,500,50),labels=seq(0,500,50))


########---### create a power function so we can predict power given a sample size input
sim2.parse5<-sim2.parse|>
  filter(prior==5)
# Hill model: y = ss^n / (K^n + ss^n)
# Use non-linear brms formula syntax

hill_formula <- bf(
  mean ~ ss^n / (K^n + ss^n),
  n + K ~ 1,
  nl = TRUE
)

priors <- c(
  prior(normal(2, 1), nlpar = "n", lb = 0),
  prior(normal(60, 30), nlpar = "K", lb = 0),
  # Use a proper prior for sigma on (0, ∞). Pick one:
  prior(student_t(3, 0, 0.2), class = "sigma", lb = 0)   # robust, fairly tight
)

fit_bayes <- brm(
  formula = hill_formula,
  data = sim2.parse5,
  family = gaussian(),       # still Gaussian
  prior = priors,
  control = list(adapt_delta = 0.95),
  chains = 4, cores = 4
)

summary(fit_bayes)

# Create a tidy posterior prediction function
predict_hill <- function(ss_values) {
  newdata <- data.frame(ss = ss_values)
  posterior_epred(fit_bayes, newdata = newdata)
}

# Example: predict for ss = 75
mean(predict_hill(75))


##predict continuously from 0 to 500 

powerf<-apply(predict_hill(seq(0,500,1)),2,mean)
q025<-apply(predict_hill(seq(0,500,1)),2,quantile,probs=0.025)
q975<-apply(predict_hill(seq(0,500,1)),2,quantile,probs=0.975)
powerf1<-tibble(mean=powerf,ss=seq(0,500,1),lci=q025,uci=q975)
###--plot with simulation points 

ggplot(sim2.parse5,aes(x=ss,y=mean))+geom_point()+scale_x_continuous(limits=c(0,500),breaks=seq(0,500,50),labels=seq(0,500,50))+geom_line(data=powerf1,aes(x=ss,y=mean))+geom_ribbon(data=powerf1,aes(x=ss,ymin=lci,ymax=uci),alpha=.2)+xlab("Sample size")+ylab("Bayesian power Pr(benefit)>=0.95")

powerf1|>
  filter(ss==77)

13.2.2 Back to original goal: figure out pr(harm)

Calculate the pr(harm), which is 1-pr(benefit) for our simulations

harmd<-sim2|>
  mutate(harm=1-benefit)|>
  group_by(ss,prior)|>
  summarize(mean=mean(harm),lcri=quantile(harm,prob=0.025),ucri=quantile(harm,0.975))
`summarise()` has grouped output by 'ss'. You can override using the `.groups`
argument.
harmd
# A tibble: 10 × 5
# Groups:   ss [5]
      ss prior     mean     lcri    ucri
   <dbl> <dbl>    <dbl>    <dbl>   <dbl>
 1    25     5 0.140    0.00100  0.631  
 2    25  1000 0.128    0.000119 0.640  
 3    50     5 0.0629   0        0.386  
 4    50  1000 0.0587   0        0.388  
 5   100     5 0.00561  0        0.0561 
 6   100  1000 0.00469  0        0.0444 
 7   200     5 0.000872 0        0.00329
 8   200  1000 0.000765 0        0.00213
 9   500     5 0        0        0      
10   500  1000 0        0        0      
#plot out 
ggplot(harmd,aes(x=ss,y=mean,colour=factor(prior),fill=factor(prior)))+geom_point()+geom_line()+geom_ribbon(alpha=0.2,aes(ymin=lcri,ymax=ucri),colour=NA)+scale_x_continuous(labels=seq(0,500,50),breaks=seq(0,500,50),limits=c(0,500))+scale_y_continuous(breaks=seq(0,.7,.05),labels=seq(0,.7,.05))+geom_hline(yintercept = 0.025,lty="dotdash")+xlab("Sample size")+ylab("Pr(harm) or Pr(treatment<0)")

So a prior with narrower sd produces a higher error rate than a larger sd prior.

14 Session info

sessionInfo()
R version 4.5.0 (2025-04-11 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26100)

Matrix products: default
  LAPACK version 3.12.1

locale:
[1] LC_COLLATE=English_United States.utf8 
[2] LC_CTYPE=English_United States.utf8   
[3] LC_MONETARY=English_United States.utf8
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.utf8    

time zone: America/New_York
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] gtsummary_2.2.0 gt_1.0.0        brms_2.22.0     Rcpp_1.0.14    
 [5] lubridate_1.9.4 forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4    
 [9] purrr_1.0.4     readr_2.1.5     tidyr_1.3.1     tibble_3.2.1   
[13] ggplot2_3.5.2   tidyverse_2.0.0

loaded via a namespace (and not attached):
 [1] tensorA_0.36.2.1     bridgesampling_1.1-2 utf8_1.2.4          
 [4] generics_0.1.3       xml2_1.3.8           stringi_1.8.7       
 [7] lattice_0.22-6       hms_1.1.3            digest_0.6.37       
[10] magrittr_2.0.3       evaluate_1.0.3       grid_4.5.0          
[13] timechange_0.3.0     mvtnorm_1.3-3        fastmap_1.2.0       
[16] jsonlite_2.0.0       Matrix_1.7-3         backports_1.5.0     
[19] Brobdingnag_1.2-9    scales_1.3.0         abind_1.4-8         
[22] cli_3.6.4            rlang_1.1.6          munsell_0.5.1       
[25] withr_3.0.2          yaml_2.3.10          tools_4.5.0         
[28] parallel_4.5.0       tzdb_0.5.0           rstantools_2.4.0    
[31] checkmate_2.3.2      coda_0.19-4.1        colorspace_2.1-1    
[34] posterior_1.6.1      vctrs_0.6.5          R6_2.6.1            
[37] matrixStats_1.5.0    lifecycle_1.0.4      htmlwidgets_1.6.4   
[40] pkgconfig_2.0.3      RcppParallel_5.1.10  pillar_1.10.2       
[43] gtable_0.3.6         loo_2.8.0            glue_1.8.0          
[46] xfun_0.52            tidyselect_1.2.1     knitr_1.50          
[49] farver_2.1.2         bayesplot_1.12.0     nlme_3.1-168        
[52] htmltools_0.5.8.1    rmarkdown_2.29       compiler_4.5.0      
[55] distributional_0.5.0