180 likes | 302 Vues
This document provides an overview of R programs that use Monte Carlo simulations to estimate the power and actual level of two-sample unpaired t-tests and Wilcoxon tests for the mean and median. It includes detailed parameter initialization and simulation loops, allowing users to generate data, perform statistical tests, and analyze results. By adjusting various parameters such as means, standard deviations, sample sizes, and alpha levels, users can explore the performance of these tests across different scenarios.
E N D
BSTA 670 – Statistical Computing Lecture 14: Some R Simulations
############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate the power # of a two-sample unpaired t-test for the mean, for equal sample sizes. ############################################################################## # Initialize parameters # m1 is mean of normal distribution for group 1 # m2 is mean of normal distribution for group 2 # s is standard deviation of normal distribution for both groups # n is sample size for both groups, observations in in simulated data set # M is number of simulations to perform # a is alpha level for test m1<-5 m2<-7 s<-5 n<-100 M<-10000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL # Simulation loop for(i in 1:M) { x1<-rnorm(n,m1,s) # Generate simulated data for group 1 x2<-rnorm(n,m2,s) # Generate simulated data gor group 2 tp<-t.test(x1,x2)$p.value # Perform t-test and obtain p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } print(paste("Estimated Power of Test: ", mean(reject))) # # Theoretical power of test power.t.test(n=n,delta=m1-m2,sd=s,sig.level=a,type="two.sample",alternative="two.sided") Sim_ttest_power1.s
############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate actual # level of a two-sample unpaired t-test with equal sample sizes for the mean. ############################################################################## # Initialize parameters # m is mean of normal distributions # s is standard deviation of normal distributions # n is sample size, number of observations in simulated data sets # M is number of simulations to perform # a is confidence interval level m<-7 s<-5 n<-100 M<-1000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL # Simulation loop for(i in 1:M) { x1<-rnorm(n,m,s) # Generage data for group 1 x2<-rnorm(n,m,s) # Generate data for group 2 tp<-t.test(x1,x2)$p.value # Perform t-test and extract p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Add rejection indicator to results vector } print(paste("Estimated Level of Test: ", mean(reject))) Sim_ttest_alpha.s
############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate the power # curve of a two-sample unpaired t-test for the mean, for equal sample sizes. ############################################################################## # Initialize parameters # s is standard deviation of normal distribution for both groups # n is sample size for both groups, observations in in simulated data set # M is number of simulations to perform # a is alpha level for test # d s<-8 n<-100 M<-500 a<-0.05 k<-51 d<-5 ############################################################################### # Create matrix to receive results pcurve<-NULL # Loop over delta values for( delta in seq(from=-d,to=d, by=2*d/k) ){ # Create vector to receive results for current delta reject<-NULL # Loop to estimate power ar specified delta for(i in 1:M) { x1<-rnorm(n,0,s) # Generate simulated data for group 1, mean=0 x2<-rnorm(n,delta,s) # Generate simulated data for group 2, mean=delta tp<-t.test(x1,x2)$p.value # Perform t-test and extract p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result for current simulation i } pcurve<-rbind( pcurve, c(mean(reject),delta) ) # Save result for current delta } p<-pcurve[,1] m<-pcurve[,2] plot(m,p, xlab="Delta",ylab="Power") Sim_ttest_power_curve.s
############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate the power # curve of a Wilcoxon test (two-sample unpaired) for the medians, # for equal sample sizes. ############################################################################## # Initialize parameters # s is standard deviation of normal distribution for both groups # n is sample size for both groups, observations in in simulated data set # M is number of simulations to perform # a is alpha level for test # d s<-8 n<-100 M<-500 a<-0.05 k<-51 d<-5 ############################################################################### # Create matrix to receive results pcurve<-NULL # Loop over delta values for( delta in seq(from=-d,to=d, by=2*d/k) ){ # Create vector to receive results for current delta reject<-NULL # Loop to estimate power ar specified delta for(i in 1:M) { x1<-rnorm(n,0,s) # Generate simulated data for group 1, mean=0 x2<-rnorm(n,delta,s) # Generate simulated data for group 2, mean=delta tp<-wilcox.test(x1,x2)$p.value # Perform Wilcoxon test and extract p-value rejecti<-ifelse(tp<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result for current simulation i } pcurve<-rbind( pcurve, c(mean(reject),delta) ) # Save result for current delta } p<-pcurve[,1] m<-pcurve[,2] plot(m,p, xlab="Delta",ylab="Power") Sim_wilcoxon_power_curve.s
############################################################################################################################################################ # R program demonstrating use of Monte Carlo simulation to estimate confidence # interval coverage of asymptotic normal confidence interval for the mean. # The results of the simulated confidence intervals are plotted, with a line # denoting the actual mean. ############################################################################## # Initialize parameters # m is mean of normal distribution # s is standard deviation of normal distribution # n is sample size, number of observations in simulated data sets # M is number of simulations to perform # a is confidence interval level m<-7 s<-5 n<-100 M<-50 a<-0.95 ############################################################################### # Create vectors to receive results covers<-NULL lower<-NULL upper<-NULL yplot<-NULL xplot<-NULL # Simulation loop for(i in 1:M) { x<-rnorm(n,m,s) # Generate data ci<-t.test(x, conf.level=a) # Obtain confidence interval loweri<-ci$conf.int[1] # Extract lower confidence limit upperi<-ci$conf.int[2] # Extract upper confidence limit coveri<-ifelse(m>=loweri & m<=upperi,1,0) # determine if CI includes real mean covers<-c(covers,coveri) # add coverage indicator, coveri, to results vector lower<-c(lower,loweri) # add lower limit, loweri, to results vector upper<-c(upper,upperi) # add upper limit, upperi, to results vector xplot<-c(xplot,c(loweri,upperi,NA)) # update plotting vector for x yplot<-c(yplot, c(i, i, i)) # update plotting vector for y } Sim_coverage_normal.s Continued on next slide
print(paste("Estimated Coverage Probability: ", mean(covers))) plot(xplot,yplot,type='l', xlab="Confidence Interval",ylab="Simulation Number") abline(v=m) Sim_coverage_normal.s Continued from previous slide
Survival_exponential.s ############################################################################## # R program demonstrating generation of survival data with fitting of # parametric survival regression and Cox regression models to verify # relative rates ############################################################################## library(survival) # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution n<-200 m1<-0.5 m2<-0.6 cm<-1.0 ############################################################################### Continued on next slide
Survival_exponential.s # Create group indicator x<-c( rep(0,n) , rep(1,n) ) # Generate group 1 and group 2 complete survival times. y1<-rexp(n, rate=1/m1) y2<-rexp(n, rate=1/m2) y<-c(y1,y2) print(paste("Mean of Group 1 survival time: ", mean(y1))) print(paste("Mean of Group 2 survival time: ", mean(y2))) # Generate censoring times cen<-rexp(2*n, rate=1/cm) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit parametric survival regression out<-survreg(Surv(ycen, censored)~x, dist="exponential") print(out) # print(paste("Relative rate from simulation parameters: ", m2/m1)) print(paste("Estimated relative rate from parametric regression: ", exp(out$coefficients[2]))) # # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) print(out) print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) Continued from previous slide
############################################################################################################################################################ # R program demonstrating simulation to obtain power for Cox regression # model based on specified parameters ############################################################################## # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution # M is number of simulations to perform # a is alpha level of test n<-1000 m1<-0.5 m2<-0.6 cm<-1.0 M<-1000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL Sim_survival_exponential_power.s Continued on next slide
# Simulation loop for(i in 1:M) { # Create group indicator x<-c( rep(0,n) , rep(1,n) ) # Generate group 1 and group 2 complete survival times. y1<-rexp(n, rate=1/m1) y2<-rexp(n, rate=1/m2) y<-c(y1,y2) # print(paste("Mean of Group 1 survival time: ", mean(y1))) # print(paste("Mean of Group 2 survival time: ", mean(y2))) # Generate censoring times cen<-rexp(2*n, rate=1/cm) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) # print(out) # print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } print(paste("Estimated Power of Test: ", mean(reject))) Sim_survival_exponential_power.s Continued from previous slide
############################################################################################################################################################ # R program demonstrating simulation to obtain level for Cox regression # model based on specified parameters ############################################################################## # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution # M is number of simulations to perform # a is alpha level of test n<-1000 m<-0.5 cm<-1.0 M<-1000 a<-0.05 ############################################################################### # Create vector to receive results reject<-NULL Sim_survival_exponential_level.s Continued on next slide
# Simulation loop for(i in 1:M) { # Create group indicator x<-c( rep(0,n) , rep(1,n) ) # Generate group 1 and group 2 complete survival times. y1<-rexp(n, rate=1/m) y2<-rexp(n, rate=1/m) y<-c(y1,y2) # print(paste("Mean of Group 1 survival time: ", mean(y1))) # print(paste("Mean of Group 2 survival time: ", mean(y2))) # Generate censoring times cen<-rexp(2*n, rate=1/cm) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) # print(out) # print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } print(paste("Estimated Power of Test: ", mean(reject))) Sim_survival_exponential_level.s Continued from previous slide
############################################################################################################################################################ # R program demonstrating simulation to obtain level for Cox regression # model based on specified parameters, WITH TRUNCATION # NOTE: Set tm1=tm2==0.0000001 (something near 0) to get no truncation. # In this case get "a" for level. ############################################################################## library(survival) # Initialize parameters # n is sample size for both groups, observations in in simulated data set # m1 is mean for group 1 survival time distribution # m2 is mean for group 2 survival time distribution # cm is mean for censoring time distribution # M is number of simulations to perform # a is alpha level of test # tm1 is truncation mean for group 1 # tm2 is truncation mean for group 2 # ntrunc1 is number obs truncated from group 1 # ntrunc2 is number obs truncated from group 2 # ptrunc1 is proportion truncated from group 1 # ptrunc2 is proportion truncated from group 2 n<-1000 m1<-0.5 m2<-0.5 cm<-1.0 tm1<-0.10 tm2<-0.10 M<-50 a<-0.05 Continued on next slide Sim_survival_exponential_truncated_level.s
############################################################################################################################################################## # Create vector to receive results reject<-NULL ntrunc1<-NULL ntrunc2<-NULL ptrunc1<-NULL ptrunc2<-NULL # Create group indicator x<-c( rep(0,n) , rep(1,n) ) Continued from previous slide Sim_survival_exponential_truncated_level.s
# Simulation loop for(i in 1:M) { # Create vector to receive results y<-NULL trunc<-NULL cen<-NULL obs1<-0 ntrunci<-0 while(obs1<n) { # Generate group 1 complete survival times. yi<-rexp(1, rate=1/m1) # Generate truncation time trunci<-rexp(1, rate=1/tm1) # Generate censoring times ceni<-rexp(1, rate=1/cm) # Determine if observation is truncated truncated<-as.numeric(yi<trunci) # 1 if true if(truncated==0) { y<-c(y,yi) trunc<-c(trunc,trunci) cen<-c(cen,ceni) obs1<-obs1+1 } if(truncated==1) ntrunci<-ntrunci+1 } ntrunc1<-c(ntrunc1,ntrunci) ptrunc1<-c(ptrunc1, ntrunci/(n+ntrunci) ) Continued from previous slide Sim_survival_exponential_truncated_level.s
obs2<-0 ntrunci<-0 while(obs2<n) { # Generate group 2 complete survival times. yi<-rexp(1, rate=1/m2) # Generate truncation time trunci<-rexp(1, rate=1/tm2) # Generate censoring times ceni<-rexp(1, rate=1/cm) # Determine if observation is truncated truncated<-as.numeric(yi<trunci) # 1 if true if(truncated==0) { y<-c(y,yi) trunc<-c(trunc,trunci) cen<-c(cen,ceni) obs2<-obs2+1 } if(truncated==1) ntrunci<-ntrunci+1 } ntrunc2<-c(ntrunc2,ntrunci) ptrunc2<-c(ptrunc2, ntrunci/(n+ntrunci) ) if(i==1) print(paste("Number truncated from group 2 while simulating: ", notused)) if(i==1) print(paste("Mean of Group 1 survival time: ", mean(y[1:n]))) if(i==1) print(paste("Mean of Group 2 survival time: ", mean(y[(n+1):(2*n)]))) # Create observed censored survival time ycen<-pmin(y,cen) # Create censoring indicator, 0 for censored (y>cen), 1 for complete (y<=cen) censored<-as.numeric(y<=cen) # Fit Cox model out<-coxph(formula = Surv(ycen, censored) ~ x) # print(out) # print(paste("Estimated relative rate from Cox regression: ", exp(-out$coef))) pvaluei<- 2*(1-pnorm(sqrt(out$wald.test))) # Obtain p-value from Wald test rejecti<-ifelse(pvaluei<a,1,0) # Determine if test rejects reject<-c(reject,rejecti) # Save result } Sim_survival_exponential_truncated_level.s Continued from previous slide
print(paste("Estimated Power of Test: ", mean(reject))) print(paste("Average number truncated from group 1 while simulating: ", mean(ntrunc1))) print(paste("Average number truncated from group 2 while simulating: ", mean(ntrunc2))) print(paste("Average proportion truncated from group 1 while simulating: ", mean(ptrunc1))) print(paste("Average proportion truncated from group 2 while simulating: ", mean(ptrunc2))) Continued from previous slide Sim_survival_exponential_truncated_level.s