# Meta-analysis is a method that allows researchers to merge the scientific results from a number of studies into a single analysis. This is an especially useful method because it allows for a collective estimate of an effect using a pool of data much larger than could be achieved with any single study.
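# As a minimal sketch of the core idea (with made-up numbers purely for
# illustration): three hypothetical studies estimate the same effect with
# different precisions, and an inverse-variance weighted average pools them
# into a single, more precise estimate.
toy.est = c(.12, .08, .15)      # hypothetical study effect estimates
toy.se  = c(.05, .03, .06)      # hypothetical standard errors
toy.w   = 1/toy.se^2            # inverse-variance weights
sum(toy.w*toy.est)/sum(toy.w)   # pooled estimate
sqrt(1/sum(toy.w))              # pooled standard error, smaller than any single study's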
# Number of studies pooled in each meta-analysis.
n.studies = 50
# This stores the simulation results from all runs varying the true effect.
total.sim.results=matrix(NA, nrow=0, ncol=5)
# This is the true effect we want to detect; we loop over several candidate values.
for (true.eff in c(0,.05,.10,.15,.20)) {
# Standard deviation of the individual-level error within each study.
ind.sd = 1
# Study sizes are drawn from a chi-squared distribution times this scalar, plus 15.
s.scalar = 100
# Study means are drawn from a normal distribution scaled by this factor.
mean.scale = 1
# Study standard deviations are drawn from a uniform distribution scaled by this factor.
sd.scale = 20
# In this analysis I will structure things as a Monte Carlo simulation to test how this method works. Please note, I am just working from class notes on this, so I might be formulating the meta-analysis procedure incorrectly.
n.simulations = 100
# We will save the results of the Monte Carlo in a matrix.
sim.results = matrix(NA, nrow=n.simulations, ncol=5)
print(paste("True Effect:",true.eff))
for (s in 1:n.simulations) {
# First let's generate our study parameters:
s.size = ceiling(rnorm(n.studies)^2 * s.scalar)+15
s.sd = runif(n.studies)*sd.scale
s.mean = rnorm(n.studies)*mean.scale
# For didactic purposes I will simulate the individual studies, though this is not necessary.
ind.treat = ind.data = matrix(NA,nrow=max(s.size), ncol=n.studies)
est.corr = est.y.sd = est.x.sd = est.effect = rep(NA, n.studies)
for (i in 1:n.studies) {
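# Randomly assign each subject to treatment (TRUE/FALSE), then generate the
# outcome as the treatment effect plus individual noise, rescaled to the
# study's own standard deviation and mean.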
ind.treat[1:s.size[i],i] = rnorm(s.size[i])>0
ind.data[1:s.size[i],i] = (true.eff*ind.treat[1:s.size[i],i] + rnorm(s.size[i])*ind.sd)*s.sd[i] + s.mean[i]
# Estimate the treatment effect for each study by regressing the outcome on treatment.
est.effect[i] = lm(ind.data[1:s.size[i],i]~ind.treat[1:s.size[i],i])$coef[2]
# Estimate the standard deviation for each study
est.y.sd[i] = sd(ind.data[1:s.size[i],i], na.rm=T)
est.x.sd[i] = sd(ind.treat[1:s.size[i],i], na.rm=T)
# Estimate correlation between treatment and outcome
est.corr[i] = cor(ind.data[1:s.size[i],i], ind.treat[1:s.size[i],i])
}
# Because every study uses a different scale, each estimated effect is specific to that study's scale.
est.effect
# We therefore divide the estimated effect by the estimated standard deviation of the outcome to put all studies on a common scale.
z.est.effect = est.effect/est.y.sd
z.est.effect
# We can convert our standardized estimated effects back to correlations by
# multiplying by the standard deviation of the treatment indicator, and
# compare with the directly estimated correlations:
z.est.effect*est.x.sd
est.corr
# In order to aid analysis, transform the correlations to Fisher's z:
z.corr = .5*log((1+est.corr)/(1-est.corr))
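# (Aside: this transform is equivalent to atanh(est.corr); the inverse,
# mapping a z back to a correlation, is tanh(z).)
back.corr = tanh(z.corr)  # recovers est.corr up to floating-point error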
mean(z.corr)
# The variance of the individual Fisher z's can be approximated with:
z.var = 1/(s.size-3)
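# (For instance, a study with 103 subjects contributes a z with variance
# 1/(103-3) = .01; larger studies yield more precise z's.)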
# Calculate the between study variance
between.var = var(z.corr)
# In order to construct the test we need to specify weights, which are equal to the inverse of the sum of each study's variance and the between-study variance.
weight = 1/(between.var+z.var)
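# (A note on interpretation, if I follow the notes correctly: this weighting
# resembles a random-effects scheme, and the intercept of the weighted
# regression below is simply the inverse-variance weighted mean of the z's:)
pooled.z = sum(weight*z.corr)/sum(weight)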
# Now we can run the test to see if our pooled effect is statistically significant.
(sim.results[s,]=c(true.eff,summary(lm(z.corr~1, weights=weight))$coef))
# Name the columns of the results matrix.
if (s==1) colnames(sim.results)=c("true.eff" ,colnames(summary(lm(z.corr~1, weights=weight))$coef))
# Provide feedback to the user.
print(paste("Number of repetitions completed:",s))
}
total.sim.results = rbind(total.sim.results,sim.results)
}
# We can use the Monte Carlo results to test several features:
# 1. Test whether our estimator over-rejects the null.
# To do this we set the true effect equal to zero and see how frequently we reject at the 10% and 5% levels (true.eff=0).
mean(total.sim.results[total.sim.results[,1]==0,5]<.1)
# If this number is substantially higher than 10% (or 5%) then we should worry about our estimator finding significant effects when there actually are none.
# 2. We can test the power of our analysis to reject the null when there is an effect of a particular size (for example: true.eff=.1)
mean(total.sim.results[total.sim.results[,1]==.1,5]<.1)
# true.eff=.2
mean(total.sim.results[total.sim.results[,1]==.2,5]<.1)
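# As a convenience (a sketch beyond the original routine), the rejection
# rates at the 10% level for every simulated effect size can be tabulated in
# one call; column 5 of the results matrix holds the p-values:
tapply(total.sim.results[,5]<.1, total.sim.results[,1], mean)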
boxplot(total.sim.results[,5]~total.sim.results[,1], main="P values", ylab="P value", xlab="True Effect")
abline(h=.1, col=grey(.75))