R Code
# This post builds on my recent post that builds a simulation of a computer adaptive test (CAT) selecting items from an infinite item pool using fisher information as the criteria.
http://www.econometricsbysimulation.com/2012/11/computer-adaptive-test-assuming.html
# This post is very similar except that it uses instead the Kullback-Leibler (KL) information criteria to select items.
# More information about the KL can be found at:
http://www.econometricsbysimulation.com/2012/11/selecting-your-first-item-on-computer.html
#########################################################
# Specify the initial conditions:
true.ability = rnorm(1)
# Load three parameter model ICC
PL3 = function(theta,a, b, c) c+(1-c)*exp(a*(theta-b))/(1+exp(a*(theta-b)))
# Load three parameter item information:
PL3.info = function(theta, a, b, c) a^2 *(PL3(theta,a,b,c)-c)^2/(1-c)^2 * (1-PL3(theta,a,b,c))/PL3(theta,a,b,c)
#########################################################
# Mock Computer Adaptive Test
# First let's specify and initial guess
est.start = 0
# How much do we adjust our estimate of person ability when all answer's are either right or wrong.
est.jump = .7
# Number of items on the test. This will be the end condition.
num.items = 50
# Set the other parameters in the 3PL.
a.base = 3
c.base = .1
# We will draw all of the random outcomes in advance so that both methods of selecting items get the "same" draws.
rdraw = runif(num.items)
### Let's generate a vector to hold both ability estimates. Fisher and KL start at the same estimate.
ability.est = est.start
# Let's first generate a empty data frame to hold the set of item taken.
items = data.frame(a=NA,b=NA,c=NA,response=NA,p=NA, ability.est=NA)
i = 1
# For this first mock test we will not select items from a pool but instead assume the pool is infinite and has an item with an a=a.base, c=c.base, and b equal to whatever the current guess is.
# Let's select our first item - a,b,c, response are scalars that will be reused to simplify coding.
a=a.base
c=c.base
### This is the first divergence from the alternative CAT simulation.
### In order to select an item we must specify first a theta distribution to select the best item for.
### I will use 1000 draws from theta~normal()
theta.dist = rnorm(1000)
### Drawing from the second link posted above the KL information is defined as:
KL = function(theta.true,theta.estimate, a, b, c) {
# For the true value
p.true = PL3(theta.true,a,b,c)
q.true = 1-p.true
# For the estimate
p.estimate = PL3(theta.estimate,a,b,c)
q.estimate = 1-p.estimate
# The following line is the value to be returned to the KL function:
p.true*log(p.true/p.estimate) + q.true*log(q.true/q.estimate)
}
### In order to select the best item from all the infinite number of items I will use a maximimization routine to select the b while a and c are held constant.
### I want to define a function to maximize the KL over for a given theta estimate.
KLmax = function(b) mean(KL(theta.true=theta.dist, theta.estimate=ability.est[i], a=a, b=b, c=c))
### Now we want to optimize the KLmax function looking for the choice of b that gives us the highest value.
optim(0,fn=KLmax , method="Brent", lower=-6, upper=6, control=list(fnscale = -1))
b=optim(0,fn=KLmax , method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par
# Probability of getting the item correct
p=PL3(true.ability, a,b,c)
# Note that all of the ps are already draw
response = p > rdraw[i]
# The Item Characteristic Curve (ICC) gives the probability of getting the item correct.
# Thus, a .9 is the max of the runifrom that should produce a response of TRUE or correct or 1 (as far as R is concerned these TRUE and 1 are the same as is FALSE and 0)
items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])
# We have now successfully administered our first item.
# Should do our first MLE estimation?
# Not quite, unfortunately MLE requires in the bianary case that the student has answered at least one question right and at least one question wrong.
i=1+1
# Instead we will just adjust the ability estimate by the fixed factor (est.jump)
ability.est[i] = ability.est[i-1]-(-1)^(response)*est.jump
# Now we administer the second item:
# We will continue this until we get some heterogeneity in the responses
response.v = items$response
response.ave = sum(response.v)/length(response.v)
while ((response.ave == ceiling(response.ave)) & (num.items >= i) ) {
# This condition will no longer be true when at least one of the items is ansered correctly and one of the items answered incorrectly.
ability.est[i] = ability.est[i-1]-(-1)^(response)*est.jump
a=a.base
c=c.base
### Using the KL information criteria
b=optim(0,fn=KLmax , method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par
p=PL3(true.ability, a,b,c)
response = p > rdraw[i]
items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])
response.v = items$response
response.ave = sum(response.v)/length(response.v)
i=i+1
}
items
true.ability
# Now that we have some heterogeneity of responses we can use the MLE estimator
MLE = function(theta) sum(log((items$response==T)*PL3(theta, items$a, items$b, items$c) +
(items$response==F)*(1-PL3(theta, items$a, items$b, items$c))))
optim(0,MLE, method="Brent", lower=-6, upper=6, control=list(fnscale = -1))
# Okay, it seems to be working properly now we will loop through using the above function.
# The only thing we need change is the ability estimate.
while (num.items >= i) {
ability.est[i] = optim(0,MLE, method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par
a=a.base
c=c.base
### Using the KL information criteria
b=optim(0,fn=KLmax , method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par
p=PL3(true.ability, a,b,c)
response = p > rdraw[i]
items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])
response.v = items$response
response.ave = sum(response.v)/length(response.v)
i=i+1
}
ability.est[i] = optim(0,MLE, method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par
# Save the paths for comparison with Fisher information
items.KL = items
ability.est.KL = ability.est
#######################################################################
#####
##### Now let's compare this with the Fisher information item selection
#####
#######################################################################
ability.est = est.start
items = data.frame(a=NA,b=NA,c=NA,response=NA,p=NA, ability.est=NA)
i = 1
a=a.base
c=c.base
### Now b just equals the ability estimate
b=ability.est[i]
p=PL3(true.ability, a,b,c)
response = p > rdraw[i]
items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])
i=1+1
ability.est[i] = ability.est[i-1]-(-1)^(response)*est.jump
response.v = items$response
response.ave = sum(response.v)/length(response.v)
while ((response.ave == ceiling(response.ave)) & (num.items >= i) ) {
ability.est[i] = ability.est[i-1]-(-1)^(response)*est.jump
a=a.base
c=c.base
### Change how b is selected
b=ability.est[i]
p=PL3(true.ability, a,b,c)
response = p > rdraw[i]
items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])
response.v = items$response
response.ave = sum(response.v)/length(response.v)
i=i+1
}
while (num.items >= i) {
ability.est[i] = optim(0,MLE, method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par
a=a.base
c=c.base
### Change how b is selected
b=ability.est[i]
p=PL3(true.ability, a,b,c)
response = p > rdraw[i]
items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])
response.v = items$response
response.ave = sum(response.v)/length(response.v)
i=i+1
}
# One final ability estimate calculation
ability.est[i] = optim(0,MLE, method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par
ability.est.Fisher = ability.est
items.Fisher = items
#######################################################################
minmax = function(x) c(min(x),max(x))
plot(0:num.items, ability.est.Fisher, type="l", main="CAT Estimates Ideally Converge on True Ability",
, xlab="Number of Items Administered", ylim=minmax(c(ability.est.Fisher,ability.est.KL)),
ylab="Estimated Ability", lwd=3, col="blue")
lines(0:num.items, ability.est.KL, lwd=2, col="green")
abline(h=true.ability, col="red", lwd=2)
## Graph I
# I have included two different graphs because I thought both of these were quite peculiar.
# The first graph is strange because in this chance draw the KL looks like it is doing as well or better than the Fisher.
# In almost all other samples that I ran it seemed to do considerably worse.
## Graph II
# Let's see what items both methods are choosing and how they relate to the true ability estimate.
plot(1:num.items, items.Fisher$b, type="l", main="Item's are selected using different criteria",
, xlab="Number of Items Administered", ylim=minmax(c(items.KL$b,items.Fisher$b)),
ylab="Estimated Ability", lwd=3, col="blue")
lines(1:num.items, items.KL$b, lwd=2, col="green")
abline(h=true.ability, col="red", lwd=2)
# Blue is Fisher
# Green is KL
# I have included two different graphs because I thought both of these were quite peculiar.
# The first graph is strange because in this chance draw the KL looks like it is doing as well or better than the Fisher.
# In almost all other samples that I ran it seemed to do considerably worse.
# Let's see what items both methods are choosing and how they relate to the true ability estimate.
plot(1:num.items, items.Fisher$b, type="l", main="Item's are selected using different criteria",
, xlab="Number of Items Administered", ylim=minmax(c(items.KL$b,items.Fisher$b)),
ylab="Item Difficulty", lwd=3, col="blue")
lines(1:num.items, items.KL$b, lwd=2, col="green")
abline(h=true.ability, col="red", lwd=2)
# The y-axis should read item difficulty.
# We can see the items tend to be closer to zero in difficulty for the KL selection (these items correspond to the most recent graph)
# From this simulation, it appears KL is an inferior item selection criteria.
# It is possible that this simulation is not taking into consideration all of the features of the KL criteria. Perhaps, if the item pool has differences in the parameters a and c then KL will perform better. I will leave that for a later simulation.