# This post builds on my recent post that builds a simulation of a computer adaptive test (CAT) selecting items from an infinite item pool using fisher information as the criteria.

http://www.econometricsbysimulation.com/2012/11/computer-adaptive-test-assuming.html

# This post is very similar except that it uses instead the Kullback-Leibler (KL) information criteria to select items.

# More information about the KL can be found at:

http://www.econometricsbysimulation.com/2012/11/selecting-your-first-item-on-computer.html

#########################################################

# Specify the initial conditions:

true.ability = rnorm(1)

# Load three parameter model ICC

PL3 = function(theta,a, b, c) c+(1-c)*exp(a*(theta-b))/(1+exp(a*(theta-b)))

# Load three parameter item information:

PL3.info = function(theta, a, b, c) a^2 *(PL3(theta,a,b,c)-c)^2/(1-c)^2 * (1-PL3(theta,a,b,c))/PL3(theta,a,b,c)

#########################################################

# Mock Computer Adaptive Test

# First let's specify and initial guess

est.start = 0

# How much do we adjust our estimate of person ability when all answer's are either right or wrong.

est.jump = .7

# Number of items on the test. This will be the end condition.

num.items = 50

# Set the other parameters in the 3PL.

a.base = 3

c.base = .1

# We will draw all of the random outcomes in advance so that both methods of selecting items get the "same" draws.

rdraw = runif(num.items)

### Let's generate a vector to hold both ability estimates. Fisher and KL start at the same estimate.

ability.est = est.start

# Let's first generate a empty data frame to hold the set of item taken.

items = data.frame(a=NA,b=NA,c=NA,response=NA,p=NA, ability.est=NA)

i = 1

# For this first mock test we will not select items from a pool but instead assume the pool is infinite and has an item with an a=a.base, c=c.base, and b equal to whatever the current guess is.

# Let's select our first item - a,b,c, response are scalars that will be reused to simplify coding.

a=a.base

c=c.base

### This is the first divergence from the alternative CAT simulation.

### In order to select an item we must specify first a theta distribution to select the best item for.

### I will use 1000 draws from theta~normal()

theta.dist = rnorm(1000)

### Drawing from the second link posted above the KL information is defined as:

KL = function(theta.true,theta.estimate, a, b, c) {

# For the true value

p.true = PL3(theta.true,a,b,c)

q.true = 1-p.true

# For the estimate

p.estimate = PL3(theta.estimate,a,b,c)

q.estimate = 1-p.estimate

# The following line is the value to be returned to the KL function:

p.true*log(p.true/p.estimate) + q.true*log(q.true/q.estimate)

}

### In order to select the best item from all the infinite number of items I will use a maximimization routine to select the b while a and c are held constant.

### I want to define a function to maximize the KL over for a given theta estimate.

KLmax = function(b) mean(KL(theta.true=theta.dist, theta.estimate=ability.est[i], a=a, b=b, c=c))

### Now we want to optimize the KLmax function looking for the choice of b that gives us the highest value.

optim(0,fn=KLmax , method="Brent", lower=-6, upper=6, control=list(fnscale = -1))

b=optim(0,fn=KLmax , method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par

# Probability of getting the item correct

p=PL3(true.ability, a,b,c)

# Note that all of the ps are already draw

response = p > rdraw[i]

# The Item Characteristic Curve (ICC) gives the probability of getting the item correct.

# Thus, a .9 is the max of the runifrom that should produce a response of TRUE or correct or 1 (as far as R is concerned these TRUE and 1 are the same as is FALSE and 0)

items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])

# We have now successfully administered our first item.

# Should do our first MLE estimation?

# Not quite, unfortunately MLE requires in the bianary case that the student has answered at least one question right and at least one question wrong.

i=1+1

# Instead we will just adjust the ability estimate by the fixed factor (est.jump)

ability.est[i] = ability.est[i-1]-(-1)^(response)*est.jump

# Now we administer the second item:

# We will continue this until we get some heterogeneity in the responses

response.v = items$response

response.ave = sum(response.v)/length(response.v)

while ((response.ave == ceiling(response.ave)) & (num.items >= i) ) {

# This condition will no longer be true when at least one of the items is ansered correctly and one of the items answered incorrectly.

ability.est[i] = ability.est[i-1]-(-1)^(response)*est.jump

a=a.base

c=c.base

### Using the KL information criteria

b=optim(0,fn=KLmax , method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par

p=PL3(true.ability, a,b,c)

response = p > rdraw[i]

items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])

response.v = items$response

response.ave = sum(response.v)/length(response.v)

i=i+1

}

items

true.ability

# Now that we have some heterogeneity of responses we can use the MLE estimator

MLE = function(theta) sum(log((items$response==T)*PL3(theta, items$a, items$b, items$c) +

(items$response==F)*(1-PL3(theta, items$a, items$b, items$c))))

optim(0,MLE, method="Brent", lower=-6, upper=6, control=list(fnscale = -1))

# Okay, it seems to be working properly now we will loop through using the above function.

# The only thing we need change is the ability estimate.

while (num.items >= i) {

ability.est[i] = optim(0,MLE, method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par

a=a.base

c=c.base

### Using the KL information criteria

b=optim(0,fn=KLmax , method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par

p=PL3(true.ability, a,b,c)

response = p > rdraw[i]

items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])

response.v = items$response

response.ave = sum(response.v)/length(response.v)

i=i+1

}

ability.est[i] = optim(0,MLE, method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par

# Save the paths for comparison with Fisher information

items.KL = items

ability.est.KL = ability.est

#######################################################################

#####

##### Now let's compare this with the Fisher information item selection

#####

#######################################################################

ability.est = est.start

items = data.frame(a=NA,b=NA,c=NA,response=NA,p=NA, ability.est=NA)

i = 1

a=a.base

c=c.base

### Now b just equals the ability estimate

b=ability.est[i]

p=PL3(true.ability, a,b,c)

response = p > rdraw[i]

items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])

i=1+1

ability.est[i] = ability.est[i-1]-(-1)^(response)*est.jump

response.v = items$response

response.ave = sum(response.v)/length(response.v)

while ((response.ave == ceiling(response.ave)) & (num.items >= i) ) {

ability.est[i] = ability.est[i-1]-(-1)^(response)*est.jump

a=a.base

c=c.base

### Change how b is selected

b=ability.est[i]

p=PL3(true.ability, a,b,c)

response = p > rdraw[i]

items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])

response.v = items$response

response.ave = sum(response.v)/length(response.v)

i=i+1

}

while (num.items >= i) {

ability.est[i] = optim(0,MLE, method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par

a=a.base

c=c.base

### Change how b is selected

b=ability.est[i]

p=PL3(true.ability, a,b,c)

response = p > rdraw[i]

items[i,] = c(a=a, b=b, c=c, response=response, p=p, ability.est=ability.est[i])

response.v = items$response

response.ave = sum(response.v)/length(response.v)

i=i+1

}

# One final ability estimate calculation

ability.est[i] = optim(0,MLE, method="Brent", lower=-6, upper=6, control=list(fnscale = -1))$par

ability.est.Fisher = ability.est

items.Fisher = items

#######################################################################

minmax = function(x) c(min(x),max(x))

plot(0:num.items, ability.est.Fisher, type="l", main="CAT Estimates Ideally Converge on True Ability",

, xlab="Number of Items Administered", ylim=minmax(c(ability.est.Fisher,ability.est.KL)),

ylab="Estimated Ability", lwd=3, col="blue")

lines(0:num.items, ability.est.KL, lwd=2, col="green")

abline(h=true.ability, col="red", lwd=2)

## Graph I

# I have included two different graphs because I thought both of these were quite peculiar.

# The first graph is strange because in this chance draw the KL looks like it is doing as well or better than the Fisher.

# In almost all other samples that I ran it seemed to do considerably worse.

## Graph II

# Let's see what items both methods are choosing and how they relate to the true ability estimate.

plot(1:num.items, items.Fisher$b, type="l", main="Item's are selected using different criteria",

, xlab="Number of Items Administered", ylim=minmax(c(items.KL$b,items.Fisher$b)),

ylab="Estimated Ability", lwd=3, col="blue")

lines(1:num.items, items.KL$b, lwd=2, col="green")

abline(h=true.ability, col="red", lwd=2)

# Blue is Fisher

# Green is KL

# I have included two different graphs because I thought both of these were quite peculiar.

# The first graph is strange because in this chance draw the KL looks like it is doing as well or better than the Fisher.

# In almost all other samples that I ran it seemed to do considerably worse.

# Let's see what items both methods are choosing and how they relate to the true ability estimate.

plot(1:num.items, items.Fisher$b, type="l", main="Item's are selected using different criteria",

, xlab="Number of Items Administered", ylim=minmax(c(items.KL$b,items.Fisher$b)),

ylab="Item Difficulty", lwd=3, col="blue")

lines(1:num.items, items.KL$b, lwd=2, col="green")

abline(h=true.ability, col="red", lwd=2)

# The y-axis should read item difficulty.

# We can see the items tend to be closer to zero in difficulty for the KL selection (these items correspond to the most recent graph)

# From this simulation, it appears KL is an inferior item selection criteria.

# It is possible that this simulation is not taking into consideration all of the features of the KL criteria. Perhaps, if the item pool has differences in the parameters a and c then KL will perform better. I will leave that for a later simulation.

## No comments:

## Post a Comment