In this post I will present code for detecting deviations in variable values for data that has been entered twice. First I will simulate some data. Then I will detecting deviations.

clear

set obs 300

* Define a class ID

gen cID = ceil(_n/20)

* This will generate a variable that counts twenty values per classroom

* Define a student ID.

* We have two entries per student which are defined

* only within each class from 0 to 9

gen sID = mod(ceil(_n/2)-1, 10)

* Imagine we have some variables that are inconsistently

* recorded at different times

gen a = ceil(_n/3)

* a varies every other student

gen b = ceil(_n/5)

* b varies every third student

gen c = ceil(_n/7)

* c varies every fourth student

* Let's also imagine that variables a and c are strings

tostring a c, replace

* Let's get an idea of what this looks like

list in 1/6

* | cID sID a b c |

* |-----------------------|

* 1. | 1 0 1 1 1 |

* 2. | 1 0 1 1 1 |

* 3. | 1 1 1 1 1 |

* 4. | 1 1 2 1 1 |

* 5. | 1 2 2 1 1 |

* 6. | 1 2 2 2 1 |

list in 21/22

* | cID sID a b c |

* |-----------------------|

* 21. | 2 0 7 5 3 |

* 22. | 2 0 8 5 4 |

* ----------------------------------------------------------------

* ----------------------------------------------------------------* Now let's start what we need to detect deviations

* Specify the group variables

global grouping cID sID

* Specify the variables to check

global checkvariables a b c

egen uniqueID = group($grouping)

* This gives us a total of 150 'groups' which is what we expect

* since students vary.

* Let's imagine that twenty of our entries are missing

forv i=1/20 {

drop if _n==`=ceil(runiform()*_N)'

}

* We would like to detect divergences in the variables a b and c

* within a student ID group.

* We would also like to detect if we only have one entry for a student

* First we grab the max uniqueID

quietly: sum uniqueID

* First we will do a student by student level search

forv i=1(1)`r(max)' {

preserve

qui keep if uniqueID == `i'

local divergence

foreach v of varlist ${checkvariables} {

quietly: tabulate `v'

if (r(r)>1) local divergence `divergence' `v'

}

local sID = sID

local cID = cID

if ("`divergence'" != "") ///

di "Student `sID' in class `cID' variable(s): `divergence'"

if (_N==1) ///

di "Student `sID' in class `cID' has only ONE entry"

restore

}

* ----------------------------------------------------------------

* ----------------------------------------------------------------

* Output

Student 1 in class 1 variable(s): a

Student 2 in class 1 variable(s): b

Student 3 in class 1 variable(s): c

Student 4 in class 1 variable(s): a

Student 7 in class 1 variable(s): a b

Student 0 in class 2 variable(s): a c

Student 1 in class 2 has only ONE entry

Student 2 in class 2 variable(s): b

Student 3 in class 2 variable(s): a

Student 6 in class 2 variable(s): a

Student 7 in class 2 variable(s): b c

Student 9 in class 2 variable(s): a

Student 2 in class 3 variable(s): a b

Student 4 in class 3 variable(s): c

Student 5 in class 3 variable(s): a

Student 7 in class 3 has only ONE entry

Student 8 in class 3 variable(s): a

Student 9 in class 3 has only ONE entry

Student 1 in class 4 has only ONE entry

Student 2 in class 4 variable(s): b

Student 3 in class 4 has only ONE entry

Student 4 in class 4 variable(s): a

Student 7 in class 4 variable(s): a b

Student 8 in class 4 has only ONE entry

Student 0 in class 5 variable(s): a

Student 2 in class 5 has only ONE entry

Student 3 in class 5 has only ONE entry

Student 5 in class 5 variable(s): c

Student 6 in class 5 variable(s): a

Student 7 in class 5 variable(s): b

Student 9 in class 5 variable(s): a

Student 2 in class 6 variable(s): a b c

Student 5 in class 6 has only ONE entry

Student 7 in class 6 has only ONE entry

Student 8 in class 6 variable(s): a

Student 9 in class 6 variable(s): c

Student 1 in class 7 variable(s): a

Student 2 in class 7 variable(s): b

Student 4 in class 7 variable(s): a

Student 6 in class 7 variable(s): c

Student 7 in class 7 variable(s): a b

Student 0 in class 8 variable(s): a

Student 2 in class 8 has only ONE entry

Student 3 in class 8 variable(s): a c

Student 6 in class 8 variable(s): a

Student 7 in class 8 variable(s): b

Student 9 in class 8 variable(s): a

Student 0 in class 9 variable(s): c

Student 2 in class 9 variable(s): a b

Student 3 in class 9 has only ONE entry

Student 5 in class 9 variable(s): a

Student 7 in class 9 variable(s): b c

Student 8 in class 9 variable(s): a

Student 1 in class 10 variable(s): a

Student 2 in class 10 has only ONE entry

Student 4 in class 10 has only ONE entry

Student 7 in class 10 variable(s): a b

Student 0 in class 11 variable(s): a

Student 1 in class 11 variable(s): c

Student 2 in class 11 variable(s): b

Student 3 in class 11 variable(s): a

Student 6 in class 11 has only ONE entry

Student 7 in class 11 has only ONE entry

Student 8 in class 11 variable(s): c

Student 9 in class 11 variable(s): a

Student 2 in class 12 variable(s): a b

Student 5 in class 12 variable(s): a c

Student 7 in class 12 variable(s): b

Student 8 in class 12 variable(s): a

Student 1 in class 13 variable(s): a

Student 2 in class 13 variable(s): b c

Student 4 in class 13 variable(s): a

Student 5 in class 13 has only ONE entry

Student 7 in class 13 variable(s): a b

Student 9 in class 13 variable(s): c

Student 0 in class 14 variable(s): a

Student 1 in class 14 has only ONE entry

Student 2 in class 14 variable(s): b

Student 3 in class 14 variable(s): a

Student 6 in class 14 variable(s): a c

Student 7 in class 14 variable(s): b

Student 9 in class 14 variable(s): a

Student 2 in class 15 variable(s): a b

Student 3 in class 15 variable(s): c

Student 5 in class 15 has only ONE entry

Student 6 in class 15 has only ONE entry

Student 7 in class 15 variable(s): b

Student 8 in class 15 variable(s): a

clear

set obs 300

* Define a class ID

gen cID = ceil(_n/20)

* This will generate a variable that counts twenty values per classroom

* Define a student ID.

* We have two entries per student which are defined

* only within each class from 0 to 9

gen sID = mod(ceil(_n/2)-1, 10)

* Imagine we have some variables that are inconsistently

* recorded at different times

gen a = ceil(_n/3)

* a varies every other student

gen b = ceil(_n/5)

* b varies every third student

gen c = ceil(_n/7)

* c varies every fourth student

* Let's also imagine that variables a and c are strings

tostring a c, replace

* Let's get an idea of what this looks like

list in 1/6

* | cID sID a b c |

* |-----------------------|

* 1. | 1 0 1 1 1 |

* 2. | 1 0 1 1 1 |

* 3. | 1 1 1 1 1 |

* 4. | 1 1 2 1 1 |

* 5. | 1 2 2 1 1 |

* 6. | 1 2 2 2 1 |

list in 21/22

* | cID sID a b c |

* |-----------------------|

* 21. | 2 0 7 5 3 |

* 22. | 2 0 8 5 4 |

* ----------------------------------------------------------------

* ----------------------------------------------------------------* Now let's start what we need to detect deviations

* Specify the group variables

global grouping cID sID

* Specify the variables to check

global checkvariables a b c

egen uniqueID = group($grouping)

* This gives us a total of 150 'groups' which is what we expect

* since students vary.

* Let's imagine that twenty of our entries are missing

forv i=1/20 {

drop if _n==`=ceil(runiform()*_N)'

}

* We would like to detect divergences in the variables a b and c

* within a student ID group.

* We would also like to detect if we only have one entry for a student

* First we grab the max uniqueID

quietly: sum uniqueID

* First we will do a student by student level search

forv i=1(1)`r(max)' {

preserve

qui keep if uniqueID == `i'

local divergence

foreach v of varlist ${checkvariables} {

quietly: tabulate `v'

if (r(r)>1) local divergence `divergence' `v'

}

local sID = sID

local cID = cID

if ("`divergence'" != "") ///

di "Student `sID' in class `cID' variable(s): `divergence'"

if (_N==1) ///

di "Student `sID' in class `cID' has only ONE entry"

restore

}

* ----------------------------------------------------------------

* ----------------------------------------------------------------

* Output

Student 1 in class 1 variable(s): a

Student 2 in class 1 variable(s): b

Student 3 in class 1 variable(s): c

Student 4 in class 1 variable(s): a

Student 7 in class 1 variable(s): a b

Student 0 in class 2 variable(s): a c

Student 1 in class 2 has only ONE entry

Student 2 in class 2 variable(s): b

Student 3 in class 2 variable(s): a

Student 6 in class 2 variable(s): a

Student 7 in class 2 variable(s): b c

Student 9 in class 2 variable(s): a

Student 2 in class 3 variable(s): a b

Student 4 in class 3 variable(s): c

Student 5 in class 3 variable(s): a

Student 7 in class 3 has only ONE entry

Student 8 in class 3 variable(s): a

Student 9 in class 3 has only ONE entry

Student 1 in class 4 has only ONE entry

Student 2 in class 4 variable(s): b

Student 3 in class 4 has only ONE entry

Student 4 in class 4 variable(s): a

Student 7 in class 4 variable(s): a b

Student 8 in class 4 has only ONE entry

Student 0 in class 5 variable(s): a

Student 2 in class 5 has only ONE entry

Student 3 in class 5 has only ONE entry

Student 5 in class 5 variable(s): c

Student 6 in class 5 variable(s): a

Student 7 in class 5 variable(s): b

Student 9 in class 5 variable(s): a

Student 2 in class 6 variable(s): a b c

Student 5 in class 6 has only ONE entry

Student 7 in class 6 has only ONE entry

Student 8 in class 6 variable(s): a

Student 9 in class 6 variable(s): c

Student 1 in class 7 variable(s): a

Student 2 in class 7 variable(s): b

Student 4 in class 7 variable(s): a

Student 6 in class 7 variable(s): c

Student 7 in class 7 variable(s): a b

Student 0 in class 8 variable(s): a

Student 2 in class 8 has only ONE entry

Student 3 in class 8 variable(s): a c

Student 6 in class 8 variable(s): a

Student 7 in class 8 variable(s): b

Student 9 in class 8 variable(s): a

Student 0 in class 9 variable(s): c

Student 2 in class 9 variable(s): a b

Student 3 in class 9 has only ONE entry

Student 5 in class 9 variable(s): a

Student 7 in class 9 variable(s): b c

Student 8 in class 9 variable(s): a

Student 1 in class 10 variable(s): a

Student 2 in class 10 has only ONE entry

Student 4 in class 10 has only ONE entry

Student 7 in class 10 variable(s): a b

Student 0 in class 11 variable(s): a

Student 1 in class 11 variable(s): c

Student 2 in class 11 variable(s): b

Student 3 in class 11 variable(s): a

Student 6 in class 11 has only ONE entry

Student 7 in class 11 has only ONE entry

Student 8 in class 11 variable(s): c

Student 9 in class 11 variable(s): a

Student 2 in class 12 variable(s): a b

Student 5 in class 12 variable(s): a c

Student 7 in class 12 variable(s): b

Student 8 in class 12 variable(s): a

Student 1 in class 13 variable(s): a

Student 2 in class 13 variable(s): b c

Student 4 in class 13 variable(s): a

Student 5 in class 13 has only ONE entry

Student 7 in class 13 variable(s): a b

Student 9 in class 13 variable(s): c

Student 0 in class 14 variable(s): a

Student 1 in class 14 has only ONE entry

Student 2 in class 14 variable(s): b

Student 3 in class 14 variable(s): a

Student 6 in class 14 variable(s): a c

Student 7 in class 14 variable(s): b

Student 9 in class 14 variable(s): a

Student 2 in class 15 variable(s): a b

Student 3 in class 15 variable(s): c

Student 5 in class 15 has only ONE entry

Student 6 in class 15 has only ONE entry

Student 7 in class 15 variable(s): b

Student 8 in class 15 variable(s): a

## No comments:

## Post a Comment