* This simulation examines what happens when the error term of the underlying data-generating process is not normal (normality of the error is a key assumption of the Tobit model).
* This post is a follow-up to the previous post on Bottom Coding and Tobit from May 21st.
* ---- Simulation setup ----
* Start from an empty dataset.
clear
* Set the number of simulated observations.
set obs 1000
* Seed the random-number generator once so every draw below is reproducible.
* (An earlier "set seed 11" was removed: it was overridden by this seed before
* any random number was drawn, so it never affected the results.)
set seed 101
* Generate the explanatory variables.
* Sibling counts are Poisson draws with mean 3.
gen man_num_sibs = rpoisson(3)
label var man_num_sibs "The number of sibblings that the man has"
gen woman_num_sibs = rpoisson(3)
label var woman_num_sibs "The number of sibblings that the spouse has"
* Income is the absolute value of a standard normal draw, multiplied by 2.
gen income = abs(rnormal())*2
label var income "Family income, 10k/year"
* ---- Scenario 1: signed Poisson error ----
* The error is a Poisson(3) draw multiplied by +1 or -1; the sign comes from
* a Bernoulli(0.5) draw.
generate e1 = rpoisson(3) * (-1)^rbinomial(1, 0.5)
* Rescale the error so that its sample standard deviation equals 2.
summarize e1
replace e1 = e1 / r(sd) * 2
* Latent (uncensored) number of children.
generate Y1 = 0.8*man_num_sibs + 0.6*woman_num_sibs - 2*income + e1
label variable Y1 "The true underlying amount of children some men would have"
* Observed outcome: negative latent values are censored at zero.
generate Nchildren1 = max(Y1, 0)
* Tobit regression with a lower censoring limit of 0.
tobit Nchildren1 man_num_sibs woman_num_sibs income, ll(0)
* Author's note: despite a very non-normal error the tobit estimator still works quite well.
* ---- Scenario 2: signed log-normal error ----
* The error is exp() of a standard normal draw, multiplied by +1 or -1; the
* sign comes from a Bernoulli(0.5) draw.
generate e2 = exp(rnormal()) * (-1)^rbinomial(1, 0.5)
* Rescale the error so that its sample standard deviation equals 2.
summarize e2
replace e2 = e2 / r(sd) * 2
* Latent (uncensored) number of children.
generate Y2 = 0.8*man_num_sibs + 0.6*woman_num_sibs - 2*income + e2
label variable Y2 "The true underlying amount of children some men would have"
* Observed outcome: negative latent values are censored at zero.
generate Nchildren2 = max(Y2, 0)
* Tobit regression with a lower censoring limit of 0.
tobit Nchildren2 man_num_sibs woman_num_sibs income, ll(0)
* Author's note: despite a very non-normal error the tobit estimator still works quite well.
* ---- Scenario 3: signed "double log-normal" error ----
* The error is exp(exp()) of a standard normal draw, multiplied by +1 or -1;
* the sign comes from a Bernoulli(0.5) draw.
generate e3 = exp(exp(rnormal())) * (-1)^rbinomial(1, 0.5)
* Rescale the error so that its sample standard deviation equals 2.
summarize e3
replace e3 = e3 / r(sd) * 2
* Latent (uncensored) number of children.
generate Y3 = 0.8*man_num_sibs + 0.6*woman_num_sibs - 2*income + e3
label variable Y3 "The true underlying amount of children some men would have"
* Observed outcome: negative latent values are censored at zero.
generate Nchildren3 = max(Y3, 0)
* Tobit regression with a lower censoring limit of 0.
tobit Nchildren3 man_num_sibs woman_num_sibs income, ll(0)
* Author's note: despite a very non-normal error the tobit estimator still works fairly well.
* Compare with OLS on the same censored outcome. It is hard to tell from a single
* draw which estimator is preferred; a Monte Carlo simulation would be needed to
* see whether the tobit estimator appears unbiased. See the previous post on using
* simulations to understand bias.
regress Nchildren3 man_num_sibs woman_num_sibs income
* ---- Scenario 4: signed chi-squared error ----
* The error is a sum of 48 squared standard normal draws (a chi-squared draw
* with 48 degrees of freedom), multiplied by +1 or -1; the sign comes from a
* Bernoulli(0.5) draw. The explicit sum is kept as written so that the
* random-number stream, and therefore the results, are unchanged.
gen e4 = (-1)^rbinomial(1,.5)*(rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2+ ///
    rnormal()^2+rnormal()^2+rnormal()^2+rnormal()^2)
label var e4 "Bimodal error - tobit still works"
* Rescale the error so that its sample standard deviation equals 2.
sum e4
replace e4=e4/r(sd)*2
* Latent (uncensored) number of children.
gen Y4 = .8*man_num_sibs + .6*woman_num_sibs - 2*income + e4
* Fix: the label previously targeted Y3 (copy-paste slip), leaving Y4 unlabeled.
label var Y4 "The true underlying amount of children some men would have"
* Restrict the number of children to the non-negative range (censor at zero).
gen Nchildren4 = max(Y4,0)
* Tobit regression with a lower censoring limit of 0.
tobit Nchildren4 man_num_sibs woman_num_sibs income, ll(0)
* Author's note: despite a very non-normal error the tobit estimator still works quite well.
* Summarize every variable whose name is "e" plus one character (e1-e4 here).
sum e?
* Histogram of e4 with a kernel density overlay.
hist e4, kden
* No comments:
* Post a Comment