## Saturday, September 22, 2012

### OLS with correlation between x and u

* Stata and # R Simulation

set more off
* Turn off the -more- pause so long output scrolls without stopping
* (I have it set to permanently off on my computer)

clear
* Clear the old data

local n 1000
* Sample size. Kept in one place so the dataset size and the matching loop
* below can never disagree.

set obs `n'
* Tell Stata how many observations to make available for data generation.

gen x = rnormal()
* This is some random explanatory variable

sort x
* Now the data is ordered from the smallest x to the largest x

gen id = _n
* id records the rank of each x (1 = smallest x, n = largest x), so each
* observation keeps a unique id after the data is re-sorted below

gen u = rnormal()
* u is the unobserved error in the model

sort u
* Now the data is ordered from the smallest u to the largest u

gen x2 = .
* We are going to match up the smallest u with the smallest x.
* After the sort on u, row k holds the k-th smallest u. For every row i the
* loop copies that row's x into the row whose position equals the x's rank
* (id), so row k ends up holding the k-th smallest x next to the k-th
* smallest u.

forvalues i = 1/`n' {
    * quietly suppresses the "(1 real change made)" message on every pass
    quietly replace x2 = x[`i'] if _n == id[`i']
}

drop x
* Get rid of the original x variable
rename x2 x
* The rank-matched variable now takes the place of x

corr x u
/*           |        x        u
-------------+------------------
x |   1.0000
u |   0.9980   1.0000  */

gen y = 5 + 2*x + u*5
* True model: intercept 5, slope 2 on x, error term 5*u (correlated with x)

reg y x
* OLS of y on x; the output from one run is reproduced below

/*

Source |       SS       df       MS              Number of obs =    1000
-------------+------------------------------           F(  1,   998) =       .
Model |  50827.8493     1  50827.8493           Prob > F      =  0.0000
Residual |  55.8351723   998  .055947066           R-squared     =  0.9989
Total |  50883.6844   999  50.9346191           Root MSE      =  .23653

------------------------------------------------------------------------------
y |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
x |   7.145123   .0074963   953.15   0.000     7.130412    7.159833
_cons |   4.858391   .0074869   648.92   0.000     4.843699    4.873083
------------------------------------------------------------------------------
*/

# The same simulation, this time in R

# Draw x and u independently, then sort each vector on its own so that the
# i-th smallest x sits next to the i-th smallest u.
x <- sort(rnorm(n = 1000))
u <- sort(rnorm(n = 1000))

# Outcome: intercept 5, slope 2 on x, plus the error u scaled by 5
y <- 5 + 2 * x + 5 * u

# Regress y on x by OLS and print the fitted model's summary
summary(lm(formula = y ~ x))
# In R the whole simulation takes only a handful of lines