## Saturday, April 28, 2012

### vlook

* Stata code, custom command
* vlook is a powerful command for rapidly assessing the capability of current data to explain one variable

* This code creates a Stata command that runs a diagnostic on a single variable in a data set.

* Remove any earlier definition of vlook from memory so it can be redefined below.
* capture suppresses the error raised when vlook has not been defined yet.
capture program drop vlook

* Defines the program vlook
*
* Usage:    vlook varname
* Purpose:  quick diagnostic of how well the other variables in the dataset
*           explain varname.
* Steps:    1. detailed summary and histogram of varname
*           2. bucket every other variable by |corr| with varname:
*              [.25,.5), [.5,.75) and [.75,1]
*           3. regress varname on all bucketed variables; the R-squared is
*              reported as the upper limit of explainable variance
*           4. build a regressor list from the variables with |corr| >= .5,
*              skipping any that is >= .90 correlated with one already kept
*           5. quantile regressions of varname on that list, collected in the
*              Mata matrix A (row 1 = quantile, then one row per regressor,
*              last row = _cons), followed by an OLS regression
* Returns:  r(regvars) - the regressor list built in step 4
* Note:     leaves the Mata globals A, i and ii behind.
program define vlook, rclass

    * Expand the argument to a full variable name. Without this an abbreviated
    * name would not match itself in the loop below and would be treated as
    * its own perfectly correlated predictor.
    unab target : `1', min(1) max(1)

    * Summarizes the variable of interest
    sum `target', detail

    * Creates a histogram of the variable
    hist `target'

    *** This section checks the correlation between the variable and all other variables in the dataset
    * Declare empty local macros to hold the variable lists
    local corr_matrix25
    local corr_matrix5
    local corr_matrix75

    * Scalar holding |rho| at full precision
    tempname rho

    * Loops through all possible variables
    foreach v of varlist * {
        * Exclude the variable being examined
        if ("`v'" != "`target'") {
            * Correlates each variable with the variable of interest.
            * capture lets string variables and empty samples fail quietly.
            cap corr `v' `target'
            if (_rc == 0) {
                scalar `rho' = abs(r(rho))
                * A missing rho (for example a constant variable) compares larger
                * than any number, so it must be ruled out before the thresholds.
                if (`rho' < .) {
                    * .25 <= |rho| < .5
                    if (`rho' >= .25 & `rho' < .5) local corr_matrix25 `corr_matrix25' `v'
                    * .5 <= |rho| < .75
                    else if (`rho' >= .5 & `rho' < .75) local corr_matrix5 `corr_matrix5' `v'
                    * |rho| >= .75
                    else if (`rho' >= .75) local corr_matrix75 `corr_matrix75' `v'
                }
            }
        }
    }

    di "`target' is correlated with `corr_matrix25' at (.25<=corr<.5)"
    corr `target' `corr_matrix25'
    di "`target' is correlated with `corr_matrix5' at (.5<=corr<.75)"
    corr `target' `corr_matrix5'
    di "`target' is correlated with `corr_matrix75' at (.75<=corr)"
    corr `target' `corr_matrix75'

    qui reg `target' `corr_matrix75' `corr_matrix5' `corr_matrix25'
    * This saves the r2 from the previous regression. It is the upper limit of
    * explainable variance using the available variables whose correlation
    * with the variable of interest is at least .25 in absolute value.
    local upperlimit = string(e(r2), "%9.2f")

    *** This section makes a list of variables with explanatory power to
    *** analyze with respect to the variable of interest.
    * Creates an empty local to hold variables.
    local reg_vars

    * Loops through only variables that are at least .5 correlated with the variable of interest.
    foreach v in `corr_matrix75' `corr_matrix5' {
        * Flag telling Stata to exclude the variable from the regression.
        local flag = 0
        * Loops through all of the variables that have already been added to reg_vars.
        foreach vv in `reg_vars' {
            * capture: a pair with no overlapping observations must not abort the program
            cap corr `v' `vv'
            * If the variable is too correlated with one already kept it is excluded.
            if (_rc == 0 & r(rho) < . & abs(r(rho)) >= .90) {
                local flag = 1
                di "Excluding `v' from regression because of high correlation with `vv'"
                * Ends the most nested loop
                continue, break
            }
        }
        * If the flag has not been raised add the variable to the regression
        if (`flag' == 0) local reg_vars `reg_vars' `v'
    }

    return local regvars `reg_vars'

    *** This does a detailed quantile analysis of the explanatory power of the variables
    *** around different quantiles.

    * Quantiles expressed in thousandths, so 125 means the .125 quantile
    local quantile_list 125 250 375 500 625 750 875

    * First declare a matrix to hold the results of the quantile regressions:
    * one row for the quantile level, one per regressor, one for _cons.
    local nrows = wordcount("`reg_vars'")
    local ncols = wordcount("`quantile_list'")

    mata: A = J(strtoreal(st_local("nrows")) + 2, strtoreal(st_local("ncols")), 0)

    * Assign the quantile levels to the first row of the matrix
    mata: i = 0
    foreach v in `quantile_list' {
        mata: i++
        mata: A[1, i] = strtoreal(st_local("v")) / 1000
    }

    * One column per quantile: rows 2 onward hold the rounded coefficients
    mata: i = 0
    foreach q in `quantile_list' {
        mata: i++
        qui qreg `target' `reg_vars', quantile(`=`q'/1000')
        mata: ii = 1

        foreach v in `reg_vars' _cons {
            mata: ii++
            * A coefficient that is unavailable is stored as missing
            cap local temp = string(_b[`v'], "%9.2f")
            if _rc != 0 local temp = .
            mata: A[ii, i] = strtoreal(st_local("temp"))
        }
    }

    * This displays which variable corresponds to which row of the matrix.
    di "Rows of the coefficient matrix on the quantile regression:"
    local i = 0
    foreach v in quantile `reg_vars' _cons {
        local i = `i' + 1
        di "`i' - `v'"
    }
    mata: A

    di _newline "reg `target' `reg_vars'"
    reg `target' `reg_vars'

    di "The upper limit on explainable variance is `upperlimit'"

end

*******************************************************
*                  Command Example                    *

* Load the UCLA crime example dataset over the web, replacing any data in
* memory, then run the diagnostic on the variable crime.
use "http://www.ats.ucla.edu/stat/stata/dae/crime", clear
vlook crime