* Stata code, custom command

* vlook is a powerful command for rapidly assessing the capability of current data to explain one variable

* Drop any copy of vlook already in memory so the definition below can be re-run.

capture program drop vlook

* vlook varname
*
* Quick look at how well the other variables in the dataset explain one variable.
*   1. Summarizes the variable in detail and draws its histogram.
*   2. Sorts every other variable into three lists by absolute correlation with it:
*      [.25,.5), [.5,.75) and [.75,1], and shows a correlation table for each list.
*   3. Regresses the variable on all three lists and keeps the R-squared as the
*      "upper limit" of variance explainable by these variables.
*   4. From the variables with |corr| >= .5, builds a regressor list that skips any
*      variable whose |corr| with an already selected regressor is >= .90.
*   5. Runs quantile regressions at 7 quantiles and an OLS regression on that list.
*
* Argument:  the first token is the variable of interest (abbreviations allowed).
* Returns:   r(regvars)  the regressor list built in step 4.
* Side effect: the quantile-regression coefficient table is left in the Mata
*              matrix A (row 1 = quantile, then one row per regressor, then _cons).

program define vlook, rclass

    * Expand the first argument to a full variable name so that the
    * self-exclusion test in the loop below also works for abbreviations.
    unab depvar : `1', max(1)

    * Summarize the variable of interest and draw its histogram.
    sum `depvar', detail
    hist `depvar'

    *** Correlation of the variable with every other variable in the dataset

    * Empty lists, one per correlation band.
    local corr_matrix25
    local corr_matrix5
    local corr_matrix75

    tempname rho

    foreach v of varlist * {

        * Skip the variable being examined.
        if ("`v'" == "`depvar'") continue

        * corr fails for string variables among others; such variables are skipped.
        capture corr `v' `depvar'
        if (_rc != 0) continue

        * A missing correlation (for example a constant variable) would compare
        * as larger than any threshold, so it has to be dropped explicitly.
        scalar `rho' = abs(r(rho))
        if (missing(`rho')) continue

        if (`rho' >= .75) {
            local corr_matrix75 `corr_matrix75' `v'
        }
        else if (`rho' >= .5) {
            local corr_matrix5 `corr_matrix5' `v'
        }
        else if (`rho' >= .25) {
            local corr_matrix25 `corr_matrix25' `v'
        }
    }

    di "`depvar' is correlated with `corr_matrix25' at (.25<=corr<.5)"
    corr `depvar' `corr_matrix25'

    di "`depvar' is correlated with `corr_matrix5' at (.5<=corr<.75)"
    corr `depvar' `corr_matrix5'

    di "`depvar' is correlated with `corr_matrix75' at (.75<=corr)"
    corr `depvar' `corr_matrix75'

    * R-squared of the regression on every variable with |corr| >= .25. It is
    * reported at the end as the upper limit of explainable variance.
    qui reg `depvar' `corr_matrix75' `corr_matrix5' `corr_matrix25'
    local upperlimit = string(e(r2), "%9.2f")

    *** Build the list of regressors used in the detailed analysis

    local reg_vars

    * Only variables with |corr| >= .5 are candidates, strongest band first.
    foreach v in `corr_matrix75' `corr_matrix5' {

        * flag becomes 1 when the candidate must be left out of the regression.
        local flag = 0

        * Compare the candidate with every regressor already selected.
        foreach vv in `reg_vars' {

            qui corr `v' `vv'

            if (("`v'" != "`vv'") & !missing(r(rho)) & (abs(r(rho)) >= .90)) {
                local flag = 1
                di "Excluding `v' from regression because of high correlation with `vv'"
                * One collision is enough; leave the inner loop.
                continue, break
            }
        }

        if (`flag' == 0) local reg_vars `reg_vars' `v'
    }

    return local regvars `reg_vars'

    *** Quantile regressions of the variable on the selected regressors

    local quantile_list 125 250 375 500 625 750 875

    * Result matrix: one column per quantile; rows are the quantile level,
    * one coefficient per regressor, and the constant.
    local nrows = wordcount("`reg_vars'")
    local ncols = wordcount("`quantile_list'")
    mata: A = J(`nrows' + 2, `ncols', 0)

    local col = 0
    foreach q of local quantile_list {

        local ++col

        * Row 1 holds the quantile level (the list is in thousandths).
        mata: A[1, `col'] = `q'/1000

        qui qreg `depvar' `reg_vars', quantile(`=`q'/1000')

        local row = 1
        foreach v in `reg_vars' _cons {

            local ++row

            * Coefficients are stored rounded to two decimals; a coefficient
            * that cannot be read is stored as missing.
            capture local coef = string(_b[`v'], "%9.2f")
            if (_rc != 0) local coef .
            mata: A[`row', `col'] = strtoreal(st_local("coef"))
        }
    }

    * Legend: which row of the matrix belongs to which term.
    di "Rows of the coefficient matrix on the quantile regression:"
    local i = 0
    foreach v in quantile `reg_vars' _cons {
        local i = `i' + 1
        di "`i' - `v'"
    }

    mata: A

    * OLS on the same regressors for comparison.
    di _newline "reg `depvar' `reg_vars'"
    reg `depvar' `reg_vars'

    di "The upper limit on explainable variance is `upperlimit'"

end

*******************************************************

* Command Example *

* Load the example dataset over the network, replacing any data in memory (clear).
* NOTE(review): this needs internet access; confirm the URL is still being served.
use http://www.ats.ucla.edu/stat/stata/dae/crime, clear

* Run vlook with the variable crime as the variable of interest.
vlook crime

