* Stata code, custom command
* vlook is a powerful command for rapidly assessing the capability of the current data to explain one variable
* Clear any earlier definition of vlook from memory so that it can be (re)defined below.
* capture swallows the error raised when no program of that name is loaded yet.
capture program drop vlook
* Defines the program vlook
*
* Syntax:   vlook varname
*
* Purpose:  quick exploratory look at how well the other variables in the
*           dataset in memory explain one numeric variable of interest.
*
* Steps:    1. summarize (detail) and draw a histogram of the variable
*           2. sort every other variable into three buckets by the absolute
*              value of its pairwise correlation with the variable:
*              [.25,.5), [.5,.75) and [.75,1]
*           3. regress the variable on all bucketed variables and keep the R2
*              as an "upper limit" on explainable variance
*           4. from the variables with |corr| >= .5, build a regressor list that
*              drops any candidate correlated at |corr| >= .90 with a regressor
*              already selected
*           5. run quantile regressions at 7 quantiles on that list and show the
*              coefficients (rounded to 2 decimals) as a Mata matrix named A
*           6. run and display the OLS regression on that list
*
* Returns:  r(regvars)  the regressor list built in step 4
*
* Side effects: draws a graph, overwrites the Mata global matrix A and the
*           current estimation results.
program define vlook, rclass
    * Parse exactly one numeric variable. syntax also expands abbreviations, so the
    * name comparison in the loop below works on the full variable name.
    syntax varname(numeric)
    local depvar `varlist'

    * Summarizes the variable of interest
    sum `depvar', detail
    * Creates a histogram of the variable
    hist `depvar'

    *** This section checks the correlation between the variable and all other variables in the dataset
    * Declare empty locals to hold the variable names of each correlation bucket
    local corr_matrix25
    local corr_matrix5
    local corr_matrix75
    * Loops through all variables in the dataset
    foreach v of varlist * {
        * Exclude the variable being examined
        if ("`v'" != "`depvar'") {
            * Pairwise correlation; capture so that string variables (or any other
            * variable corr cannot handle) are skipped instead of aborting
            cap corr `v' `depvar'
            * Only classify when corr succeeded and produced a non-missing rho.
            * Missing compares as larger than any number, so without this guard
            * a missing rho would land in the >= .75 bucket.
            if (_rc == 0 & r(rho) < .) {
                if (abs(r(rho)) >= .25 & abs(r(rho)) < .5) {
                    local corr_matrix25 `corr_matrix25' `v'
                }
                else if (abs(r(rho)) >= .5 & abs(r(rho)) < .75) {
                    local corr_matrix5 `corr_matrix5' `v'
                }
                else if (abs(r(rho)) >= .75) {
                    local corr_matrix75 `corr_matrix75' `v'
                }
            }
        }
    }

    * Report each bucket and show its correlation matrix
    di "`depvar' is correlated with `corr_matrix25' at (.25<=corr<.5)"
    corr `depvar' `corr_matrix25'
    di "`depvar' is correlated with `corr_matrix5' at (.5<=corr<.75)"
    corr `depvar' `corr_matrix5'
    di "`depvar' is correlated with `corr_matrix75' at (.75<=corr)"
    corr `depvar' `corr_matrix75'

    * Regress on every bucketed variable and save the R2. It is the upper limit of
    * explainable variance using the available variables whose correlation with the
    * variable of interest is at least .25 in absolute value.
    qui reg `depvar' `corr_matrix75' `corr_matrix5' `corr_matrix25'
    local upperlimit = string(e(r2), "%9.2f")

    *** This section makes a list of variables with explanatory power to analyze
    *** with respect to the variable of interest.
    * Creates an empty local to hold the selected regressors
    local reg_vars
    * Loops through only the variables whose correlation with the variable of
    * interest is at least .5 in absolute value, strongest bucket first
    foreach v in `corr_matrix75' `corr_matrix5' {
        * Flag set to 1 when `v' must be excluded from the regression
        local flag = 0
        * Compare the candidate against every regressor already selected
        foreach vv in `reg_vars' {
            qui corr `v' `vv'
            * A candidate too correlated with an already selected regressor is excluded
            if (abs(r(rho)) >= .90) {
                local flag = 1
                di "Excluding `v' from regression because of high correlation with `vv'"
                * Leave the inner loop; one offending regressor is enough
                continue, break
            }
        }
        * If the flag has not been set, add the candidate to the regression list
        if (`flag' == 0) local reg_vars `reg_vars' `v'
    }
    * Hand the selected regressor list back to the caller as r(regvars)
    return local regvars `reg_vars'

    *** This does a detailed quantile analysis of the explanatory power of the variables
    *** around different quantiles.
    * Quantiles in thousandths (125 means the .125 quantile)
    local quantile_list 125 250 375 500 625 750 875
    * Matrix A holds the results: row 1 is the quantile level, then one row per
    * regressor, and a last row for the constant; one column per quantile
    local nrows = wordcount("`reg_vars'")
    local ncols = wordcount("`quantile_list'")
    mata: A = J(`nrows' + 2, `ncols', 0)

    local col = 0
    foreach q in `quantile_list' {
        local ++col
        local tau = `q'/1000
        * First row: the quantile level of this column
        mata: A[1, `col'] = `tau'
        qui qreg `depvar' `reg_vars', quantile(`tau')
        * Rows 2 and up: coefficients in the order of reg_vars, then _cons
        local row = 1
        foreach v in `reg_vars' _cons {
            local ++row
            * Round to two decimals via string formatting; a coefficient that
            * cannot be read is stored as missing
            cap local temp = string(_b[`v'], "%9.2f")
            if _rc != 0 local temp = .
            mata: A[`row', `col'] = strtoreal(st_local("temp"))
        }
    }

    * This displays which variable corresponds to which row of the coefficient matrix
    di "Rows of the coefficient matrix on the quantile regression:"
    local i = 0
    foreach v in quantile `reg_vars' _cons {
        local ++i
        di "`i' - `v'"
    }
    mata: A

    * Finally show the OLS regression on the selected regressors
    di _newline "reg `depvar' `reg_vars'"
    reg `depvar' `reg_vars'
    di "The upper limit on explainable variance is `upperlimit'"
end
*******************************************************
* Command Example: load the crime dataset over the web, then run vlook on the variable crime *
* NOTE(review): this is the address as originally published; confirm it still resolves.
use "http://www.ats.ucla.edu/stat/stata/dae/crime", clear
vlook crime
* Comment:
*   Hi, I saved the above as vlook.ado and got this error when I tried to run the example:
*       system limit exceeded - see manual
* Reply:
*   Sorry, I have not seen this error before. The command is not as "powerful" as I thought it was, since in all of the applications that I have tried so far it has not been particularly useful.