options mlogic mprint symbolgen; %macro tablew(server,indatay, ywt,yvar,rowvar,colvar,pagevar,rowfmt,colfmt,pagefmt, TC,cel_p,row_p,col_p,whervar,hhwhere,llwhere); /* Use server=1 for webserver. Other, for testing. SAS macro for computing summary statistics and standard errors from NPTS. NOTE: THIS DRAFT MACRO IS NOT READY FOR PUBLIC RELEASE. NOTE: This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. AUTHOR: Rick Schmoyer (ric@ornl.gov) ACKNOWLEDGEMENT: Thanks to Jenny Young for finding bugs. ARGUMENTS: indatay=input data set containing y variable (yvar). Necessary. MUST BE SORTED BY VARSTRAT SUBSTRAT HOUSEID; Unchanged on output. ywt=final sampling weight variable. Necessary. yvar=analysis variable. Optional. rowvar=first class variable (defines rows). Optional, but necessary if colvar is defined. colvar=second class variable (defines columns). Optional, but necessary if pagevar is defined. pagevar=third class variable (defines pages). Optional. rowfmt=name of user-supplied format for row variable (to override default) colfmt=name of user-supplied format for column variable (to override default) pagefmt=name of user-supplied format for page variable (to override default) TC=table code: T for total, other for ratio (if possible). cel_p: 1 for cell percent standard errors row_p: 1 for row percent standard errors col_p: 1 for column percent standard errors whervar=list of 'where' variables hhwhere=Household-level where clause (logically excludes households) llwhere=Lower-lever where clause (where clause at level lower than household, e.g., of person, vehicle, trip) NOTE: Both row and column must be defined for any of the percent SEs. NOTE: Formats entered implicitly along with data must be in library 'LIBRARY'. Formats entered explicitly as macro arguments must be in default work library. NOTE: Must have mautosource options and SASAUTOS= defined right to call SAS system macros (see sasautos). VARIABLES CREATED: NOTE: THESE VARIABLE NAMES ARE RESERVED WORDS. &rowvar Same as input &colvar Same as input &pagevar Same as input &yvar If &yvar is not blank then weighted sum of input &yvar. If &yvar is blank, then macro variale yvar is reassigned value &ywt, and &yvar becomes sum of &yvar (&ywt). &ywt Sum of input &ywt std_erry Standard error of &yvar total se_ywt Standard error of &ywt total ratioy Ratio of &yvar to &ywt totals se_raty Standard error of ratioy--based on Taylor approximation NOTE: If ratio estimate DENOMINATORS turn out nonpositive, ratio estimates and standard errors are not computed. Only one data set is accessed, but it may contain variables merged in a priori from other data sets, which may be used for analysis or where variables. To compute the standard errors for a particular analysis, we need to know the count of households for that analysis. A household should be counted for the analysis unless (1) the household is excluded--at the household level--by a where statement, or (2) for each observation for the household, at least one of the variables needed for the analysis is missing (so that the household itself is effectively missing). Algorithm for counting households: (1) If one or more of the variables for an observation is missing, set the weight for that observation to zero and set CntCode=0. Otherwise set CntCode=1 for that observation. (2) If a lower level where (llwhere) statement excludes an observation, set the weight for that observation to zero and CntCode=1. (3) Sum to household level. If CntCode=0 (all missing for the household), or if observation is excluded by an upper level (household-level) where (hhwhere) clause, then delete. (4) Count up the households. NOTE: To do this we need to know which where statements components are at the household level. Data should be merged a priori to include higher-level variables, and MERGED DATA SHOULD CONTAIN COUNT MARKER OBSERVATIONS WITH WEIGHT . (missing) FOR HOUSEHOLDS WITHOUT LOWER LEVEL OBSERVATIONS (E.G., NO TRIPS), AND NO OTHER OBSERVATIONS SHOULD HAVE WEIGHT EQUAL TO . . This may require specially prepared data sets. (End main comment section.) */ %local dsn dsnx dsncopy dsnh crossx crossy covx covy std_errx dsid rfmt rowtype cfmt coltype pfmt pagetype rc rowlib collib pagelib ycode rowf colf pagf yvarlab ywtlab; %if &rowfmt ne %then %let rowlib=work; %else %let rowlib=library; %if &colfmt ne %then %let collib=work; %else %let collib=library; %if &pagefmt ne %then %let pagelib=work; %else %let pagelib=library; *STEP 1: CREATE MAIN DATA SET; %let dsid=%sysfunc(open(&indatay,i)); %if &dsid=0 %then %put %sysfunc(sysmsg()); %if &rowvar ne %then %let rowtype=%sysfunc(vartype(&dsid, %sysfunc(varnum(&dsid,&rowvar)))); %if &colvar ne %then %let coltype=%sysfunc(vartype(&dsid, %sysfunc(varnum(&dsid,&colvar)))); %if &pagevar ne %then %let pagetype=%sysfunc(vartype(&dsid, %sysfunc(varnum(&dsid,&pagevar)))); %let rc=%sysfunc(close(&dsid)); data; set &indatay (keep=varstrat substrat houseid &yvar &ywt &rowvar &colvar &pagevar &whervar); *Salvage missing class variables; %if rowtype=N %then %str(if &rowvar=. then &rowvar=.M;); %else %if rowtype=C %then %str(if &rowvar='' then &rowvar='.';); %if coltype=N %then %str(if &colvar=. then &colvar=.M;); %else %if coltype=C %then %str(if &colvar='' then &colvar='.';); %if pagetype=N %then %str(if &pagevar=. then &pagevar=.M;); %else %if pagetype=C %then %str(if &pagevar='' then &pagevar='.';); *To handle noshows, marker is . ; if &ywt=. then CntCode=1; *For missing; else do; %if &yvar ne %then %do; if &yvar=. then CntCode=0; else CntCode=1; %end; %else %str(CntCode=1;); end; %if %quote(&llwhere) ne %then %do; if not (&llwhere) then do; &ywt=0; CntCode=1; end; %end; %if &yvar= %then %do; %let yvar=&ywt; %let ycode=NR; *No ratio; if _n_=1 then do; length yvarlab $ 40; call label(&ywt,yvarlab); call symput('yvarlab',trim(left(yvarlab))); drop yvarlab; end; %end; %else %do; &yvar=&yvar*&ywt; %let ycode=R; *Ratio; %let crossy=crossy; %let covy=covy; if _n_=1 then do; length yvarlab ywtlab $ 40; call label(&ywt,ywtlab); call label(&yvar,yvarlab); call symput('ywtlab',trim(left(ywtlab))); call symput('yvarlab',trim(left(yvarlab))); drop ywtlab yvarlab; end; %end; %if &rowfmt ne %then %str(format &rowvar &rowfmt..;); %if &colfmt ne %then %str(format &colvar &colfmt..;); %if &pagefmt ne %then %str(format &pagevar &pagefmt..;); run; %let dsn=%trim(%left(&syslast)); *STEP 2: For each class C, sum up weights to household (ultimate cluster) level. Note that C may be an intersection or union. Classes are defined by rowvar colvar pagevar; proc means data=&dsn noprint missing; %if %quote(&hhwhere) ne %then %str(where CntCode gt 0 and &hhwhere;); %else %str(where CntCode gt 0;); %if &rowvar ne %then %str(class &rowvar &colvar &pagevar;); by varstrat substrat houseid; %if &ycode=NR %then %str(var &yvar;); %else %str(var &yvar &ywt;); output out=&dsn (drop=_FREQ_) sum=; run; *Compute nhouse values; data; set &dsn; where _TYPE_=0; run; %let dsnh=%trim(%left(&syslast)); proc means data=&dsnh noprint; var &yvar; by varstrat substrat; output out=&dsnh (drop=_FREQ_ _TYPE_) n=nhouse; %if (&cel_p=1) or (&row_p=1) or (&col_p=1) %then %do; data; set &dsn; run; %let dsncopy=%trim(%left(&syslast)); %end; run; %if &ycode=R %then %do; data &dsn; set &dsn; crossy=&yvar*&ywt; %end; proc sort data=&dsn; by varstrat substrat &rowvar &colvar &pagevar; proc means noprint data=&dsn; by varstrat substrat &rowvar &colvar &pagevar; %if &ycode=R %then %str(var &yvar &ywt &crossy;); %else %str(var &yvar;); output out=&dsn (drop=_freq_ _type_) sum= %if &ycode=R %then %str(uss (&yvar &ywt)=uss ussw;); %else %str(uss (&yvar)=uss;); *Merge in ultimate cluster frequency in each substratum (nhouse); data &dsn; merge &dsn (in=in1) &dsnh; by varstrat substrat; if in1; if nhouse gt 1 then do; std_erry=(nhouse/(nhouse-1))*(uss - &yvar*&yvar/nhouse); %if &ycode=R %then %do; covy=(nhouse/(nhouse-1))*(&crossy - &yvar*&ywt/nhouse); se_ywt=(nhouse/(nhouse-1))*(ussw - &ywt*&ywt/nhouse); %end; end; else do; std_erry=&yvar*&yvar; %if &ycode=R %then %do; se_ywt=&ywt*&ywt; covy=&ywt*&yvar; %end; end; drop nhouse; *STEP 4: Tally up over substrata within classes; proc means noprint data=&dsn nway missing; class &rowvar &colvar &pagevar; %if &ycode=R %then %str(var &yvar &ywt std_erry se_ywt &covy;); %else %str(var &yvar std_erry;); output out=&dsn (drop=_freq_ _type_) sum=; *Obtain the row, column, and page variable types and formats as macro variables; run; %let dsid=%sysfunc(open(&dsn,i)); %if &dsid=0 %then %put %sysfunc(sysmsg()); %if &rowvar ne %then %let rfmt=%sysfunc(varfmt(&dsid, %sysfunc(varnum(&dsid,&rowvar)))); %if &colvar ne %then %let cfmt=%sysfunc(varfmt(&dsid, %sysfunc(varnum(&dsid,&colvar)))); %if &pagevar ne %then %let pfmt=%sysfunc(varfmt(&dsid, %sysfunc(varnum(&dsid,&pagevar)))); %let rc=%sysfunc(close(&dsid)); *Obtain character and numeric maximum row, column, page variable values; data _null_; set &dsn end=eof; length charmax $ 200 nummax 8; retain charmax '' nummax . charlen 0; %if &rowtype=N %then %str(nummax=nummax<>&rowvar;); %else %if &rowtype=C %then %do; charmax=charmax<>&rowvar; charlen=charlen<>length(&rowvar); %end; %if &coltype=N %then %str(nummax=nummax<>&colvar;); %else %if &coltype=C %then %do; charmax=charmax<>&colvar; charlen=charlen<>length(&colvar); %end; %if &pagetype=N %then %str(nummax=nummax<>&pagevar;); %else %if &pagetype=C %then %do; charmax=charmax<>&pagevar; charlen=charlen<>length(&pagevar); %end; if eof then do; call symput('charmax',trim(left(charmax)||'~')); call symput('charlen',trim(left(charlen+1))); call symput('nummax',trim(left(nummax+1))); call symput('rfmt',compress("&rfmt",' .')); call symput('cfmt',compress("&cfmt",' .')); call symput('pfmt',compress("&pfmt",' .')); end; run; *Append to formats with 'All' for 'All'-values; %if &rfmt ne %then %do; proc format library=&rowlib; %if &rowtype=C %then %str(value $nrfmt '.'='Missing' "&charmax"='All' other=[&rfmt];); %else %str(value nrfmt .M='Missing' &nummax='All' other=[&rfmt];); %end; %else %do; proc format; value $nrfmt '.'='Missing' "&charmax"='All'; value nrfmt .M='Missing' &nummax='All'; %end; %if &cfmt ne %then %do; proc format library=&collib; %if &coltype=C %then %str(value $ncfmt '.'='Missing' "&charmax"='All' other=[&cfmt];); %else %str(value ncfmt .M='Missing' &nummax='All' other=[&cfmt];); %end; %else %do; proc format; value $ncfmt '.'='Missing' "&charmax"='All'; value ncfmt .M='Missing' &nummax='All'; %end; %if &pfmt ne %then %do; proc format library=&pagelib; %if &pagetype=C %then %str(value $npfmt '.'='Missing' "&charmax"='All' other=[&pfmt];); %else %str(value npfmt .M='Missing' &nummax='All' other=[&pfmt];); %end; %else %do; proc format; value $npfmt '.'='Missing' "&charmax"='All'; value npfmt .M='Missing' &nummax='All'; %end; *Redefine '' to maximum and associate new 'All' formats; data &dsn; %if &rowtype=C %then %str(length &rowvar $ &charlen;); %if &coltype=C %then %str(length &colvar $ &charlen;); %if &pagetype=C %then %str(length &pagevar $ &charlen;); set &dsn; %if &ycode=R %then %do; if &ywt gt 0 then do; ratioy=&yvar/&ywt; se_raty=(std_erry + se_ywt*ratioy**2 - 2*covy*ratioy)/(&ywt**2); if se_raty ne . then se_raty=sqrt(max(0,se_raty)); end; se_ywt=sqrt(se_ywt); drop covy; label se_ywt="SE total &ywtlab"; label ratioy="Ratio &yvarlab to &ywtlab"; label se_raty="Taylor SE Ratio"; %end; std_erry=sqrt(std_erry); label std_erry="SE total &yvarlab"; %if &rowvar ne %then %do; if &rowvar='' then do; if "&rowtype"='N' then &rowvar=&nummax; else &rowvar="&charmax"; end; %if &rowtype=N %then %str(format &rowvar nrfmt.;); %else %str(format &rowvar $nrfmt.;); %end; %if &colvar ne %then %do; if &colvar='' then do; if "&coltype"='N' then &colvar=&nummax; else &colvar="&charmax"; end; %if &coltype=N %then %str(format &colvar ncfmt.;); %else %str(format &colvar $ncfmt.;); %end; %if &pagevar ne %then %do; if &pagevar='' then do; if "&pagetype"='N' then &pagevar=&nummax; else &pagevar="&charmax"; end; %if &pagetype=N %then %str(format &pagevar npfmt.;); %else %str(format &pagevar $npfmt.;); %end; *Tabulate-formatted output of totals or ratios; %if &TC=T %then %do; * rtg; %if &server=1 %then %tab2htm(capture=on, runmode=b); proc tabulate data=&dsn; class &rowvar &colvar &pagevar; var std_erry; keylabel sum=' '; %if &pagevar ne %then %str(table &pagevar, &rowvar,(std_erry)*&colvar;); %else %if &colvar ne %then %str(table &rowvar,(std_erry)*&colvar;); %else %if &rowvar ne %then %str(table &rowvar,(std_erry);); %else %str(table std_erry;); * rtg; %if &server=1 %then %tab2htm(capture=off, runmode=b, openmode=append, htmlfile=&sehtml, center=Y, cpad=5, ttag=HEADER 3); %end; %else %if &ycode=R %then %do; * rtg; %if &server=1 %then %tab2htm(capture=on, runmode=b); proc tabulate data=&dsn; class &rowvar &colvar &pagevar; var se_raty; keylabel sum=' '; %if &pagevar ne %then %str(table &pagevar, &rowvar,(se_raty)*&colvar;); %else %if &colvar ne %then %str(table &rowvar,(se_raty)*&colvar;); %else %if &rowvar ne %then %str(table &rowvar,(se_raty);); %else %str(table se_raty;); * rtg; %if &server=1 %then %tab2htm(capture=off, runmode=b, openmode=append, htmlfile=&sehtml, center=Y, cpad=5, ttag=HEADER 3); %end; *CELL, ROW, COLUMN PERCENTS AND STANDARD ERRORS; %if (&cel_p=1) or (&row_p=1) or (&col_p=1) %then %do; %let crossx=crossx; %let covx=covx; %macro utility(RCPx); *Idea now is to merge each possibly broader x-total with its corresponding y-total; proc sort data=&dsnx; by varstrat substrat &RCPx houseid; proc sort data=&dsncopy out=&dsn; by varstrat substrat &RCPx houseid; data &dsn; merge &dsn (in=in1) &dsnx (keep=varstrat substrat houseid &RCPx xvar); by varstrat substrat &RCPx houseid; if in1; crossx=xvar*&yvar; drop xvar; *STEP 3: Summary stats--sums over households--by substratum within stratum for each class C. Class statement is not used because _TYPE_ has already been defined.; proc means noprint data=&dsnx; by varstrat substrat &RCPx; var xvar; output out=&dsnx (drop=_freq_ _type_) sum= uss(xvar)=ussx; *Merge in ultimate cluster frequency in each substratum; data &dsnx; merge &dsnx (in=in1) &dsnh; by varstrat substrat; if in1; if nhouse gt 1 then std_errx=(nhouse/(nhouse-1))*(ussx - xvar*xvar/nhouse); *If substratum has only one observation then estimate variance as square of total (expectation is variance + mean squared). This is OK as long as there are not many singletons, but may be extremely conservative if there are.; else std_errx=xvar*xvar; proc sort data=&dsn; by varstrat substrat &rowvar &colvar &pagevar; *Yes, did this before, but not for &crossx; proc means noprint data=&dsn; by varstrat substrat &rowvar &colvar &pagevar; var &yvar &crossx; output out=&dsn (drop=_freq_ _type_) sum= uss (&yvar)=uss; *Merge in x-data plus ultimate cluster frequency in each substratum (nhouse); proc sort data=&dsn; by varstrat substrat &RCPx; data &dsn; merge &dsn (in=in1) &dsnx (keep=nhouse xvar varstrat substrat &RCPx); by varstrat substrat &RCPx; if in1; if nhouse gt 1 then do; std_erry=(nhouse/(nhouse-1))*(uss - &yvar*&yvar/nhouse); covx=(nhouse/(nhouse-1))*(&crossx - xvar*&yvar/nhouse); end; *If substratum has only one observation then estimate variance as square of total (expectation is variance + mean squared). This is OK as long as there are not many singletons, but may be extremely conservative if there are.; else do; covx=xvar*&yvar; std_erry=&yvar*&yvar; end; drop nhouse xvar; *STEP 4: Tally up over substrata within classes; proc means noprint data=&dsn nway missing; class &rowvar &colvar &pagevar; var &yvar std_erry &covx; output out=&dsn (drop=_freq_ _type_) sum=; %if &RCPx ne %then %do; proc means noprint data=&dsnx nway missing; class &RCPx; var xvar std_errx; output out=&dsnx (drop=_freq_ _type_) sum=; proc sort data=&dsn; by &RCPx; data &dsn; merge &dsn &dsnx; by &RCPx; %end; %else %do; proc means noprint data=&dsnx; var xvar std_errx; output out=&dsnx (drop=_freq_ _type_) sum=; data &dsn; retain xvar std_errx; set &dsn; if _n_ eq 1 then set &dsnx (keep=xvar std_errx); %end; if xvar gt 0 then do; ratiox=&yvar/xvar; se_ratx=(std_erry + std_errx*ratiox**2 - 2*covx*ratiox)/(xvar**2); if se_ratx ne . then se_ratx=100*sqrt(max(se_ratx,0)); ratiox=100*ratiox; end; if &rowvar='' then do; if "&rowtype"='N' then &rowvar=&nummax; else &rowvar="&charmax"; end; if &colvar='' then do; if "&coltype"='N' then &colvar=&nummax; else &colvar="&charmax"; end; %if &pagevar ne %then %do; if &pagevar='' then do; if "&pagetype"='N' then &pagevar=&nummax; else &pagevar="&charmax"; end; %if &pagetype=N %then %str(format &pagevar npfmt.;); %else %str(format &pagevar $npfmt.;); %end; %if &rowtype=N %then %str(format &rowvar nrfmt.;); %else %str(format &rowvar $nrfmt.;); %if &coltype=N %then %str(format &colvar ncfmt.;); %else %str(format &colvar $ncfmt.;); label se_ratx="Taylor SE"; drop covx; %mend utility; data; run; %let dsnx=%trim(%left(&syslast)); %if &cel_p=1 %then %do; *CELL PERCENTS AND STANDARD ERRORS; data &dsnx; set &dsncopy; if &rowvar='' and &colvar=''; rename &yvar=xvar; drop &rowvar &colvar; %utility(&pagevar); * rtg; %if &server=1 %then %tab2htm(capture=on, runmode=b); proc tabulate data=&dsn missing; title 'Standard Errors of Cell Percents'; class &rowvar &colvar &pagevar; var se_ratx; keylabel sum=' '; %if &pagevar ne %then %str(table &pagevar, &rowvar,(se_ratx)*&colvar;); %else %str(table &rowvar,(se_ratx)*&colvar;); * rtg; %if &server=1 %then %tab2htm(capture=off, runmode=b, openmode=append, htmlfile=&sehtml, center=Y, cpad=5, ttag=HEADER 3); %end; %if &row_p=1 %then %do; *COLUMN PERCENTS AND STANDARD ERRORS; data &dsnx; set &dsncopy; if &rowvar=''; rename &yvar=xvar; drop &rowvar; %utility(&colvar &pagevar); * rtg; %if &server=1 %then %tab2htm(capture=on, runmode=b); proc tabulate data=&dsn missing; title 'Standard Errors of Column Percents'; class &rowvar &colvar &pagevar; var se_ratx; keylabel sum=' '; %if &pagevar ne %then %str(table &pagevar, &rowvar,(se_ratx)*&colvar;); %else %str(table &rowvar,(se_ratx)*&colvar;); * rtg; %if &server=1 %then %tab2htm(capture=off, runmode=b, openmode=append, htmlfile=&sehtml, center=Y, cpad=5, ttag=HEADER 3); %end; %if &col_p=1 %then %do; *ROW PERCENTS AND STANDARD ERRORS; data &dsnx; set &dsncopy; if &colvar=''; rename &yvar=xvar; drop &colvar; %utility(&rowvar &pagevar); * rtg; %if &server=1 %then %tab2htm(capture=on, runmode=b); proc tabulate data=&dsn missing; title 'Standard Errors of Row Percents'; class &rowvar &colvar &pagevar; var se_ratx; keylabel sum=' '; %if &pagevar ne %then %str(table &pagevar, &rowvar,(se_ratx)*&colvar;); %else %str(table &rowvar,(se_ratx)*&colvar;); * rtg; %if &server=1 %then %tab2htm(capture=off, runmode=b, openmode=append, htmlfile=&sehtml, center=Y, cpad=5, ttag=HEADER 3); %end; run; %end; run; *Cleanup; proc datasets; delete %scan(&dsn,2,.) %scan(&dsnh,2,.); %if (&cel_p=1) or (&row_p=1) or (&col_p=1) %then %do; proc datasets; delete %scan(&dsncopy,2,.) %scan(&dsnx,2,.); %end; run; %mend tablew;