/* This code is still at a very early stage, and for instance is missing corrections for continuity. It presumes the presence of two data sets sst and lrdata described at the bottom of this file.*/ /* This first macro acutally calculates the Skovgaard approximation. It first reconstructs a raw data vector consistent with the with input sufficient statistics.*/ %macro dogen(fixed,notfixed); %newy(%UNQUOTE(&fixed) %UNQUOTE(¬fixed)); %fitgen(%UNQUOTE(&fixed) %UNQUOTE(¬fixed)); data summary; set outmod ; big=value; drop value ; run; %fitgen(%UNQUOTE(&fixed)); data summary; retain what zhat; merge summary outmod ; by ssn; if _n_=1 then zhat= big; if _n_=2 then zhat= zhat*sqrt(VALUE/big); if _n_=3 then what= sign(zhat)*sqrt(value-big); if _n_=3 then output; run; data summary; set summary; approx=probnorm(what)+ exp(-what*what/2.0)/sqrt(2.0d0*3.1415926535897931)*(1/what-1/zhat); run; %mend; /* The next macro reconstructs a raw data vector consistent with the observed set of sufficient statistics, by solving a constrained optimization problem. */ %macro newy(vars); data cov; set lrdata; obj=0;keep %UNQUOTE(&fixed) %UNQUOTE(¬fixed) n obj; run; data nsst; set sst; ssn=_n_; run; proc transpose data=cov out=cov; run; proc transpose data=cov out=named; run; data cov; set cov; dummy=0; ssn=_n_; run; data cov; merge nsst cov; by ssn; run; data cov; set cov; by dummy; _TYPE_="EQ "; if t=. then _TYPE_="UPPERBD"; if last.dummy then _TYPE_="MAX "; drop dummy ssn; run; proc lp data=cov primalout=newy; rhs t; run; data newy ; set newy; _NAME_=_VAR_; keep _NAME_ _VALUE_ ; run; data named ; merge newy named ; by _NAME_; if n=. then delete; drop obj _NAME_; run; %mend; /* This next macro solves the saddlepoint equations. To specify the model change the word ``identity'' below to the correct canonical link. */ %macro fitgen(vars); ods output covb=outcov; ods output modfit=outmod; ods output ParameterEstimates=parest; proc genmod data=named; model _VALUE_/n= %UNQUOTE(&vars) / link=identity covb noint; run; proc iml ; use outcov; read all into mat; c=det(mat); create deter from c [colname="VALUE"]; append from c; quit; data parest; set parest; if df=0 then delete; VALUE=estimate; keep value df; run; data parest; set parest; by df ; if last.df then ; else delete ; run; data outmod; set parest deter outmod; if _n_>3 then delete; ssn=_n_; keep VALUE ssn ; run; %mend; /* This macro is executed by creating a SAS data set call lrdata containing information about the regression problem (specifically, the covariates and the number of observations per cluster) and another data set called sst containing values of the sufficient statistic, and typing the names of the variables associated with statistics to be conditioned on before the comma and the name of the variable not conditioned on after the comma, as below: */ %dogen(v1 v2,v3);