/***************************************************************************************************************** This program adapted by MJS from a file posted online. SAS file name: Discrete_Dist_Fit.sas File location: _________________________________________________________________________________________________________________ Purpose: Fit discrete distributions to univariate data using PROC GENMOD Author: Peter Clemmensen Creation Date: 12/07/2017 This program supports the blog post "Fit Discrete Distributions to Univariate Data" on SASnrd.com *****************************************************************************************************************/ proc import DATAFILE="/home/mstear0/Nematodirus/Nemb95.txt" out=onea dbms=dlm replace; datarow=2; delimiter='09'x; run; proc sgplot data=onea; Histogram nembjun ; xaxis label = 'EGGS PER GRAM'; yaxis label='FREQUENCY'; run; data one; set onea; count = round(nembjun / 12.5); if count=. then delete; run; proc genmod data=one; model count = / dist = zip; zeromodel / link=logit; run; proc univariate data=one; var count; run; /* Tabulate counts and plot data */ proc freq data=one noprint; tables count / out=CountMinMax; run; data _null_; set CountMinMax end=eof; if _N_=1 then call symputx('minCount', count); if eof then call symputx('maxCount', count); run; %put min=&minCount max=&maxCount; /* Visualize the data */ title 'Frequency Plot of Count Dataset'; proc sgplot data=one; vbar count; xaxis display=(nolabel) fitpolicy=THIN; yaxis display=(nolabel) ; run; title; /* Fit Poisson distribution to data */ proc genmod data=one; model count = / dist=Poisson; /* No variables are specified, only mean is estimated */ output out=PoissonFit p=lambda; run; /* Save Poisson parameter lambda in macro variables */ data _null_; set PoissonFit(obs=1); call symputx('lambda', lambda); run; /* Use Min/Max values and the fitted Lambda to create theoretical Poisson Data */ /* Fit Zero-inflated Poisson distribution to data */ proc genmod data=one; model count = / dist = zip; /* No variables are specified, only mean is estimated */ zeromodel / link=logit; output out=ZIPoissonFit p=lambdazip pzero=infprob; run; /* Save Poisson parameter lambda in macro variables */ data _null_; set ZIPoissonFit(obs=1); lambda2=lambdazip /(1-infprob); call symputx('lambda2', lambda2); run; /* Use Min/Max values and the fitted Lambda to create theoretical Poisson Data */ data TheoreticalPoisson; do count= &minCount to &maxCount; po=pdf('Poisson', count, &lambda); zip=pdf('Poisson', count, &lambda2); output; end; run; /* Negative Binomial Example */ /* Fit Negative Binomial distribution to data */ proc genmod data=one; model count= /dist=NegBin; /* No variables are specified, only mean is estimated */ ods output parameterestimates=NegBinParameters; run; /* Transpose Data */ proc transpose data=NegBinParameters out=NegBinParameters; var estimate; id parameter; run; /* Calculate k and p from intercept and dispersion parameters */ data NegBinParameters; set NegBinParameters; k = 1/dispersion; p = 1/(1+exp(intercept)*dispersion); run; /* Save k and p in macro variables */ data _null_; set NegBinParameters; call symputx('k', k); call symputx('p', p); run; /* Calculate theoretical Negative Binomial PMF based on fitted k and p */ data TheoreticalNegBin; do count=&minCount to &maxCount; NegBin=pdf('NegBinomial', count, &p, &k); output; end; run; /* Merge The datasets for plotting */ data PlotData(keep=count freq po zip NegBin); merge TheoreticalPoisson TheoreticalNegBin CountMinMax; by count; freq = PERCENT/100; run; title 'June 1995 data overlaid with fitted distributions'; proc sgplot data=PlotData noautolegend; vbarparm category=count response=freq / legendlabel='Count Data'; series x=count y=po / markers markerattrs=(symbol=circlefilled color=red) lineattrs=(color=red)legendlabel='Fitted Poisson '; series x=count y=zip / markers markerattrs=(symbol=squarefilled color=yellow) lineattrs=(color=yellow)legendlabel='Fitted Zeroinflated Poisson'; series x=count y=NegBin / markers markerattrs=(symbol=squarefilled color=green) lineattrs=(color=green)legendlabel='Fitted Negative Binomial'; ***xaxis display=(nolabel);xaxis label = 'NUMBER OF EGGS COUNTED' Fitpolicy=THIN; ***values = (0 to 160 by 20); ***yaxis display=(nolabel);yaxis label = 'PROBABILITY MASS FUNCTION'; keylegend / location=inside position=NE across=1; run; title;