/**************Statistical Tests***********************/
/*
   Walk-through of common statistical tests and models:
     - pulse data (read from pulse.csv): binomial, goodness-of-fit,
       one-sample and paired t-tests, McNemar's test
     - sasdata2.employee / employee2: two-sample t-tests, Wilcoxon,
       correlation, linear regression, chi-square / Fisher tests
     - sasdata2.afifi: logistic regression
     - sasdata2.baseball: Poisson and negative binomial count models
*/
OPTIONS FORMCHAR="|----|+|---+=|-/\<>*";
title;

libname sasdata2 "C:\Users\kwelch\Desktop\sasdata2";

/*Read in the pulse data*/
/* firstobs=2 skips the first record of the file; missover keeps INPUT from
   reading the next record when a line has fewer values than variables.
   NOTE(review): without DSD, consecutive commas are read as one delimiter,
   so empty fields would shift later values left - confirm pulse.csv has none. */
data pulse;
   infile "pulse.csv" firstobs=2 delimiter="," missover;
   input pulse1 pulse2 ran smokes sex height weight activity;
   label pulse1 = "Resting pulse, rate per minute"
         pulse2 = "Second pulse, rate per minute";
run;

/*Create formats for categorical variables*/
proc format;
   value sexfmt   1="Male" 2="Female";
   value yesnofmt 1="Yes"  2="No";
   value actfmt   1="Low"  2="Medium" 3="High";
run;

/* List the first 25 observations with labels and formatted category values */
proc print data=pulse (obs=25) label;
   format sex sexfmt. ran smokes yesnofmt. activity actfmt.;
run;

/* Default descriptive statistics for all numeric variables */
proc means data=pulse;
run;

/**************Statistical Analyses***********************/

/*Binomial Confidence Intervals and Tests for Binary Variables*/
/* Null-hypothesis proportion is .25 */
proc freq data = pulse;
   tables smokes / binomial(p=.25);
run;

/* Same test, additionally requesting the exact binomial test */
proc freq data = pulse;
   tables smokes / binomial(p=.25);
   exact binomial;
run;

/*Chi-square Goodness of Fit Tests for Categorical Variables*/
/* No TESTP= given: tests against equal proportions */
proc freq data = pulse;
   tables activity / chisq;
run;

/* Null proportions given as fractions */
proc freq data = pulse;
   tables activity /chisq testp = ( .20 , .50, .30 );
run;

/* Same null proportions given as percentages */
proc freq data = pulse;
   tables activity /chisq testp = ( 20 , 50, 30 );
run;

/*One-Sample test for a continuous variable*/
/* Location tests against mu0=72, plus normality plots using the
   estimated mean and standard deviation */
proc univariate data=pulse mu0=72;
   var pulse1;
   histogram / normal (mu=est sigma=est);
   qqplot /normal (mu=est sigma=est);
run;

proc ttest data=pulse H0=72 ;
   var pulse1;
run;

/*Paired Samples t-test*/
proc ttest data=pulse;
   paired pulse2*pulse1;
run;

/*Paired Samples t-test stratified by ran*/
/* BY-group processing requires the data to be sorted by ran first */
proc sort data=pulse;
   by ran;
run;

proc ttest data=pulse;
   paired pulse2*pulse1;
   by ran;
run;

/*Independent Samples t-test*/
proc ttest data=sasdata2.employee;
   class gender;
   var salary;
run;

/*Check the distribution of salary for each gender*/
proc univariate data=sasdata2.employee;
   var salary;
   class gender;
   histogram;
run;

/*Assume salary is lognormally distributed*/
proc ttest data=sasdata2.employee dist=lognormal;
   class gender;
   var salary ;
run;

/* Note: this sorts the permanent data set sasdata2.employee in place */
proc sort data=sasdata2.employee;
   by jobcat;
run;

/* Lognormal two-sample test repeated within each job category */
proc ttest data=sasdata2.employee dist=lognormal;
   by jobcat;
   class gender;
   var salary;
run;

/*NON-PARAMETRIC TEST: WILCOXON/MANN-WHITNEY TEST*/
proc npar1way data=sasdata2.employee wilcoxon;
   class gender;
   var salary;
run;

/* Monte Carlo estimate of the exact Wilcoxon p-value */
proc npar1way data=sasdata2.employee wilcoxon;
   class gender;
   var salary;
   exact wilcoxon / mc;
run;

/*Correlation*/
proc corr data=sasdata2.employee;
   var salary salbegin educ;
run;

/*Linear Regression*/
/* Predicted values, residuals and studentized residuals are saved to regdat */
ods graphics on;
proc reg data=sasdata2.employee2;
   model salary = salbegin educ jobdum2 jobdum3 prevexp female;
   output out=regdat p=predict r=resid rstudent=rstudent;
run;
quit;
ods graphics off;

/*Carry out a linear regression on logsalary*/
data temp;
   set sasdata2.employee2;
   logsalary = log(salary);
run;

/* "on" written explicitly to match the other ODS GRAPHICS calls in this file */
ods graphics on;
proc reg data=temp;
   model logsalary = salbegin educ jobdum2 jobdum3 prevexp female;
   output out=regdat2 p=predict r=resid rstudent=rstudent;
run;
quit;
ods graphics off;

/*Chi-square test of independence*/
proc freq data=sasdata2.employee;
   tables gender*jobcat / chisq;
run;

title;

/* Monte Carlo estimate of Fisher's exact test.
   NOTE(review): seed=0 presumably takes the seed from the clock, so the
   estimate will differ between runs - use a positive seed if results must
   be reproducible. */
proc freq data=sasdata2.employee;
   tables gender*jobcat / chisq;
   exact fisher / mc seed=0;
run;

/*McNemar's Test of Symmetry for Paired Categorical Data*/
/* Dichotomize each pulse at a single cutoff of 80:
     pulse > 80       -> 1 (high)
     0 < pulse <= 80  -> 0
     otherwise        -> left missing (includes missing pulse values)
   The earlier version used two independent IF statements with "<= 89" for
   the 0 branch, so values 81-89 were first set to 1 and then overwritten
   with 0. The IF / ELSE IF form makes the two ranges mutually exclusive. */
data newpulse;
   set pulse;

   if pulse1 > 80 then hipulse1 = 1;
   else if pulse1 > 0 then hipulse1 = 0;

   if pulse2 > 80 then hipulse2 = 1;
   else if pulse2 > 0 then hipulse2 = 0;
run;

proc freq data=newpulse;
   tables hipulse1 hipulse2;
run;

/* AGREE option on the paired 2x2 table */
proc freq data=newpulse;
   tables hipulse1*hipulse2/ agree;
run;

/*Logistic Regression*/
/* Recode survive into a 0/1 outcome: survive=3 -> died=1, survive=1 -> died=0;
   any other survive value leaves died missing */
data afifi;
   set sasdata2.afifi;
   if survive=3 then died=1;
   if survive=1 then died=0;
run;

/* DESCENDING reverses the response ordering so that died=1 is modeled */
proc logistic data=afifi descending;
   model died = map1 shockdum sex / risklimits;
run;

/* Same model with effect and odds-ratio plots; UNITS requests odds ratios
   for the listed changes in each predictor */
ods graphics on;
proc logistic data=afifi descending PLOTS(ONLY) = (effect oddsratio);
   model died = map1 shockdum sex / risklimits;
   units map1 = 1 10 shockdum = 1 sex=1;
run;
ods graphics off;

/*Generalized Linear Model for Count Data*/
/* log_atbat is used as the offset in the count models below */
data baseball;
   set sasdata2.baseball;
   log_atbat = log(no_atbat);
run;

/* Poisson model */
proc genmod data=baseball ;
   class league division;
   model no_home = salary / dist=poisson offset=log_atbat;
run;

/* Poisson model again, adding an estimate for a 100-unit change in salary
   and saving predicted values and residuals to Pfitdata.
   NOTE(review): the ESTIMATE option is kept as "/ est" exactly as written;
   confirm it is accepted by your SAS release (the exponentiated-estimate
   option in GENMOD is documented as EXP). */
proc genmod data=baseball ;
   class league division;
   model no_home = salary / dist=poisson offset=log_atbat;
   estimate "Effect of 100k salary increase" salary 100 / est;
   output out=Pfitdata p=predict resraw=resraw reschi=reschi;
run;

/* Raw residuals vs. predicted values. This step must follow the GENMOD
   step above, which is what creates Pfitdata (it was previously placed
   before that step, where the data set did not yet exist). */
proc sgplot data=Pfitdata;
   scatter y=resraw x=predict;
run;

/* Bin the predicted values into five groups so that the mean and variance
   can be compared within each group */
data Pfitdata2;
   set Pfitdata;
   if       0 <= predict <  5 then group = 1;
   else if  5 <= predict < 10 then group = 2;
   else if 10 <= predict < 15 then group = 3;
   else if 15 <= predict < 20 then group = 4;
   else if 20 <= predict      then group = 5;
run;

proc format;
   value grpfmt 1="0 to 4.9"
                2="5 to 9.9"
                3="10 to 14.9"
                4="15 to 19.9"
                5="20 to Max";
run;

proc means data=Pfitdata2 n min max mean std var;
   class group;
   var predict resraw;
   format group grpfmt.;
run;

/* Negative binomial model with the same predictors and offset */
ods graphics on;
proc genmod data=baseball plots = (predicted(clm));
   class league division;
   model no_home = salary / dist=negbin offset=log_atbat;
   output out=nbfitdata p=predict resraw=resraw reschi=reschi;
   estimate "Effect of 100k salary increase" salary 100 / est;
run;
ods graphics off;