/************************************************ Exercise 3 solution filename: exercise3.sas *************************************************/ /*Set up the libname statement and SAS environment*/ libname sasdata2 "C:\Users\kwelch\Desktop\sasdata2"; ods listing; OPTIONS FORMCHAR="|----|+|---+=|-/\<>*"; /*Get proc contents on each of the data sets*/ proc contents data=sasdata2.thai1 varnum; run; proc contents data=sasdata2.thai2 varnum; run; /*Get descriptive statistics on each data set*/ proc means data=sasdata2.thai1; run; proc means data=sasdata2.thai2; run; /*Sort both data sets*/ proc sort data=sasdata2.thai1; by schoolid; run; proc sort data=sasdata2.thai2; by schoolid; run; /*Merge the two data sets(n=8583)*/ data combine; merge sasdata2.thai1 sasdata2.thai2; by schoolid; run; /*Create a new variable, _merge_, that tells which dataset(s) contributed each case Cumulative Cumulative _merge_ Frequency Percent Frequency Percent ------------------------------------------------------------ 1 1066 12.42 1066 12.42 2 1 0.01 1067 12.43 3 7516 87.57 8583 100.00 There were 1066 cases that were in the thai1 data set, but not in thai2 (they had information for the child, but no matching school information). There was 1 case that was in the thai2 data set, but not in thai1, in other words, there was 1 school that did not have any children in the study. There were 7516 cases that were in both data sets, that is, they were in the thai1 data set and in the thai2 data set, these are kids who had data, and had matching school information */ data newcombine; merge sasdata2.thai1(in=a) sasdata2.thai2(in=b); by schoolid; if a=1 and b=0 then _merge_=1; if a=0 and b=1 then _merge_=2; if a=1 and b=1 then _merge_=3; run; proc freq data=newcombine; tables _merge_; run; /*List the schools for the students who didn't have a matching school*/ proc freq data=newcombine; where _merge_=1; tables schoolid; run; /*Merge the two data sets, keeping only cases that are in both data sets (n=7516)*/ data allcombine; merge sasdata2.thai1(in=a) sasdata2.thai2(in=b); by schoolid; if a and b; run; /******************************************************** Stacking Data ********************************************************* /*Get proc contents about each data set*/ proc contents data=sasdata2.wave1 varnum; run; proc contents data=sasdata2.wave2 varnum; run; proc contents data=sasdata2.wave3 varnum; run; /*Stack all three data sets together*/ data sasdata2.allwaves; set sasdata2.wave1 sasdata2.wave2 sasdata2.wave3; run; /*How many observations are in each round?*/ proc means data=sasdata2.allwaves; class round; run; /*How many smokers in each round? How many missing values?*/ proc freq data=sasdata2.allwaves; tables round*cig / nocol nopercent missing ; run;