/********************************************** DATA MANAGEMENT TASKS USING SAS FILENAME: datamgt.sas **********************************************/ /*Specify generic output using Options Formchar= */ options formchar="|----|+|---+=|-/\<>*"; title; /*Read in the raw data. Set up variable labels.*/ data march; infile "marflt.dat"; input flight 1-3 @4 date mmddyy6. @10 time time5. orig $ 15-17 dest $ 18-20 @21 miles comma5. mail 26-29 freight 30-33 boarded 34-36 transfer 37-39 nonrev 40-42 deplane 43-45 capacity 46-48; format date mmddyy10. time time5. miles comma5.; label flight="Flight number" orig ="Origination City" dest ="Destination City"; run; /*Or Import the Data Using Proc Import or the Import Wizard. Variable labels are taken from row 1 of Excel file by default, not user-specified.*/ PROC IMPORT OUT= WORK.MARCH DATAFILE= "MARCH.XLS" DBMS=EXCEL REPLACE; SHEET="march$"; GETNAMES=YES; MIXED=NO; SCANTEXT=YES; USEDATE=YES; SCANTIME=YES; RUN; /*CREATE A COPY OF A DATA SET*/ data newmarch; set march; run; /*SUBSET DATA*/ data march15; set work.march; if date = "15MAR1990"d; run; proc print data=march15; run; data london; set march; if dest="LON"; run; data longflt; set march; if miles >=1000 then output; run; /*DELETE CASES*/ data shortflt; set march; if miles >=1000 then delete; run; /*Keep or Drop Observations*/ data march_passngrs; set march; keep date time orig dest miles boarded transfer nonrev deplane capacity;run; data march_passngrs; set march; drop mail freight; run; /*Create New Variables Using Transformations and Recodes*/ data march_recode; set march; totpassngrs = boarded + transfer + nonrev; empty_seats = capacity - totpassngrs; totnonpass = mail + freight; pctfull = (totpassngrs/capacity)*100; logpassngrs = log(totpassngrs); totpassngrs2 = sum(boarded,transfer,nonrev); int_pctfull = int(totpassngrs/capacity)*100; rnd_pctfull = round(pctfull,.1); if pctfull = 100 then full_flight = 1; else full_flight = 0; if pctfull = . then full_flight = .; if pctfull not=. then do; if pctfull < 25 then full_cat = 1; if pctfull >=25 and pctfull <50 then full_cat=2; if pctfull >=50 and pctfull <75 then full_cat=3; if pctfull >=75 then full_cat=4; end; if dest = "CPH" or dest="FRA" or dest = "LON" or dest = "PAR" or dest = "YYZ" then USA = 0; if dest in("DFW", "LAX", "ORD", "WAS") then USA = 1; run; title "Check New Variables"; proc means data=march_recode; run; proc freq data=march_recode; tables full_flight full_cat dest USA; run; /*SORT*/ proc sort data=march_recode; by USA; run; title "Descriptive Statistics by US vs Non-US Destinations"; proc means data=march_recode; by USA; run; /*SORT BY MORE THAN ONE VARIABLE*/ proc sort data=march_recode; by date USA; run; title "Descriptive Statistics by Date and Destination"; proc means data=march_recode; by date USA; run; /*Using the Tagsort Option*/ proc sort data=march_recode tagsort; by date dest; run; /*CREATE A SORTED OUTPUT DATA SET*/ proc sort data=march_recode out=sortdat; by date dest; run; /*SELECT CASES FOR AN ANALYSIS*/ /*Select cases based on values of a character variable*/ title "Flights to Los Angeles"; proc print data=march_recode; where dest = "LAX"; var flight dest totpassngrs; run; title "Missing Destination"; proc print data=march_recode; where dest = " "; var flight dest totpassngrs; run; /*Select cases based on values of a numeric variable*/ title "Flights Less than 30 Percent Full"; proc print data=march_recode; where pctfull < 30; var dest date pctfull; run; /*Using Where with Between*/ title "Flights Between 25 and 30 Percent Full"; proc print data=march_recode; where pctfull between 25 and 35 ; var flight dest totpassngrs pctfull; run; /*Select Cases that are missing on a numeric variable*/ title "Cases Where Number of Passengers is Missing"; proc print data=march_recode; where totpassngrs = . ; var flight dest totpassngrs; run; /*Select cases based on a more complicated selection*/ title "Flights less than 60 percent full to London"; proc print data=march_recode; where (pctfull < 60) and (dest="LON") ; var flight dest totpassngrs capacity pctfull; run; /*Select cases based on values of a date variable*/ title "Flights on March 7th, 1990"; proc print data=march_recode; where date = "07MAR90"D; run; title "Flights March 7th to March 9th , 1990"; proc print data=march_recode; where date between "07MAR90"D and "09MAR90"D; run; title "Cases with Missing Date"; proc print data= march_recode; where date = .; var flight dest date; run;