/*********************************************************************************************
 **************************** Intro to SAS ***************************************************
 **************************** Day 1 Quiz *****************************************************
 *********************************************************************************************/

/* Import the "bank" worksheet of the Excel workbook into WORK.bank.
   REPLACE overwrites any existing WORK.bank; GETNAMES=YES takes the
   variable names from the first row of the sheet. */
PROC IMPORT OUT= WORK.bank
            DATAFILE= "C:\temp\fin_data\BANK.XLS"
            DBMS=EXCEL REPLACE;
     SHEET="bank";
     GETNAMES=YES;
     MIXED=NO;
     SCANTEXT=YES;
     USEDATE=YES;
     SCANTIME=YES;
RUN;

/*
1) Use SAS commands to display the contents of the Bank dataset.
   List the variables according to the order they are stored in the data set.
*/
/* VARNUM lists variables by position in the data set instead of alphabetically. */
proc contents data=bank varnum;
run;

/*
2) Suppose you want to print out the first 25 observations from the Bank data set.
   The variables to be included are ID, sex, job category (variable JOBCAT),
   beginning salary (variable SALBEG), and current salary (variable SALNOW).
*/
proc print data=bank (obs=25);
    var ID sex JOBCAT SALBEG SALNOW;
run;

/*
3) Get a cross-tabulation of SEX (as the row variable) by JOBCAT (as the column
   variable) for employees aged 50 and above.
*/
/* NOPERCENT and NOCOL suppress the overall and column percentages, so each
   cell shows the frequency and the row percentage only. */
proc freq data=bank;
    tables SEX * JOBCAT / nopercent nocol;
    where AGE >= 50;
run;

/*
4) Use a BY statement to obtain simple descriptive statistics for all numeric
   variables for males and females individually. Fill in the information below.
*/
/* BY-group processing requires the input to be sorted by the BY variable.
   Note that this sort rewrites WORK.bank in SEX order. */
proc sort data=bank;
    by SEX;
run;

proc means data=bank;
    by SEX;
run;

/*
5) Create a new variable (variable name SALDIFF) as the difference between the
   employees' current salaries and their beginning salaries.
   How many people have salary increase greater than or equal to $5,000?
*/
data bank;
    set bank;
    SALDIFF = SALNOW - SALBEG;

    /* SAS sorts missing values below every number, so a plain
       "SALDIFF < 5000" comparison is also true when SALDIFF is missing.
       Test for missing first so that people without both salaries are left
       out of the flag instead of being counted as "below $5,000". */
    if missing(SALDIFF) then DIFFABOVE5K = .;
    else if SALDIFF >= 5000 then DIFFABOVE5K = 1;
    else DIFFABOVE5K = 0;
run;

/* The frequency of DIFFABOVE5K = 1 is the answer to the question. */
proc freq data=bank;
    tables DIFFABOVE5K;
run;

/*
6) Get a bivariate scatter plot with SALNOW as the Y variable, and SALBEG as the
   X variable using Proc GPlot.
   Include a regression line for each sex in your plot.
*/
goptions reset=all;

/* One SYMBOL definition per SEX level; INTERPOL=RL draws a linear regression
   line through that group's points. */
symbol1 color=black value=dot    interpol=rl line=1;
symbol2 color=black value=circle interpol=rl line=2;

proc gplot data=bank;
    plot SALNOW * SALBEG = SEX;
run;
quit;

/*
7) Create a dataset Bank2, in which you generate a categorical variable AGEGRP
   based on the following cut points.
   AGEGRP: 1 = Less than 25 years
           2 = 25 to less than 35 years
           3 = 35 to less than 45 years
           4 = 45 to less than 55 years
           5 = 55 or older
*/
data bank2;
    set bank;

    /* missing() catches the ordinary missing value and the special ones
       (.A-.Z, ._). A test of "AGE ne ." would let the special ones through,
       and because they compare below any number they would fall into
       group 1. */
    if missing(AGE) then AGEGRP = .;
    else if AGE < 25 then AGEGRP = 1;
    else if AGE < 35 then AGEGRP = 2;
    else if AGE < 45 then AGEGRP = 3;
    else if AGE < 55 then AGEGRP = 4;
    else AGEGRP = 5;
run;

/*
Question: Create a dataset Bank3, which should contain only females (SEX=1)
          whose AGEGRP = 2 or 3.
*/
data bank3;
    set bank2;
    /* Subsetting IF: only observations that satisfy the condition are output. */
    if SEX=1 and (AGEGRP = 2 or AGEGRP = 3);
    /* or equivalently
       if SEX = 1 and AGEGRP in (2, 3) then output; */
run;

proc contents data=bank3 varnum;
run;