#Archer, K. J., & Lemeshow, S. (2006). #Goodness-of-fit test for a logistic regression model fitted using survey sample data. #The Stata Journal, 6(1), 97-105. library(tidyverse) library(survey) library(haven) setwd('C:/Users/kevin/OneDrive/School & Research/UTMB/Summer 2022/UofMichigan/Data Sets/Data Sets') ### NCS-R DATA ### ncsr = read_dta( "ncsr_sub_5apr2017.dta" ) colnames(ncsr) = tolower(colnames(ncsr)) ncsr = ncsr %>% mutate( wkstat3c = factor( wkstat3c, levels = 1:3, labels = c("Employed", "Unemployed", "NLF") ), ed4cat = factor( ed4cat, levels = 1:4, labels = c("0-11", "12", "13-15", "16+") ), racecat = factor( racecat, levels = 1:4, labels = c("Other", "Hisp", "Black", "White") ), obese6ca = factor( obese6ca, levels = 1:6, labels = c("<18.5", "18.5-24.9", "25-29.9", "30-34.9", "35-39", "40+") ) ) ncsr_svy = svydesign( strata = ~sestrat, id = ~seclustr, weights = ~ncsrwtsh, data = ncsr, nest = T ) summary(ncsr_svy) allCC <- svyglm(formula=ald ~ factor(sex)+factor(ed4cat)+factor(mar3cat), design=ncsr_svy, family="quasibinomial") summary(allCC) #Recommendations from Lumley (from R Help Archive) on implementing the Archer Lemeshow GOF test #This code aims to approximate the design-adjusted decile rank test cited above #Wald statistic is modified to form an F-corrected test statistic for model fit based on the following discussion #https://stat.ethz.ch/pipermail/r-help/2016-November/443250.html r <- residuals (allCC, type = "response") f <- fitted(allCC) g<- cut(f, c(-Inf, quantile(f, (1:9)/10, Inf))) ncsr[ , 'g' ] <- g ncsr[ , 'r' ] <- r newdesign<-svydesign( strata = ~sestrat, id = ~seclustr, weights = ~ncsrwtsh, data = ncsr, nest = T ) decilemodel<- svyglm(r~g, design=newdesign) regTermTest(decilemodel, ~g)