-
Notifications
You must be signed in to change notification settings - Fork 1
/
broomify-amelia.R
67 lines (57 loc) · 2.56 KB
/
broomify-amelia.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# ---- broomify -----------------------------------------------------------
tidy.melded <- function(x, conf.int = FALSE, conf.level = 0.95) {
# Get the df from one of the models
model_degrees_freedom <- glance(x[[1]])$df.residual
# Create matrices of the estimates and standard errors
params <- data_frame(models = x) %>%
mutate(m = 1:n(),
tidied = models %>% map(tidy)) %>%
unnest(tidied) %>%
select(m, term, estimate, std.error) %>%
gather(key, value, estimate, std.error) %>%
mutate(term = fct_inorder(term)) %>% # Order the terms so that spread() keeps them in order
spread(term, value)
just_coefs <- params %>% filter(key == "estimate") %>% select(-m, -key)
just_ses <- params %>% filter(key == "std.error") %>% select(-m, -key)
# Meld the coefficients with Rubin's rules
coefs_melded <- mi.meld(just_coefs, just_ses)
# Create tidy output
output <- as.data.frame(cbind(t(coefs_melded$q.mi),
t(coefs_melded$se.mi))) %>%
magrittr::set_colnames(c("estimate", "std.error")) %>%
mutate(term = rownames(.)) %>%
select(term, everything()) %>%
mutate(statistic = estimate / std.error,
p.value = 2 * pt(abs(statistic), model_degrees_freedom, lower.tail = FALSE))
# Add confidence intervals if needed
if (conf.int & conf.level) {
# Convert conf.level to tail values (0.025 when it's 0.95)
a <- (1 - conf.level) / 2
output <- output %>%
mutate(conf.low = estimate + std.error * qt(a, model_degrees_freedom),
conf.high = estimate + std.error * qt((1 - a), model_degrees_freedom))
}
# tidy objects only have a data.frame class, not tbl_df or anything else
class(output) <- "data.frame"
output
}
glance.melded <- function(x) {
# Because the properly melded parameters and the simple average of the
# parameters of these models are roughly the same (see
# https://www.andrewheiss.com/blog/2018/03/07/amelia-tidy-melding/), for the
# sake of simplicty we just take the average here
output <- data_frame(models = x) %>%
mutate(glance = models %>% map(glance)) %>%
unnest(glance) %>%
summarize_at(vars(r.squared, adj.r.squared, sigma, statistic, p.value, df,
logLik, AIC, BIC, deviance, df.residual),
funs(mean)) %>%
mutate(m = as.integer(length(x)))
# glance objects only have a data.frame class, not tbl_df or anything else
class(output) <- "data.frame"
output
}
nobs.melded <- function(x, ...) {
# Take the number of observations from the first model
nobs(x[[1]])
}