library(tidyverse)
library(scales)
library(janitor)
library(survey)
library(srvyr)
library(gtsummary)
5 Descriptive Statistics
Point estimates such as totals, means, variances, etc. may be the primary objective of the survey, or just an exploratory step prior to a multivariate analysis. Heeringa (2017) explains the math behind sample statistics. Zimmer (2024) explains how to calculate them using the srvyr package.
5.1 Survey Summary
# Summary table of selected variables from the simple random sample design,
# with one statistics column per level of `sch.wide`. The statistic columns
# share the spanning header "Met Growth Target", and the result is converted
# to a gt table so a left-aligned title can be attached.
tbl_svysummary(
  apisrs_des,
  by = sch.wide,
  include = c(api00, target, growth, awards, comp.imp)
) |>
  modify_spanning_header(all_stat_cols() ~ "Met Growth Target") |>
  as_gt() |>
  gt::tab_header(title = "Simple Random Sample Survey Summary") |>
  gt::tab_options(heading.align = "left")
| Simple Random Sample Survey Summary | ||
|---|---|---|
| Characteristic | Met Growth Target: No | Met Growth Target: Yes |
| api00 | 541 (482, 708) | 675 (577, 772) |
| target | 13 (4, 16) | 9 (5, 14) |
| Unknown | 0 | 588 |
| growth | -1 (-12, 6) | 33 (20, 51) |
| awards | 0 (0%) | 3,840 (76%) |
| comp.imp | 62 (5.4%) | 4,057 (80%) |
| ¹ Median (Q1, Q3); n (%) | | |
5.2 Point Estimates
How many schools met the growth target? survey_count() returns a weighted row count. If you are calculating other statistics in the same summary, use survey_total(1) to sum the rows instead. Use the vartype parameter to add columns for measures of uncertainty around the estimate (e.g., the standard error or a confidence interval).
# Point estimates for the simple random sample design, by whether the school
# met its school-wide growth target (`sch.wide`). `cascade()` also appends a
# summary row computed over all groups combined.
apisrs_pe <- cascade(
  group_by(apisrs_des, sch.wide),
  # Estimated number of schools: total of a constant 1 per sampled row.
  Schools = survey_total(1, vartype = NULL),
  # Share of schools falling in each group.
  Proportion = survey_mean(proportion = TRUE, vartype = NULL),
  # Enrollment: estimated total, then the mean with its SE and CI bounds.
  EnrollSum = survey_total(enroll, vartype = NULL),
  EnrollMean = survey_mean(enroll, vartype = c("se", "ci")),
  # First and third quartiles of enrollment.
  EnrollIQR = survey_quantile(
    enroll,
    quantiles = c(0.25, 0.75),
    vartype = NULL
  )
)
Show the code
# Render the simple random sample point estimates as a gt table. Columns are
# formatted by position: 2 = Schools, 3 = Proportion, 4 = EnrollSum,
# 5-8 = mean enrollment with its SE and CI bounds, 9-10 = enrollment
# quartiles.
gt::gt(apisrs_pe) |>
  gt::fmt_number(columns = c(2, 4:10), decimals = 0) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  gt::tab_spanner(
    label = "Mean Enrollment",
    columns = starts_with("EnrollMean")
  ) |>
  gt::tab_spanner(
    label = "IQR",
    columns = starts_with("EnrollIQR")
  ) |>
  gt::cols_label(
    sch.wide = "Met Target",
    EnrollMean = "Mean",
    EnrollMean_se = "SE",
    EnrollMean_low = "Low",
    EnrollMean_upp = "Upp",
    EnrollIQR_q25 = "Q25",
    EnrollIQR_q75 = "Q75"
  )
| Met Target | Schools | Proportion | EnrollSum | Mean Enrollment | IQR | ||||
|---|---|---|---|---|---|---|---|---|---|
| Mean | SE | Low | Upp | Q25 | Q75 | ||||
| No | 1,146 | 19% | 948,766 | 828 | 91 | 648 | 1,008 | 412 | 1,252 |
| Yes | 5,048 | 81% | 2,672,308 | 529 | 24 | 481 | 578 | 339 | 639 |
| NA | 6,194 | 100% | 3,621,074 | 585 | 27 | 531 | 639 | 339 | 664 |
# Point estimates for the `apistrat_des` design, by whether the school met
# its school-wide growth target (`sch.wide`). `cascade()` also appends a
# summary row computed over all groups combined.
apistrat_pe <- cascade(
  group_by(apistrat_des, sch.wide),
  # Estimated number of schools: total of a constant 1 per sampled row.
  Schools = survey_total(1, vartype = NULL),
  # Share of schools falling in each group.
  Proportion = survey_mean(proportion = TRUE, vartype = NULL),
  # Enrollment: estimated total, then the mean with its SE and CI bounds.
  EnrollSum = survey_total(enroll, vartype = NULL),
  EnrollMean = survey_mean(enroll, vartype = c("se", "ci")),
  # First and third quartiles of enrollment.
  EnrollIQR = survey_quantile(
    enroll,
    quantiles = c(0.25, 0.75),
    vartype = NULL
  )
)
Show the code
# Render the `apistrat_pe` point estimates as a gt table. Columns are
# formatted by position: 2 = Schools, 3 = Proportion, 4 = EnrollSum,
# 5-8 = mean enrollment with its SE and CI bounds, 9-10 = enrollment
# quartiles.
gt::gt(apistrat_pe) |>
  gt::fmt_number(columns = c(2, 4:10), decimals = 0) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  gt::tab_spanner(
    label = "Mean Enrollment",
    columns = starts_with("EnrollMean")
  ) |>
  gt::tab_spanner(
    label = "IQR",
    columns = starts_with("EnrollIQR")
  ) |>
  gt::cols_label(
    sch.wide = "Met Target",
    EnrollMean = "Mean",
    EnrollMean_se = "SE",
    EnrollMean_low = "Low",
    EnrollMean_upp = "Upp",
    EnrollIQR_q25 = "Q25",
    EnrollIQR_q75 = "Q75"
  )
| Met Target | Schools | Proportion | EnrollSum | Mean Enrollment | IQR | ||||
|---|---|---|---|---|---|---|---|---|---|
| Mean | SE | Low | Upp | Q25 | Q75 | ||||
| No | 1,066 | 17% | 1,013,067 | 951 | 95 | 764 | 1,137 | 441 | 1,515 |
| Yes | 5,128 | 83% | 2,674,110 | 521 | 19 | 484 | 558 | 325 | 613 |
| NA | 6,194 | 100% | 3,687,178 | 595 | 19 | 559 | 632 | 334 | 660 |
# Point estimates for the `apiclus2_des` design, by whether the school met
# its school-wide growth target (`sch.wide`). `cascade()` also appends a
# summary row computed over all groups combined.
#
# The enrollment estimates came back NA for this design, which indicates
# missing `enroll` values. `na.rm = TRUE` drops those rows from the
# enrollment total and mean only; the school count and proportion still use
# every sampled school.
apiclus2_pe <-
  apiclus2_des |>
  group_by(sch.wide) |>
  cascade(
    Schools = survey_total(1, vartype = NULL),
    Proportion = survey_mean(proportion = TRUE, vartype = NULL),
    EnrollSum = survey_total(enroll, na.rm = TRUE, vartype = NULL),
    EnrollMean = survey_mean(enroll, na.rm = TRUE, vartype = c("se", "ci"))
    # EnrollIQR = survey_quantile(enroll, quantiles = c(.25, .75), vartype = NULL)
  )
Show the code
# Render the `apiclus2_pe` point estimates as a gt table. Columns are
# formatted by position: 2 = Schools, 3 = Proportion, 4 = EnrollSum,
# 5-8 = mean enrollment with its SE and CI bounds.
gt::gt(apiclus2_pe) |>
  gt::fmt_number(columns = c(2, 4:8), decimals = 0) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  gt::tab_spanner(
    label = "Mean Enrollment",
    columns = starts_with("EnrollMean")
  ) |>
  # The quartile labels below are commented out, so this spanner is only
  # meaningful once EnrollIQR columns are present in the input.
  gt::tab_spanner(
    label = "IQR",
    columns = starts_with("EnrollIQR")
  ) |>
  gt::cols_label(
    sch.wide = "Met Target",
    EnrollMean = "Mean",
    EnrollMean_se = "SE",
    EnrollMean_low = "Low",
    EnrollMean_upp = "Upp"
    # EnrollIQR_q25 = "Q25",
    # EnrollIQR_q75 = "Q75"
  )
| Met Target | Schools | Proportion | EnrollSum | Mean Enrollment | |||
|---|---|---|---|---|---|---|---|
| Mean | SE | Low | Upp | ||||
| No | 1,276 | 25% | 1,104,845 | 866 | 106 | 653 | 1,080 |
| Yes | 3,853 | 75% | NA | NA | NA | NA | NA |
| NA | 5,129 | 100% | NA | NA | NA | NA | NA |
5.3 Bivariate Relationships
Bivariate statistics include ratios and correlations.
# Bivariate estimates for the simple random sample design, by whether the
# school met its school-wide growth target (`sch.wide`). `cascade()` also
# appends a summary row computed over all groups combined.
apisrs_bv <- cascade(
  group_by(apisrs_des, sch.wide),
  # Estimated totals of the two variables being related.
  Meals = survey_total(meals, vartype = NULL),
  Enrollment = survey_total(enroll, vartype = NULL),
  # Ratio of `meals` to `enroll`, and their correlation; both keep the
  # default variability column (shown as "SE" in the rendered table).
  RatioEst = survey_ratio(meals, enroll),
  CorrEst = survey_corr(meals, enroll)
)
Show the code
# Render the simple random sample bivariate estimates as a gt table. Columns
# are formatted by position: 2 = Meals and 3 = Enrollment (totals, no
# decimals), 4-5 = ratio estimate and SE, 6-7 = correlation estimate and SE.
#
# Column 3 is an enrollment total, not a proportion, so it must not be
# passed through `fmt_percent()`; doing so multiplied it by 100 and appended
# a percent sign.
apisrs_bv |>
  gt::gt() |>
  gt::fmt_number(columns = 2:3, decimals = 0) |>
  gt::fmt_number(columns = 4:7, decimals = 4) |>
  gt::tab_spanner("Ratio", starts_with("Ratio")) |>
  gt::tab_spanner("Correlation", starts_with("Corr")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    RatioEst = "Est",
    RatioEst_se = "SE",
    CorrEst = "Est",
    CorrEst_se = "SE"
  )
| Met Target | Meals | Enrollment | Ratio | Correlation | ||
|---|---|---|---|---|---|---|
| Est | SE | Est | SE | |||
| No | 58,038 | 948,766 | 0.0612 | 0.0096 | −0.1103 | 0.1523 |
| Yes | 251,724 | 2,672,308 | 0.0942 | 0.0062 | −0.0360 | 0.0560 |
| NA | 309,762 | 3,621,074 | 0.0855 | 0.0055 | −0.0511 | 0.0590 |
# Bivariate estimates for the `apistrat_des` design, by whether the school
# met its school-wide growth target (`sch.wide`). `cascade()` also appends a
# summary row computed over all groups combined.
#
# This must start from `apistrat_des`; starting from `apisrs_des` simply
# reproduces the simple random sample results under a different name.
apistrat_bv <-
  apistrat_des |>
  group_by(sch.wide) |>
  cascade(
    Meals = survey_total(meals, vartype = NULL),
    Enrollment = survey_total(enroll, vartype = NULL),
    RatioEst = survey_ratio(meals, enroll),
    CorrEst = survey_corr(meals, enroll)
  )
Show the code
# Render the `apistrat_bv` bivariate estimates as a gt table. Columns are
# formatted by position: 2 = Meals and 3 = Enrollment (totals, no decimals),
# 4-5 = ratio estimate and SE, 6-7 = correlation estimate and SE.
#
# Column 3 is an enrollment total, not a proportion, so it must not be
# passed through `fmt_percent()`; doing so multiplied it by 100 and appended
# a percent sign.
apistrat_bv |>
  gt::gt() |>
  gt::fmt_number(columns = 2:3, decimals = 0) |>
  gt::fmt_number(columns = 4:7, decimals = 4) |>
  gt::tab_spanner("Ratio", starts_with("Ratio")) |>
  gt::tab_spanner("Correlation", starts_with("Corr")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    RatioEst = "Est",
    RatioEst_se = "SE",
    CorrEst = "Est",
    CorrEst_se = "SE"
  )
| Met Target | Meals | Enrollment | Ratio | Correlation | ||
|---|---|---|---|---|---|---|
| Est | SE | Est | SE | |||
| No | 58,038 | 94,876,595% | 0.0612 | 0.0096 | −0.1103 | 0.1523 |
| Yes | 251,724 | 267,230,839% | 0.0942 | 0.0062 | −0.0360 | 0.0560 |
| NA | 309,762 | 362,107,434% | 0.0855 | 0.0055 | −0.0511 | 0.0590 |
# Bivariate estimates for the `apiclus2_des` design, by whether the school
# met its school-wide growth target (`sch.wide`). `cascade()` also appends a
# summary row computed over all groups combined.
#
# This must start from `apiclus2_des`; starting from `apisrs_des` simply
# reproduces the simple random sample results under a different name.
#
# Enrollment point estimates for this design come back NA without
# `na.rm = TRUE`, which indicates missing `enroll` values, so every estimate
# that uses `enroll` drops the incomplete rows.
apiclus2_bv <-
  apiclus2_des |>
  group_by(sch.wide) |>
  cascade(
    Meals = survey_total(meals, vartype = NULL),
    Enrollment = survey_total(enroll, na.rm = TRUE, vartype = NULL),
    RatioEst = survey_ratio(meals, enroll, na.rm = TRUE),
    CorrEst = survey_corr(meals, enroll, na.rm = TRUE)
  )
Show the code
# Render the `apiclus2_bv` bivariate estimates as a gt table. Columns are
# formatted by position: 2 = Meals and 3 = Enrollment (totals, no decimals),
# 4-5 = ratio estimate and SE, 6-7 = correlation estimate and SE.
#
# Column 3 is an enrollment total, not a proportion, so it must not be
# passed through `fmt_percent()`; doing so multiplied it by 100 and appended
# a percent sign.
apiclus2_bv |>
  gt::gt() |>
  gt::fmt_number(columns = 2:3, decimals = 0) |>
  gt::fmt_number(columns = 4:7, decimals = 4) |>
  gt::tab_spanner("Ratio", starts_with("Ratio")) |>
  gt::tab_spanner("Correlation", starts_with("Corr")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    RatioEst = "Est",
    RatioEst_se = "SE",
    CorrEst = "Est",
    CorrEst_se = "SE"
  )
| Met Target | Meals | Enrollment | Ratio | Correlation | ||
|---|---|---|---|---|---|---|
| Est | SE | Est | SE | |||
| No | 58,038 | 94,876,595% | 0.0612 | 0.0096 | −0.1103 | 0.1523 |
| Yes | 251,724 | 267,230,839% | 0.0942 | 0.0062 | −0.0360 | 0.0560 |
| NA | 309,762 | 362,107,434% | 0.0855 | 0.0055 | −0.0511 | 0.0590 |