library(tidyverse)
library(scales)
library(janitor)
library(survey)
library(srvyr)
library(gtsummary)
5 Descriptive Statistics
Point estimates such as totals, means, variances, etc. may be the primary objective of the survey, or just an exploratory step prior to a multivariate analysis. Heeringa (2017) explains the math behind sample statistics. Zimmer (2024) explains how to calculate them using the srvyr package.
5.1 Survey Summary
# Summary table for the simple-random-sample design, split by `sch.wide`
# (whether the school met its school-wide growth target).
#
# `tbl_svysummary()` builds the weighted summary for the listed variables;
# the spanning header labels the `sch.wide` columns, and the table is then
# converted to a gt object so a left-aligned title can be attached.
apisrs_des |>
  tbl_svysummary(
    by = sch.wide,
    include = c(api00, target, growth, awards, comp.imp)
  ) |>
  modify_spanning_header(all_stat_cols() ~ "Met Growth Target") |>
  as_gt() |>
  gt::tab_header(
    title = "Simple Random Sample Survey Summary"
  ) |>
  gt::tab_options(heading.align = "left")
**Simple Random Sample Survey Summary**

| Characteristic | Met Growth Target: No¹ | Met Growth Target: Yes¹ |
|---|---|---|
| api00 | 541 (482, 708) | 675 (577, 772) |
| target | 13 (4, 16) | 9 (5, 14) |
| Unknown (target) | 0 | 588 |
| growth | -1 (-12, 6) | 33 (20, 51) |
| awards | 0 (0%) | 3,840 (76%) |
| comp.imp | 62 (5.4%) | 4,057 (80%) |

¹ Median (Q1, Q3); n (%)
5.2 Point Estimates
How many schools met the growth target? `survey_count()` returns a weighted row count. If you are collecting other statistics in the same call, use `survey_total(1)` to sum the rows instead. Use the `vartype` parameter to add columns for the variability measures (standard error, confidence interval, etc.) around the estimate.
# Point estimates for the simple-random-sample design, by `sch.wide`.
#
# `cascade()` computes each statistic per group and also appends an overall
# row (shown with an `NA` group label in the rendered table).
#   Schools    - estimated number of schools (weighted row count)
#   Proportion - share of schools in each group
#   EnrollSum  - estimated total enrollment
#   EnrollMean - mean enrollment with its standard error and confidence bounds
#   EnrollIQR  - 25th and 75th percentiles of enrollment
apisrs_pe <- apisrs_des |>
  group_by(sch.wide) |>
  cascade(
    Schools = survey_total(1, vartype = NULL),
    Proportion = survey_mean(proportion = TRUE, vartype = NULL),
    EnrollSum = survey_total(enroll, vartype = NULL),
    EnrollMean = survey_mean(enroll, vartype = c("se", "ci")),
    EnrollIQR = survey_quantile(
      enroll,
      quantiles = c(0.25, 0.75),
      vartype = NULL
    )
  )
Show the code
# Render the SRS point estimates as a gt table.
#
# Column positions follow the order produced by the estimation step:
# 1 = sch.wide, 2 = Schools, 3 = Proportion, 4 = EnrollSum,
# 5:8 = EnrollMean (+ _se, _low, _upp), 9:10 = EnrollIQR quartiles.
apisrs_pe |>
  gt::gt() |>
  # Whole numbers for every count/enrollment column (all but Proportion).
  gt::fmt_number(columns = c(2, 4:10), decimals = 0) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  # Group the related estimate columns under shared spanner headings.
  gt::tab_spanner("Mean Enrollment", starts_with("EnrollMean")) |>
  gt::tab_spanner("IQR", starts_with("EnrollIQR")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    EnrollMean = "Mean",
    EnrollMean_se = "SE",
    EnrollMean_low = "Low",
    EnrollMean_upp = "Upp",
    EnrollIQR_q25 = "Q25",
    EnrollIQR_q75 = "Q75"
  )
| Met Target | Schools | Proportion | EnrollSum | Mean Enrollment: Mean | Mean Enrollment: SE | Mean Enrollment: Low | Mean Enrollment: Upp | IQR: Q25 | IQR: Q75 |
|---|---|---|---|---|---|---|---|---|---|
| No | 1,146 | 19% | 948,766 | 828 | 91 | 648 | 1,008 | 412 | 1,252 |
| Yes | 5,048 | 81% | 2,672,308 | 529 | 24 | 481 | 578 | 339 | 639 |
| NA | 6,194 | 100% | 3,621,074 | 585 | 27 | 531 | 639 | 339 | 664 |
# Point estimates for the stratified design, by `sch.wide`.
#
# `cascade()` computes each statistic per group and also appends an overall
# row (shown with an `NA` group label in the rendered table).
#   Schools    - estimated number of schools (weighted row count)
#   Proportion - share of schools in each group
#   EnrollSum  - estimated total enrollment
#   EnrollMean - mean enrollment with its standard error and confidence bounds
#   EnrollIQR  - 25th and 75th percentiles of enrollment
apistrat_pe <- apistrat_des |>
  group_by(sch.wide) |>
  cascade(
    Schools = survey_total(1, vartype = NULL),
    Proportion = survey_mean(proportion = TRUE, vartype = NULL),
    EnrollSum = survey_total(enroll, vartype = NULL),
    EnrollMean = survey_mean(enroll, vartype = c("se", "ci")),
    EnrollIQR = survey_quantile(
      enroll,
      quantiles = c(0.25, 0.75),
      vartype = NULL
    )
  )
Show the code
# Render the stratified-design point estimates as a gt table.
#
# Column positions follow the order produced by the estimation step:
# 1 = sch.wide, 2 = Schools, 3 = Proportion, 4 = EnrollSum,
# 5:8 = EnrollMean (+ _se, _low, _upp), 9:10 = EnrollIQR quartiles.
apistrat_pe |>
  gt::gt() |>
  # Whole numbers for every count/enrollment column (all but Proportion).
  gt::fmt_number(columns = c(2, 4:10), decimals = 0) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  # Group the related estimate columns under shared spanner headings.
  gt::tab_spanner("Mean Enrollment", starts_with("EnrollMean")) |>
  gt::tab_spanner("IQR", starts_with("EnrollIQR")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    EnrollMean = "Mean",
    EnrollMean_se = "SE",
    EnrollMean_low = "Low",
    EnrollMean_upp = "Upp",
    EnrollIQR_q25 = "Q25",
    EnrollIQR_q75 = "Q75"
  )
| Met Target | Schools | Proportion | EnrollSum | Mean Enrollment: Mean | Mean Enrollment: SE | Mean Enrollment: Low | Mean Enrollment: Upp | IQR: Q25 | IQR: Q75 |
|---|---|---|---|---|---|---|---|---|---|
| No | 1,066 | 17% | 1,013,067 | 951 | 95 | 764 | 1,137 | 441 | 1,515 |
| Yes | 5,128 | 83% | 2,674,110 | 521 | 19 | 484 | 558 | 325 | 613 |
| NA | 6,194 | 100% | 3,687,178 | 595 | 19 | 559 | 632 | 334 | 660 |
# Point estimates for the two-stage cluster design, by `sch.wide`.
#
# `cascade()` computes each statistic per group and also appends an overall
# row (shown with an `NA` group label in the rendered table).
#
# The enrollment estimates previously rendered as NA for the "Yes" group and
# the overall row, which indicates missing `enroll` values in this sample, so
# they are dropped with `na.rm = TRUE`.
# NOTE(review): any table rendered before this change still shows the NAs and
# needs regenerating.
apiclus2_pe <- apiclus2_des |>
  group_by(sch.wide) |>
  cascade(
    Schools = survey_total(1, vartype = NULL),
    Proportion = survey_mean(proportion = TRUE, vartype = NULL),
    EnrollSum = survey_total(enroll, na.rm = TRUE, vartype = NULL),
    EnrollMean = survey_mean(enroll, na.rm = TRUE, vartype = c("se", "ci"))
    # Quartiles are left disabled for this design, as in the original chunk:
    # EnrollIQR = survey_quantile(enroll, quantiles = c(.25, .75), vartype = NULL)
  )
Show the code
# Render the cluster-design point estimates as a gt table.
#
# Column positions follow the order produced by the estimation step:
# 1 = sch.wide, 2 = Schools, 3 = Proportion, 4 = EnrollSum,
# 5:8 = EnrollMean (+ _se, _low, _upp). There are no IQR columns here because
# the quantile estimate is disabled for this design.
apiclus2_pe |>
  gt::gt() |>
  # Whole numbers for every count/enrollment column (all but Proportion).
  gt::fmt_number(columns = c(2, 4:8), decimals = 0) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  gt::tab_spanner("Mean Enrollment", starts_with("EnrollMean")) |>
  # The "IQR" spanner matched no columns, so it is disabled together with the
  # IQR labels below; re-enable all three if the quantile estimate returns.
  # gt::tab_spanner("IQR", starts_with("EnrollIQR")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    EnrollMean = "Mean",
    EnrollMean_se = "SE",
    EnrollMean_low = "Low",
    EnrollMean_upp = "Upp"
    # EnrollIQR_q25 = "Q25",
    # EnrollIQR_q75 = "Q75"
  )
| Met Target | Schools | Proportion | EnrollSum | Mean Enrollment: Mean | Mean Enrollment: SE | Mean Enrollment: Low | Mean Enrollment: Upp |
|---|---|---|---|---|---|---|---|
| No | 1,276 | 25% | 1,104,845 | 866 | 106 | 653 | 1,080 |
| Yes | 3,853 | 75% | NA | NA | NA | NA | NA |
| NA | 5,129 | 100% | NA | NA | NA | NA | NA |
5.3 Bivariate Relationships
Bivariate statistics include ratios and correlations.
# Bivariate estimates for the simple-random-sample design, by `sch.wide`.
#
# `cascade()` computes each statistic per group and also appends an overall
# row (shown with an `NA` group label in the rendered table).
#   Meals / Enrollment - estimated totals of `meals` and `enroll`
#   RatioEst           - ratio of `meals` to `enroll`, with standard error
#   CorrEst            - correlation of `meals` and `enroll`, with standard error
apisrs_bv <- apisrs_des |>
  group_by(sch.wide) |>
  cascade(
    Meals = survey_total(meals, vartype = NULL),
    Enrollment = survey_total(enroll, vartype = NULL),
    RatioEst = survey_ratio(meals, enroll),
    CorrEst = survey_corr(meals, enroll)
  )
Show the code
# Render the SRS bivariate estimates as a gt table.
#
# Column positions follow the order produced by the estimation step:
# 1 = sch.wide, 2 = Meals, 3 = Enrollment,
# 4:5 = RatioEst (+ _se), 6:7 = CorrEst (+ _se).
#
# The former `fmt_percent(columns = 3)` step is removed: column 3 is the
# Enrollment total, and the later percent format overrode the number format,
# rendering values such as "94,876,595%".
apisrs_bv |>
  gt::gt() |>
  gt::fmt_number(columns = c(2:3), decimals = 0) |>
  gt::fmt_number(columns = c(4:7), decimals = 4) |>
  gt::tab_spanner("Ratio", starts_with("Ratio")) |>
  gt::tab_spanner("Correlation", starts_with("Corr")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    RatioEst = "Est",
    RatioEst_se = "SE",
    CorrEst = "Est",
    CorrEst_se = "SE"
  )
| Met Target | Meals | Enrollment | Ratio: Est | Ratio: SE | Correlation: Est | Correlation: SE |
|---|---|---|---|---|---|---|
| No | 58,038 | 94,876,595% | 0.0612 | 0.0096 | −0.1103 | 0.1523 |
| Yes | 251,724 | 267,230,839% | 0.0942 | 0.0062 | −0.0360 | 0.0560 |
| NA | 309,762 | 362,107,434% | 0.0855 | 0.0055 | −0.0511 | 0.0590 |
# Bivariate estimates for the stratified design, by `sch.wide`.
#
# Fix: this chunk previously read from `apisrs_des`, so it reproduced the
# simple-random-sample results instead of using the stratified design.
# NOTE(review): any table rendered before this change still shows the SRS
# numbers and needs regenerating.
#
# `cascade()` computes each statistic per group and also appends an overall
# row (shown with an `NA` group label in the rendered table).
#   Meals / Enrollment - estimated totals of `meals` and `enroll`
#   RatioEst           - ratio of `meals` to `enroll`, with standard error
#   CorrEst            - correlation of `meals` and `enroll`, with standard error
apistrat_bv <- apistrat_des |>
  group_by(sch.wide) |>
  cascade(
    Meals = survey_total(meals, vartype = NULL),
    Enrollment = survey_total(enroll, vartype = NULL),
    RatioEst = survey_ratio(meals, enroll),
    CorrEst = survey_corr(meals, enroll)
  )
Show the code
# Render the stratified-design bivariate estimates as a gt table.
#
# Column positions follow the order produced by the estimation step:
# 1 = sch.wide, 2 = Meals, 3 = Enrollment,
# 4:5 = RatioEst (+ _se), 6:7 = CorrEst (+ _se).
#
# The former `fmt_percent(columns = 3)` step is removed: column 3 is the
# Enrollment total, and the later percent format overrode the number format,
# rendering values such as "94,876,595%".
apistrat_bv |>
  gt::gt() |>
  gt::fmt_number(columns = c(2:3), decimals = 0) |>
  gt::fmt_number(columns = c(4:7), decimals = 4) |>
  gt::tab_spanner("Ratio", starts_with("Ratio")) |>
  gt::tab_spanner("Correlation", starts_with("Corr")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    RatioEst = "Est",
    RatioEst_se = "SE",
    CorrEst = "Est",
    CorrEst_se = "SE"
  )
| Met Target | Meals | Enrollment | Ratio: Est | Ratio: SE | Correlation: Est | Correlation: SE |
|---|---|---|---|---|---|---|
| No | 58,038 | 94,876,595% | 0.0612 | 0.0096 | −0.1103 | 0.1523 |
| Yes | 251,724 | 267,230,839% | 0.0942 | 0.0062 | −0.0360 | 0.0560 |
| NA | 309,762 | 362,107,434% | 0.0855 | 0.0055 | −0.0511 | 0.0590 |
# Bivariate estimates for the two-stage cluster design, by `sch.wide`.
#
# Fix: this chunk previously read from `apisrs_des`, so it reproduced the
# simple-random-sample results instead of using the cluster design.
# NOTE(review): any table rendered before this change still shows the SRS
# numbers and needs regenerating.
#
# `na.rm = TRUE` is set on every estimate that uses `enroll`: the point
# estimates for this design rendered as NA for enrollment, which indicates
# missing `enroll` values in the cluster sample.
#
# `cascade()` computes each statistic per group and also appends an overall
# row (shown with an `NA` group label in the rendered table).
apiclus2_bv <- apiclus2_des |>
  group_by(sch.wide) |>
  cascade(
    Meals = survey_total(meals, vartype = NULL),
    Enrollment = survey_total(enroll, na.rm = TRUE, vartype = NULL),
    RatioEst = survey_ratio(meals, enroll, na.rm = TRUE),
    CorrEst = survey_corr(meals, enroll, na.rm = TRUE)
  )
Show the code
# Render the cluster-design bivariate estimates as a gt table.
#
# Column positions follow the order produced by the estimation step:
# 1 = sch.wide, 2 = Meals, 3 = Enrollment,
# 4:5 = RatioEst (+ _se), 6:7 = CorrEst (+ _se).
#
# The former `fmt_percent(columns = 3)` step is removed: column 3 is the
# Enrollment total, and the later percent format overrode the number format,
# rendering values such as "94,876,595%".
apiclus2_bv |>
  gt::gt() |>
  gt::fmt_number(columns = c(2:3), decimals = 0) |>
  gt::fmt_number(columns = c(4:7), decimals = 4) |>
  gt::tab_spanner("Ratio", starts_with("Ratio")) |>
  gt::tab_spanner("Correlation", starts_with("Corr")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    RatioEst = "Est",
    RatioEst_se = "SE",
    CorrEst = "Est",
    CorrEst_se = "SE"
  )
| Met Target | Meals | Enrollment | Ratio: Est | Ratio: SE | Correlation: Est | Correlation: SE |
|---|---|---|---|---|---|---|
| No | 58,038 | 94,876,595% | 0.0612 | 0.0096 | −0.1103 | 0.1523 |
| Yes | 251,724 | 267,230,839% | 0.0942 | 0.0062 | −0.0360 | 0.0560 |
| NA | 309,762 | 362,107,434% | 0.0855 | 0.0055 | −0.0511 | 0.0590 |