5  Descriptive Statistics

library(tidyverse)
library(scales)
library(janitor)
library(survey)
library(srvyr)
library(gtsummary)

Point estimates such as totals, means, variances, etc. may be the primary objective of the survey, or just an exploratory step prior to a multivariate analysis. Heeringa (2017) explains the math behind sample statistics. Zimmer (2024) explains how to calculate them using the srvyr package.

5.1 Survey Summary

apisrs_des |>
  tbl_svysummary(
    by  = sch.wide, 
    include = c(api00, target, growth, awards, comp.imp)
  ) |>
  modify_spanning_header(all_stat_cols() ~ "Met Growth Target") |>
  as_gt() |>
  gt::tab_header(
    title = "Simple Random Sample Survey Summary"
  ) |>
  gt::tab_options(heading.align = "left")
Simple Random Sample Survey Summary

Characteristic

Met Growth Target

No
N = 1,146

1

Yes
N = 5,048

1
api00 541 (482, 708) 675 (577, 772)
target 13 (4, 16) 9 (5, 14)
    Unknown 0 588
growth -1 (-12, 6) 33 (20, 51)
awards 0 (0%) 3,840 (76%)
comp.imp 62 (5.4%) 4,057 (80%)
1

Median (Q1, Q3); n (%)

5.2 Point Estimates

How many schools met the growth target? survey_count() returns a scaled row count. If you are collecting other statistics, use survey_total(1) to sum the rows. Use the vartype parameter to add cols for interval-related data around the estimate.

apisrs_pe <-
  apisrs_des |>
  group_by(sch.wide) |>
  cascade(
    Schools = survey_total(1, vartype = NULL),
    Proportion = survey_mean(proportion = TRUE, vartype = NULL),
    EnrollSum = survey_total(enroll, vartype = NULL),
    EnrollMean = survey_mean(enroll, vartype = c("se", "ci")),
    EnrollIQR = survey_quantile(enroll, quantiles = c(.25, .75), vartype = NULL)
  )
Show the code
apisrs_pe |>
  gt::gt() |>
  gt::fmt_number(columns = c(2, 4:10), decimals = 0) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  gt::tab_spanner("Mean Enrollment", starts_with("EnrollMean")) |>
  gt::tab_spanner("IQR", starts_with("EnrollIQR")) |>
  gt::cols_label(
    sch.wide = "Met Target",
      EnrollMean = "Mean",
      EnrollMean_se = "SE",
      EnrollMean_low = "Low",
      EnrollMean_upp = "Upp",
      EnrollIQR_q25 = "Q25",
      EnrollIQR_q75 = "Q75"
  )
Met Target Schools Proportion EnrollSum Mean Enrollment IQR
Mean SE Low Upp Q25 Q75
No 1,146 19% 948,766 828 91 648 1,008 412 1,252
Yes 5,048 81% 2,672,308 529 24 481 578 339 639
NA 6,194 100% 3,621,074 585 27 531 639 339 664
apistrat_pe <-
  apistrat_des |>
  group_by(sch.wide) |>
  cascade(
    Schools = survey_total(1, vartype = NULL),
    Proportion = survey_mean(proportion = TRUE, vartype = NULL),
    EnrollSum = survey_total(enroll, vartype = NULL),
    EnrollMean = survey_mean(enroll, vartype = c("se", "ci")),
    EnrollIQR = survey_quantile(enroll, quantiles = c(.25, .75), vartype = NULL)
  )
Show the code
apistrat_pe |>
  gt::gt() |>
  gt::fmt_number(columns = c(2, 4:10), decimals = 0) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  gt::tab_spanner("Mean Enrollment", starts_with("EnrollMean")) |>
  gt::tab_spanner("IQR", starts_with("EnrollIQR")) |>
  gt::cols_label(
    sch.wide = "Met Target",
      EnrollMean = "Mean",
      EnrollMean_se = "SE",
      EnrollMean_low = "Low",
      EnrollMean_upp = "Upp",
      EnrollIQR_q25 = "Q25",
      EnrollIQR_q75 = "Q75"
  )
Met Target Schools Proportion EnrollSum Mean Enrollment IQR
Mean SE Low Upp Q25 Q75
No 1,066 17% 1,013,067 951 95 764 1,137 441 1,515
Yes 5,128 83% 2,674,110 521 19 484 558 325 613
NA 6,194 100% 3,687,178 595 19 559 632 334 660
apiclus2_pe <-
  apiclus2_des |>
  group_by(sch.wide) |>
  cascade(
    Schools = survey_total(1, vartype = NULL),
    Proportion = survey_mean(proportion = TRUE, vartype = NULL),
    EnrollSum = survey_total(enroll, vartype = NULL),
    EnrollMean = survey_mean(enroll, vartype = c("se", "ci")),
    # EnrollIQR = survey_quantile(enroll, quantiles = c(.25, .75), vartype = NULL)
  )
Show the code
apiclus2_pe |>
  gt::gt() |>
  gt::fmt_number(columns = c(2, 4:8), decimals = 0) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  gt::tab_spanner("Mean Enrollment", starts_with("EnrollMean")) |>
  gt::tab_spanner("IQR", starts_with("EnrollIQR")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    EnrollMean = "Mean",
    EnrollMean_se = "SE",
    EnrollMean_low = "Low",
    EnrollMean_upp = "Upp"
    # EnrollIQR_q25 = "Q25",
    # EnrollIQR_q75 = "Q75"
  )
Met Target Schools Proportion EnrollSum Mean Enrollment
Mean SE Low Upp
No 1,276 25% 1,104,845 866 106 653 1,080
Yes 3,853 75% NA NA NA NA NA
NA 5,129 100% NA NA NA NA NA

5.3 Bivariate Relationships

Bivariate statistics include ratios and correlations.

apisrs_bv <-
  apisrs_des |>
  group_by(sch.wide) |>
  cascade(
    Meals = survey_total(meals, vartype = NULL),
    Enrollment = survey_total(enroll, vartype = NULL),
    RatioEst = survey_ratio(meals, enroll),
    CorrEst = survey_corr(meals, enroll)
  )
Show the code
apisrs_bv |>
  gt::gt() |>
  gt::fmt_number(columns = c(2:3), decimals = 0) |>
  gt::fmt_number(columns = c(4:7), decimals = 4) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  gt::tab_spanner("Ratio", starts_with("Ratio")) |>
  gt::tab_spanner("Correlation", starts_with("Corr")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    RatioEst = "Est",
    RatioEst_se = "SE",
    CorrEst = "Est",
    CorrEst_se = "SE"
  )
Met Target Meals Enrollment Ratio Correlation
Est SE Est SE
No 58,038 94,876,595% 0.0612 0.0096 −0.1103 0.1523
Yes 251,724 267,230,839% 0.0942 0.0062 −0.0360 0.0560
NA 309,762 362,107,434% 0.0855 0.0055 −0.0511 0.0590
apistrat_bv <-
  apisrs_des |>
  group_by(sch.wide) |>
  cascade(
    Meals = survey_total(meals, vartype = NULL),
    Enrollment = survey_total(enroll, vartype = NULL),
    RatioEst = survey_ratio(meals, enroll),
    CorrEst = survey_corr(meals, enroll)
  )
Show the code
apistrat_bv |>
  gt::gt() |>
  gt::fmt_number(columns = c(2:3), decimals = 0) |>
  gt::fmt_number(columns = c(4:7), decimals = 4) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  gt::tab_spanner("Ratio", starts_with("Ratio")) |>
  gt::tab_spanner("Correlation", starts_with("Corr")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    RatioEst = "Est",
    RatioEst_se = "SE",
    CorrEst = "Est",
    CorrEst_se = "SE"
  )
Met Target Meals Enrollment Ratio Correlation
Est SE Est SE
No 58,038 94,876,595% 0.0612 0.0096 −0.1103 0.1523
Yes 251,724 267,230,839% 0.0942 0.0062 −0.0360 0.0560
NA 309,762 362,107,434% 0.0855 0.0055 −0.0511 0.0590
apiclus2_bv <-
  apisrs_des |>
  group_by(sch.wide) |>
  cascade(
    Meals = survey_total(meals, vartype = NULL),
    Enrollment = survey_total(enroll, vartype = NULL),
    RatioEst = survey_ratio(meals, enroll),
    CorrEst = survey_corr(meals, enroll)
  )
Show the code
apiclus2_bv |>
  gt::gt() |>
  gt::fmt_number(columns = c(2:3), decimals = 0) |>
  gt::fmt_number(columns = c(4:7), decimals = 4) |>
  gt::fmt_percent(columns = 3, decimals = 0) |>
  gt::tab_spanner("Ratio", starts_with("Ratio")) |>
  gt::tab_spanner("Correlation", starts_with("Corr")) |>
  gt::cols_label(
    sch.wide = "Met Target",
    RatioEst = "Est",
    RatioEst_se = "SE",
    CorrEst = "Est",
    CorrEst_se = "SE"
  )
Met Target Meals Enrollment Ratio Correlation
Est SE Est SE
No 58,038 94,876,595% 0.0612 0.0096 −0.1103 0.1523
Yes 251,724 267,230,839% 0.0942 0.0062 −0.0360 0.0560
NA 309,762 362,107,434% 0.0855 0.0055 −0.0511 0.0590