Calculate summary metrics on an organisation

Given individual-level variables and an organisation structure, this function calculates aggregated metrics using either the cumulative approach (all individuals in that unit or its descendants) or the orthodox approach (only individuals immediately associated with that unit).

calc_summary_df(tg, df, tall_df = NULL, selected_vars,
  is_cumulative = FALSE)

Arguments

tg	tbl_graph that passes a check with check_tbl_graph_is_org
df	a data frame with columns named unit_id and individual_id (and optionally individual-level variables) with one row per individual - see the indiv_df format in `orgsurveyr-data-formats`
tall_df	a data frame with columns named individual_id, metric_id and value with multiple rows per individual - see the indiv_tall_df format in `orgsurveyr-data-formats`
selected_vars	names of variables in df or tall_df (character vector)
is_cumulative	whether to calculate cumulative or orthodox aggregations (logical)

Value

A tall data frame with three columns: unit_id, metric_id and value.

Examples

library(tidygraph)
#> 
#> Attaching package: ‘tidygraph’
#> The following object is masked from ‘package:testthat’:
#> 
#>     matches
#> The following object is masked from ‘package:stats’:
#> 
#>     filter
library(dplyr)
#> 
#> Attaching package: ‘dplyr’
#> The following object is masked from ‘package:testthat’:
#> 
#>     matches
#> The following objects are masked from ‘package:stats’:
#> 
#>     filter, lag
#> The following objects are masked from ‘package:base’:
#> 
#>     intersect, setdiff, setequal, union
set.seed(1231)
tg1b <- create_realistic_org(4,3, prob=0.3, delete_units = TRUE) %>% simulate_unit_size
# NOT RUN {
plot_org(tg1b) + geom_node_text(aes(label=unit_size), color='white')
# }
tg1b_indiv_df <- tg1b %>%
  simulate_individuals_df() %>%
  mutate(test_var2 = purrr::map_dbl(individual_id, ~rnorm(1, 20,3)))
tg1b_indiv_df
#> # A tibble: 98 x 5
#>    individual_id individual_name unit_id test_var test_var2
#>    <chr>         <chr>           <chr>      <dbl>     <dbl>
#>  1 1             1_1             1          11.7       19.9
#>  2 2             1_2             1           6.61      14.7
#>  3 3             1_3             1           6.43      18.0
#>  4 4             2_1             2          11.5       22.0
#>  5 5             2_2             2           7.36      20.6
#>  6 6             4_1             4           8.51      17.7
#>  7 7             4_2             4          10.9       18.4
#>  8 8             5_1             5           8.43      19.9
#>  9 9             5_2             5          13.7       17.6
#> 10 10            6_1             6          15.0       22.4
#> # ... with 88 more rows

tg1b_indiv_tall_df <- tg1b_indiv_df %>%
  select(individual_id, test_var, test_var2) %>%
  tidyr::gather('metric_id', 'value', -individual_id)
tg1b_indiv_tall_df
#> # A tibble: 196 x 3
#>    individual_id metric_id value
#>    <chr>         <chr>     <dbl>
#>  1 1             test_var  11.7 
#>  2 2             test_var   6.61
#>  3 3             test_var   6.43
#>  4 4             test_var  11.5 
#>  5 5             test_var   7.36
#>  6 6             test_var   8.51
#>  7 7             test_var  10.9 
#>  8 8             test_var   8.43
#>  9 9             test_var  13.7 
#> 10 10            test_var  15.0 
#> # ... with 186 more rows

# using wide data frame
calc_summary_df(tg1b, tg1b_indiv_df, NULL,
                'test_var2', is_cumulative=TRUE)
#> Using wide data frame format for individual variables
#> # A tibble: 29 x 3
#>    unit_id metric_id value
#>    <chr>   <chr>     <dbl>
#>  1 1       test_var2  19.6
#>  2 16      test_var2  19.1
#>  3 17      test_var2  19.2
#>  4 18      test_var2  19.5
#>  5 2       test_var2  20.4
#>  6 20      test_var2  19.6
#>  7 22      test_var2  17.6
#>  8 23      test_var2  22.3
#>  9 26      test_var2  19.3
#> 10 27      test_var2  19.3
#> # ... with 19 more rows
calc_summary_df(tg1b, tg1b_indiv_df, NULL,
                c('test_var', 'test_var2'), is_cumulative=TRUE)
#> Using wide data frame format for individual variables
#> # A tibble: 58 x 3
#>    unit_id metric_id value
#>    <chr>   <chr>     <dbl>
#>  1 1       test_var  10.1 
#>  2 1       test_var2 19.6 
#>  3 16      test_var   9.52
#>  4 16      test_var2 19.1 
#>  5 17      test_var  10.1 
#>  6 17      test_var2 19.2 
#>  7 18      test_var  11.1 
#>  8 18      test_var2 19.5 
#>  9 2       test_var  10.2 
#> 10 2       test_var2 20.4 
#> # ... with 48 more rows
calc_summary_df(tg1b, tg1b_indiv_df, NULL,
               'test_var2', is_cumulative=FALSE)
#> Using wide data frame format for individual variables
#> # A tibble: 29 x 3
#>    unit_id metric_id value
#>    <chr>   <chr>     <dbl>
#>  1 1       test_var2  17.5
#>  2 16      test_var2  21.5
#>  3 17      test_var2  18.1
#>  4 18      test_var2  19.3
#>  5 2       test_var2  21.3
#>  6 20      test_var2  23.3
#>  7 22      test_var2  17.6
#>  8 23      test_var2  22.3
#>  9 26      test_var2  19.3
#> 10 27      test_var2  19.3
#> # ... with 19 more rows
calc_summary_df(tg1b, tg1b_indiv_df, NULL,
                c('test_var', 'test_var2'), is_cumulative=FALSE)
#> Using wide data frame format for individual variables
#> # A tibble: 58 x 3
#>    unit_id metric_id value
#>    <chr>   <chr>     <dbl>
#>  1 1       test_var   8.23
#>  2 1       test_var2 17.5 
#>  3 16      test_var  10.4 
#>  4 16      test_var2 21.5 
#>  5 17      test_var   7.46
#>  6 17      test_var2 18.1 
#>  7 18      test_var   9.57
#>  8 18      test_var2 19.3 
#>  9 2       test_var   9.45
#> 10 2       test_var2 21.3 
#> # ... with 48 more rows

# using tall data frame
calc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df,
                'test_var2', is_cumulative=TRUE)
#> Using tall data frame format for individual variables
#> # A tibble: 29 x 3
#>    unit_id metric_id value
#>    <chr>   <chr>     <dbl>
#>  1 1       test_var2  19.6
#>  2 16      test_var2  19.1
#>  3 17      test_var2  19.2
#>  4 18      test_var2  19.5
#>  5 2       test_var2  20.4
#>  6 20      test_var2  19.6
#>  7 22      test_var2  17.6
#>  8 23      test_var2  22.3
#>  9 26      test_var2  19.3
#> 10 27      test_var2  19.3
#> # ... with 19 more rows
calc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df,
                c('test_var', 'test_var2'), is_cumulative=TRUE)
#> Using tall data frame format for individual variables
#> # A tibble: 58 x 3
#>    unit_id metric_id value
#>    <chr>   <chr>     <dbl>
#>  1 1       test_var  10.1 
#>  2 1       test_var2 19.6 
#>  3 16      test_var   9.52
#>  4 16      test_var2 19.1 
#>  5 17      test_var  10.1 
#>  6 17      test_var2 19.2 
#>  7 18      test_var  11.1 
#>  8 18      test_var2 19.5 
#>  9 2       test_var  10.2 
#> 10 2       test_var2 20.4 
#> # ... with 48 more rows
calc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df,
                'test_var2', is_cumulative=FALSE)
#> Using tall data frame format for individual variables
#> # A tibble: 29 x 3
#>    unit_id metric_id value
#>    <chr>   <chr>     <dbl>
#>  1 1       test_var2  17.5
#>  2 16      test_var2  21.5
#>  3 17      test_var2  18.1
#>  4 18      test_var2  19.3
#>  5 2       test_var2  21.3
#>  6 20      test_var2  23.3
#>  7 22      test_var2  17.6
#>  8 23      test_var2  22.3
#>  9 26      test_var2  19.3
#> 10 27      test_var2  19.3
#> # ... with 19 more rows
calc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df,
                c('test_var', 'test_var2'), is_cumulative=FALSE)
#> Using tall data frame format for individual variables
#> # A tibble: 58 x 3
#>    unit_id metric_id value
#>    <chr>   <chr>     <dbl>
#>  1 1       test_var   8.23
#>  2 1       test_var2 17.5 
#>  3 16      test_var  10.4 
#>  4 16      test_var2 21.5 
#>  5 17      test_var   7.46
#>  6 17      test_var2 18.1 
#>  7 18      test_var   9.57
#>  8 18      test_var2 19.3 
#>  9 2       test_var   9.45
#> 10 2       test_var2 21.3 
#> # ... with 48 more rows

Arguments

Value

Examples

Contents