Given individual-level variables and an organisation structure, this function calculates aggregated metrics for each unit using either the cumulative approach (all individuals in the unit or its descendants) or the orthodox approach (only individuals immediately associated with the unit).

calc_summary_df(tg, df, tall_df = NULL, selected_vars,
  is_cumulative = FALSE)

Arguments

tg

tbl_graph that passes a check with check_tbl_graph_is_org

df

a data frame with columns named unit_id and individual_id (and optionally individual-level variables), with one row per individual - see the indiv_df format in orgsurveyr-data-formats

tall_df

a data frame with columns named individual_id, metric_id and value, with multiple rows per individual - see the indiv_tall_df format in orgsurveyr-data-formats

selected_vars

names of variables in df or tall_df (character vector)

is_cumulative

whether to calculate cumulative or orthodox aggregations (logical)

Value

A tall data frame with three columns: unit_id, metric_id and value.

Examples

library(tidygraph)
#> #> Attaching package: ‘tidygraph’
#> The following object is masked from ‘package:testthat’: #> #> matches
#> The following object is masked from ‘package:stats’: #> #> filter
library(dplyr)
#> #> Attaching package: ‘dplyr’
#> The following object is masked from ‘package:testthat’: #> #> matches
#> The following objects are masked from ‘package:stats’: #> #> filter, lag
#> The following objects are masked from ‘package:base’: #> #> intersect, setdiff, setequal, union
set.seed(1231)
tg1b <- create_realistic_org(4,3, prob=0.3, delete_units = TRUE) %>% simulate_unit_size
# NOT RUN { plot_org(tg1b) + geom_node_text(aes(label=unit_size), color='white') # }
tg1b_indiv_df <- tg1b %>%
  simulate_individuals_df() %>%
  mutate(test_var2 = purrr::map_dbl(individual_id, ~rnorm(1, 20,3)))
tg1b_indiv_df
#> # A tibble: 98 x 5 #> individual_id individual_name unit_id test_var test_var2 #> <chr> <chr> <chr> <dbl> <dbl> #> 1 1 1_1 1 11.7 19.9 #> 2 2 1_2 1 6.61 14.7 #> 3 3 1_3 1 6.43 18.0 #> 4 4 2_1 2 11.5 22.0 #> 5 5 2_2 2 7.36 20.6 #> 6 6 4_1 4 8.51 17.7 #> 7 7 4_2 4 10.9 18.4 #> 8 8 5_1 5 8.43 19.9 #> 9 9 5_2 5 13.7 17.6 #> 10 10 6_1 6 15.0 22.4 #> # ... with 88 more rows
tg1b_indiv_tall_df <- tg1b_indiv_df %>%
  select(individual_id, test_var, test_var2) %>%
  tidyr::gather('metric_id', 'value', -individual_id)
tg1b_indiv_tall_df
#> # A tibble: 196 x 3 #> individual_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var 11.7 #> 2 2 test_var 6.61 #> 3 3 test_var 6.43 #> 4 4 test_var 11.5 #> 5 5 test_var 7.36 #> 6 6 test_var 8.51 #> 7 7 test_var 10.9 #> 8 8 test_var 8.43 #> 9 9 test_var 13.7 #> 10 10 test_var 15.0 #> # ... with 186 more rows
# using wide data frame
calc_summary_df(tg1b, tg1b_indiv_df, NULL, 'test_var2', is_cumulative=TRUE)
#> Using wide data frame format for individual variables
#> # A tibble: 29 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var2 19.6 #> 2 16 test_var2 19.1 #> 3 17 test_var2 19.2 #> 4 18 test_var2 19.5 #> 5 2 test_var2 20.4 #> 6 20 test_var2 19.6 #> 7 22 test_var2 17.6 #> 8 23 test_var2 22.3 #> 9 26 test_var2 19.3 #> 10 27 test_var2 19.3 #> # ... with 19 more rows
calc_summary_df(tg1b, tg1b_indiv_df, NULL, c('test_var', 'test_var2'), is_cumulative=TRUE)
#> Using wide data frame format for individual variables
#> # A tibble: 58 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var 10.1 #> 2 1 test_var2 19.6 #> 3 16 test_var 9.52 #> 4 16 test_var2 19.1 #> 5 17 test_var 10.1 #> 6 17 test_var2 19.2 #> 7 18 test_var 11.1 #> 8 18 test_var2 19.5 #> 9 2 test_var 10.2 #> 10 2 test_var2 20.4 #> # ... with 48 more rows
calc_summary_df(tg1b, tg1b_indiv_df, NULL, 'test_var2', is_cumulative=FALSE)
#> Using wide data frame format for individual variables
#> # A tibble: 29 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var2 17.5 #> 2 16 test_var2 21.5 #> 3 17 test_var2 18.1 #> 4 18 test_var2 19.3 #> 5 2 test_var2 21.3 #> 6 20 test_var2 23.3 #> 7 22 test_var2 17.6 #> 8 23 test_var2 22.3 #> 9 26 test_var2 19.3 #> 10 27 test_var2 19.3 #> # ... with 19 more rows
calc_summary_df(tg1b, tg1b_indiv_df, NULL, c('test_var', 'test_var2'), is_cumulative=FALSE)
#> Using wide data frame format for individual variables
#> # A tibble: 58 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var 8.23 #> 2 1 test_var2 17.5 #> 3 16 test_var 10.4 #> 4 16 test_var2 21.5 #> 5 17 test_var 7.46 #> 6 17 test_var2 18.1 #> 7 18 test_var 9.57 #> 8 18 test_var2 19.3 #> 9 2 test_var 9.45 #> 10 2 test_var2 21.3 #> # ... with 48 more rows
# using tall data frame
calc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df, 'test_var2', is_cumulative=TRUE)
#> Using tall data frame format for individual variables
#> # A tibble: 29 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var2 19.6 #> 2 16 test_var2 19.1 #> 3 17 test_var2 19.2 #> 4 18 test_var2 19.5 #> 5 2 test_var2 20.4 #> 6 20 test_var2 19.6 #> 7 22 test_var2 17.6 #> 8 23 test_var2 22.3 #> 9 26 test_var2 19.3 #> 10 27 test_var2 19.3 #> # ... with 19 more rows
calc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df, c('test_var', 'test_var2'), is_cumulative=TRUE)
#> Using tall data frame format for individual variables
#> # A tibble: 58 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var 10.1 #> 2 1 test_var2 19.6 #> 3 16 test_var 9.52 #> 4 16 test_var2 19.1 #> 5 17 test_var 10.1 #> 6 17 test_var2 19.2 #> 7 18 test_var 11.1 #> 8 18 test_var2 19.5 #> 9 2 test_var 10.2 #> 10 2 test_var2 20.4 #> # ... with 48 more rows
calc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df, 'test_var2', is_cumulative=FALSE)
#> Using tall data frame format for individual variables
#> # A tibble: 29 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var2 17.5 #> 2 16 test_var2 21.5 #> 3 17 test_var2 18.1 #> 4 18 test_var2 19.3 #> 5 2 test_var2 21.3 #> 6 20 test_var2 23.3 #> 7 22 test_var2 17.6 #> 8 23 test_var2 22.3 #> 9 26 test_var2 19.3 #> 10 27 test_var2 19.3 #> # ... with 19 more rows
calc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df, c('test_var', 'test_var2'), is_cumulative=FALSE)
#> Using tall data frame format for individual variables
#> # A tibble: 58 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var 8.23 #> 2 1 test_var2 17.5 #> 3 16 test_var 10.4 #> 4 16 test_var2 21.5 #> 5 17 test_var 7.46 #> 6 17 test_var2 18.1 #> 7 18 test_var 9.57 #> 8 18 test_var2 19.3 #> 9 2 test_var 9.45 #> 10 2 test_var2 21.3 #> # ... with 48 more rows