Given individual level variables and an organisation structure, this function calculates aggregated metrics using either the cumulative approach (all individuals in that unit or its descendants) or the orthodox approach (individuals immediately associated with that unit only).
calc_summary_df(tg, df, tall_df = NULL, selected_vars, is_cumulative = FALSE)
tg | tbl_graph that passes a check with check_tbl_graph_is_org |
---|---|
df | a data frame with columns named unit_id and individual_id
(and optionally individual level variables) with one row per individual - see the indiv_df format |
tall_df | a data frame with columns named individual_id, metric_id and value with
multiple rows per individual - see the indiv_tall_df format |
selected_vars | names of variables in df or tall_df (character vector) |
is_cumulative | whether to calculate cumulative or orthodox aggregations (logical) |
A tall data frame with three columns: unit_id, metric_id and value.
library(tidygraph)#> #>#>#> #>#>#> #>library(dplyr)#> #>#>#> #>#>#> #>#>#> #>set.seed(1231) tg1b <- create_realistic_org(4,3, prob=0.3, delete_units = TRUE) %>% simulate_unit_size# NOT RUN { plot_org(tg1b) + geom_node_text(aes(label=unit_size), color='white') # }tg1b_indiv_df <- tg1b %>% simulate_individuals_df() %>% mutate(test_var2 = purrr::map_dbl(individual_id, ~rnorm(1, 20,3))) tg1b_indiv_df#> # A tibble: 98 x 5 #> individual_id individual_name unit_id test_var test_var2 #> <chr> <chr> <chr> <dbl> <dbl> #> 1 1 1_1 1 11.7 19.9 #> 2 2 1_2 1 6.61 14.7 #> 3 3 1_3 1 6.43 18.0 #> 4 4 2_1 2 11.5 22.0 #> 5 5 2_2 2 7.36 20.6 #> 6 6 4_1 4 8.51 17.7 #> 7 7 4_2 4 10.9 18.4 #> 8 8 5_1 5 8.43 19.9 #> 9 9 5_2 5 13.7 17.6 #> 10 10 6_1 6 15.0 22.4 #> # ... with 88 more rowstg1b_indiv_tall_df <- tg1b_indiv_df %>% select(individual_id, test_var, test_var2) %>% tidyr::gather('metric_id', 'value', -individual_id) tg1b_indiv_tall_df#> # A tibble: 196 x 3 #> individual_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var 11.7 #> 2 2 test_var 6.61 #> 3 3 test_var 6.43 #> 4 4 test_var 11.5 #> 5 5 test_var 7.36 #> 6 6 test_var 8.51 #> 7 7 test_var 10.9 #> 8 8 test_var 8.43 #> 9 9 test_var 13.7 #> 10 10 test_var 15.0 #> # ... with 186 more rows# using wide data frame calc_summary_df(tg1b, tg1b_indiv_df, NULL, 'test_var2', is_cumulative=TRUE)#>#> # A tibble: 29 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var2 19.6 #> 2 16 test_var2 19.1 #> 3 17 test_var2 19.2 #> 4 18 test_var2 19.5 #> 5 2 test_var2 20.4 #> 6 20 test_var2 19.6 #> 7 22 test_var2 17.6 #> 8 23 test_var2 22.3 #> 9 26 test_var2 19.3 #> 10 27 test_var2 19.3 #> # ... with 19 more rows#>#> # A tibble: 58 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var 10.1 #> 2 1 test_var2 19.6 #> 3 16 test_var 9.52 #> 4 16 test_var2 19.1 #> 5 17 test_var 10.1 #> 6 17 test_var2 19.2 #> 7 18 test_var 11.1 #> 8 18 test_var2 19.5 #> 9 2 test_var 10.2 #> 10 2 test_var2 20.4 #> # ... 
with 48 more rowscalc_summary_df(tg1b, tg1b_indiv_df, NULL, 'test_var2', is_cumulative=FALSE)#>#> # A tibble: 29 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var2 17.5 #> 2 16 test_var2 21.5 #> 3 17 test_var2 18.1 #> 4 18 test_var2 19.3 #> 5 2 test_var2 21.3 #> 6 20 test_var2 23.3 #> 7 22 test_var2 17.6 #> 8 23 test_var2 22.3 #> 9 26 test_var2 19.3 #> 10 27 test_var2 19.3 #> # ... with 19 more rows#>#> # A tibble: 58 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var 8.23 #> 2 1 test_var2 17.5 #> 3 16 test_var 10.4 #> 4 16 test_var2 21.5 #> 5 17 test_var 7.46 #> 6 17 test_var2 18.1 #> 7 18 test_var 9.57 #> 8 18 test_var2 19.3 #> 9 2 test_var 9.45 #> 10 2 test_var2 21.3 #> # ... with 48 more rows# using tall data frame calc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df, 'test_var2', is_cumulative=TRUE)#>#> # A tibble: 29 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var2 19.6 #> 2 16 test_var2 19.1 #> 3 17 test_var2 19.2 #> 4 18 test_var2 19.5 #> 5 2 test_var2 20.4 #> 6 20 test_var2 19.6 #> 7 22 test_var2 17.6 #> 8 23 test_var2 22.3 #> 9 26 test_var2 19.3 #> 10 27 test_var2 19.3 #> # ... with 19 more rowscalc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df, c('test_var', 'test_var2'), is_cumulative=TRUE)#>#> # A tibble: 58 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var 10.1 #> 2 1 test_var2 19.6 #> 3 16 test_var 9.52 #> 4 16 test_var2 19.1 #> 5 17 test_var 10.1 #> 6 17 test_var2 19.2 #> 7 18 test_var 11.1 #> 8 18 test_var2 19.5 #> 9 2 test_var 10.2 #> 10 2 test_var2 20.4 #> # ... 
with 48 more rowscalc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df, 'test_var2', is_cumulative=FALSE)#>#> # A tibble: 29 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var2 17.5 #> 2 16 test_var2 21.5 #> 3 17 test_var2 18.1 #> 4 18 test_var2 19.3 #> 5 2 test_var2 21.3 #> 6 20 test_var2 23.3 #> 7 22 test_var2 17.6 #> 8 23 test_var2 22.3 #> 9 26 test_var2 19.3 #> 10 27 test_var2 19.3 #> # ... with 19 more rowscalc_summary_df(tg1b, tg1b_indiv_df, tg1b_indiv_tall_df, c('test_var', 'test_var2'), is_cumulative=FALSE)#>#> # A tibble: 58 x 3 #> unit_id metric_id value #> <chr> <chr> <dbl> #> 1 1 test_var 8.23 #> 2 1 test_var2 17.5 #> 3 16 test_var 10.4 #> 4 16 test_var2 21.5 #> 5 17 test_var 7.46 #> 6 17 test_var2 18.1 #> 7 18 test_var 9.57 #> 8 18 test_var2 19.3 #> 9 2 test_var 9.45 #> 10 2 test_var2 21.3 #> # ... with 48 more rows