Creating tables - Homework

The Assignment

Variables

  • The data are part of the exscidata package.
  • To load the data and check variable names:
library(tidyverse); library(gt); library(exscidata)

colnames(hypertrophy)
  [1] "PARTICIPANT"              "GROUP"                   
  [3] "AGE"                      "HEIGHT"                  
  [5] "TRAINING_AGE"             "BODYMASS_T2"             
  [7] "VL_T1"                    "VL_T2"                   
  [9] "VL_T3"                    "VL_T4"                   
 [11] "BICEPS_T1"                "BICEPS_T2"               
 [13] "BICEPS_T3"                "BICEPS_T4"               
 [15] "ECW_T1"                   "ECW_T2"                  
 [17] "ECW_T3"                   "ECW_T4"                  
 [19] "ICW_T1"                   "ICW_T2"                  
 [21] "ICW_T3"                   "ICW_T4"                  
 [23] "TBW_T1"                   "TBW_T2"                  
 [25] "TBW_T3"                   "TBW_T4"                  
 [27] "TOTAL_VOLUME_LOAD_WEEK_1" "TOTAL_VOLUME_LOAD_WEEK_2"
 [29] "TOTAL_VOLUME_LOAD_WEEK_3" "TOTAL_VOLUME_LOAD_WEEK_4"
 [31] "TOTAL_VOLUME_LOAD_WEEK_5" "TOTAL_VOLUME_LOAD_WEEK_6"
 [33] "CALS_WEEK_1"              "CALS_WEEK_2"             
 [35] "CALS_WEEK_3"              "CALS_WEEK_4"             
 [37] "CALS_WEEK_5"              "CALS_WEEK_6"             
 [39] "PROTEIN_WEEK_1"           "PROTEIN_WEEK_2"          
 [41] "PROTEIN_WEEK_3"           "PROTEIN_WEEK_4"          
 [43] "PROTEIN_WEEK_5"           "PROTEIN_WEEK_6"          
 [45] "CHO_WEEK_1"               "CHO_WEEK_2"              
 [47] "CHO_WEEK_3"               "CHO_WEEK_4"              
 [49] "CHO_WEEK_5"               "CHO_WEEK_6"              
 [51] "FAT_WEEK_1"               "FAT_WEEK_2"              
 [53] "FAT_WEEK_3"               "FAT_WEEK_4"              
 [55] "FAT_WEEK_5"               "FAT_WEEK_6"              
 [57] "TMD_T1"                   "TMD_T2"                  
 [59] "TMD_T3"                   "PPT_AVG_T1"              
 [61] "PPT_AVG_T2"               "PPT_AVG_T3"              
 [63] "BODYMASS_T1"              "BODYMASS_T3"             
 [65] "SQUAT_3RM"                "SQUAT_VOLUME"            
 [67] "CLUSTER"                  "PERCENT_TYPE_I_T1"       
 [69] "PERCENT_TYPE_II_T1"       "FAST_CSA_T1"             
 [71] "FAST_CSA_T2"              "FAST_CSA_T3"             
 [73] "SLOW_CSA_T1"              "SLOW_CSA_T2"             
 [75] "SLOW_CSA_T3"              "FAST_NUCLEI_T1"          
 [77] "FAST_NUCLEI_T2"           "FAST_NUCLEI_T3"          
 [79] "SLOW_NUCLEI_T1"           "SLOW_NUCLEI_T2"          
 [81] "SLOW_NUCLEI_T3"           "AR_PROTEIN_T1"           
 [83] "AR_PROTEIN_T2"            "AR_PROTEIN_T3"           
 [85] "PROTEASOME_T1"            "PROTEASOME_T2"           
 [87] "PROTEASOME_T3"            "GLYCOGEN_T1"             
 [89] "GLYCOGEN_T2"              "GLYCOGEN_T3"             
 [91] "CS_T1"                    "CS_T2"                   
 [93] "CS_T3"                    "CK_T1"                   
 [95] "CK_T2"                    "CK_T3"                   
 [97] "TESTOSTERONE_T1"          "TESTOSTERONE_T2"         
 [99] "TESTOSTERONE_T3"          "CORTISOL_T1"             
[101] "CORTISOL_T2"              "CORTISOL_T3"             
[103] "PAN4EBP1_T1"              "PAN4EBP1_T2"             
[105] "PAN4EBP1_T3"              "PHOSPHO4EBP1_T1"         
[107] "PHOSPHO4EBP1_T2"          "PHOSPHO4EBP1_T3"         
[109] "PANMTOR_T1"               "PANMTOR_T2"              
[111] "PANMTOR_T3"               "PHOSPHOMTOR_T1"          
[113] "PHOSPHOMTOR_T2"           "PHOSPHOMTOR_T3"          
[115] "PANAMPK_T1"               "PANAMPK_T2"              
[117] "PANAMPK_T3"               "PHOSPHOAMPK_T1"          
[119] "PHOSPHOAMPK_T2"           "PHOSPHOAMPK_T3"          
[121] "PANP70S6K_T1"             "PANP70S6K_T2"            
[123] "PANP70S6K_T3"             "PHOSPHOP70S6K_T1"        
[125] "PHOSPHOP70S6K_T2"         "PHOSPHOP70S6K_T3"        
[127] "PANPOLYUB_T1"             "PANPOLYUB_T2"            
[129] "PANPOLYUB_T3"             "RNA_T1"                  
[131] "RNA_T2"                   "RNA_T3"                  
[133] "MGF_T2T1_FOLD_CHANGE"     "MGF_T3T1_FOLD_CHANGE"    
[135] "MGF_T1"                   "MSTN_T2T1_FOLD_CHANGE"   
[137] "MSTN_T3T1_FOLD_CHANGE"    "MSTN_T1"                 
[139] "RNA45S_T1"                "RNA45S_T2T1_FOLD_CHANGE" 
[141] "RNA45S_T3T1_FOLD_CHANGE"  "DXA_LBM_T1"              
[143] "DXA_LBM_T2"               "DXA_LBM_T3"              
[145] "DXA_FM_T1"                "DXA_FM_T2"               
[147] "DXA_FM_T3"               

Variables in the table, some are missing from the data

hypertrophy %>%
        select(GROUP, AGE, 
               HEIGHT, 
               BODYMASS_T1, 
               DXA_LBM_T1, 
               DXA_FM_T1, 
               SQUAT_3RM) %>%
        head()
# A tibble: 6 × 7
  GROUP   AGE HEIGHT BODYMASS_T1 DXA_LBM_T1 DXA_FM_T1 SQUAT_3RM
  <chr> <dbl>  <dbl>       <dbl>      <dbl>     <dbl>     <dbl>
1 WP       22   186         91.6       69.6     17.3        152
2 WP       20   179         82         63.9     15.6        125
3 GWP      24   178         77.7       66.0      9.37       143
4 GWP      22   183         94.2       70.8     19.8        134
5 MALTO    19   169         71         56.2     12.1        138
6 WP       20   178.        83.4       57.5     22.6        147
## Missing variables: Bench press, Deadlift, Lat pulldown, Overhead press

Grouping and summarising

  • The table shows data per group and across all groups.
  • I will first create a summary of groups the across all groups.
group_wise <- hypertrophy %>%
                select(GROUP,AGE, HEIGHT, BODYMASS_T1,DXA_LBM_T1, DXA_FM_T1, 
                        SQUAT_3RM) %>%
        pivot_longer(names_to = "variable", 
                     values_to = "value", 
                     cols = AGE:SQUAT_3RM) %>%
        
        summarise(m = mean(value), 
                  s = sd(value), 
                  .by = c(GROUP, variable)) %>%
        print()
# A tibble: 18 × 4
   GROUP variable        m     s
   <chr> <chr>       <dbl> <dbl>
 1 WP    AGE          21.4  2.37
 2 WP    HEIGHT      178.   5.60
 3 WP    BODYMASS_T1  82.2  8.70
 4 WP    DXA_LBM_T1   63.7  7.11
 5 WP    DXA_FM_T1    15.2  4.15
 6 WP    SQUAT_3RM   134.  21.4 
 7 GWP   AGE          22.3  2.24
 8 GWP   HEIGHT      183.   7.39
 9 GWP   BODYMASS_T1  NA   NA   
10 GWP   DXA_LBM_T1   NA   NA   
11 GWP   DXA_FM_T1    NA   NA   
12 GWP   SQUAT_3RM    NA   NA   
13 MALTO AGE          20.7  1.57
14 MALTO HEIGHT      178.   9.56
15 MALTO BODYMASS_T1  81.4 10.7 
16 MALTO DXA_LBM_T1   62.1  8.44
17 MALTO DXA_FM_T1    16.1  3.55
18 MALTO SQUAT_3RM   126   17.2 

It seems we have missing data

  • Adding two variables to get more control
group_wise <- hypertrophy %>%
                select(GROUP,AGE, HEIGHT, BODYMASS_T1,DXA_LBM_T1, DXA_FM_T1, 
                        SQUAT_3RM) %>%
        pivot_longer(names_to = "variable", 
                     values_to = "value", 
                     cols = AGE:SQUAT_3RM) %>%
        
        summarise(m = mean(value, na.rm = TRUE), 
                  s = sd(value, na.rm = TRUE), 
                  miss_val = sum(is.na(value)),
                  n = n(),
                  .by = c(GROUP, variable)) %>%
        print()
# A tibble: 18 × 6
   GROUP variable        m     s miss_val     n
   <chr> <chr>       <dbl> <dbl>    <int> <int>
 1 WP    AGE          21.4  2.37        0    10
 2 WP    HEIGHT      178.   5.60        0    10
 3 WP    BODYMASS_T1  82.2  8.70        0    10
 4 WP    DXA_LBM_T1   63.7  7.11        0    10
 5 WP    DXA_FM_T1    15.2  4.15        0    10
 6 WP    SQUAT_3RM   134.  21.4         0    10
 7 GWP   AGE          22.3  2.24        0    11
 8 GWP   HEIGHT      183.   7.39        0    11
 9 GWP   BODYMASS_T1  85.1 15.0         1    11
10 GWP   DXA_LBM_T1   68.2 10.8         1    11
11 GWP   DXA_FM_T1    13.4  4.77        1    11
12 GWP   SQUAT_3RM   134.  20.6         1    11
13 MALTO AGE          20.7  1.57        0    10
14 MALTO HEIGHT      178.   9.56        0    10
15 MALTO BODYMASS_T1  81.4 10.7         0    10
16 MALTO DXA_LBM_T1   62.1  8.44        0    10
17 MALTO DXA_FM_T1    16.1  3.55        0    10
18 MALTO SQUAT_3RM   126   17.2         0    10

Wrangling to a wide format

group_wise <- group_wise %>%
        select(GROUP:s) %>%
        pivot_wider(names_from = GROUP, 
                    values_from = c(m ,s)) %>%
        
        select(variable, 
               m_WP, s_WP,
               m_GWP, s_GWP, 
               m_MALTO, s_MALTO) %>%
                
        print()
# A tibble: 6 × 7
  variable     m_WP  s_WP m_GWP s_GWP m_MALTO s_MALTO
  <chr>       <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl>
1 AGE          21.4  2.37  22.3  2.24    20.7    1.57
2 HEIGHT      178.   5.60 183.   7.39   178.     9.56
3 BODYMASS_T1  82.2  8.70  85.1 15.0     81.4   10.7 
4 DXA_LBM_T1   63.7  7.11  68.2 10.8     62.1    8.44
5 DXA_FM_T1    15.2  4.15  13.4  4.77    16.1    3.55
6 SQUAT_3RM   134.  21.4  134.  20.6    126     17.2 

Summarising across groups

total <- hypertrophy %>%
                select(GROUP,AGE, HEIGHT, BODYMASS_T1,DXA_LBM_T1, DXA_FM_T1, 
                        SQUAT_3RM) %>%
        pivot_longer(names_to = "variable", 
                     values_to = "value", 
                     cols = AGE:SQUAT_3RM) %>%
        
        summarise(m = mean(value, na.rm = TRUE), 
                  s = sd(value, na.rm = TRUE), 
                  miss_val = sum(is.na(value)),
                  n = n(),
                  .by = c(variable)) %>%
        print()
# A tibble: 6 × 5
  variable        m     s miss_val     n
  <chr>       <dbl> <dbl>    <int> <int>
1 AGE          21.5  2.13        0    31
2 HEIGHT      180.   7.91        0    31
3 BODYMASS_T1  82.9 11.5         1    31
4 DXA_LBM_T1   64.7  9.02        1    31
5 DXA_FM_T1    14.9  4.20        1    31
6 SQUAT_3RM   131.  19.5         1    31

Combining group-wise and total summaries (1)

group_wise %>%
        inner_join(total) %>%
        print()
Joining with `by = join_by(variable)`
# A tibble: 6 × 11
  variable     m_WP  s_WP m_GWP s_GWP m_MALTO s_MALTO     m     s miss_val     n
  <chr>       <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl> <dbl> <dbl>    <int> <int>
1 AGE          21.4  2.37  22.3  2.24    20.7    1.57  21.5  2.13        0    31
2 HEIGHT      178.   5.60 183.   7.39   178.     9.56 180.   7.91        0    31
3 BODYMASS_T1  82.2  8.70  85.1 15.0     81.4   10.7   82.9 11.5         1    31
4 DXA_LBM_T1   63.7  7.11  68.2 10.8     62.1    8.44  64.7  9.02        1    31
5 DXA_FM_T1    15.2  4.15  13.4  4.77    16.1    3.55  14.9  4.20        1    31
6 SQUAT_3RM   134.  21.4  134.  20.6    126     17.2  131.  19.5         1    31

Combining group-wise and total summaries (2)

cbind(group_wise, total[,c(2,3)])
     variable    m_WP      s_WP     m_GWP     s_GWP m_MALTO   s_MALTO         m
1         AGE  21.400  2.366432  22.27273  2.240130  20.700  1.567021  21.48387
2      HEIGHT 177.850  5.597867 183.45455  7.390719 177.750  9.563385 179.80645
3 BODYMASS_T1  82.190  8.699355  85.14000 14.951938  81.350 10.701947  82.89333
4  DXA_LBM_T1  63.732  7.107267  68.24200 10.840768  62.058  8.436904  64.67733
5   DXA_FM_T1  15.181  4.146110  13.39200  4.770725  16.101  3.546486  14.89133
6   SQUAT_3RM 134.500 21.422989 133.50000 20.560210 126.000 17.185265 131.33333
          s
1  2.127142
2  7.912730
3 11.451816
4  9.016394
5  4.196470
6 19.497716

Formatting the table (gt)

tbl <- group_wise %>%
        inner_join(total) %>%
        select(variable:s) %>%
        mutate(variable = factor(variable, 
                                 levels = c("AGE", "HEIGHT", "BODYMASS_T1", 
                                            "DXA_LBM_T1", "DXA_FM_T1", 
                                            "SQUAT_3RM"), 
                                 labels = c("Age (years)", 
                                            "Height (cm)", 
                                            "Weight (kg)", 
                                            "Total lean mass (kg)", 
                                            "Total fat mass (kg)", 
                                            "Squat 3RM (kg)"))) %>%
        gt() %>%
        fmt_number(columns = m_WP:s, 
                   decimals = 2) %>%
        cols_merge(columns = c(m_WP, s_WP), 
                   pattern = "{1} &plusmn; {2}") %>%
        cols_merge(columns = c(m_GWP, s_GWP), 
                   pattern = "{1} &plusmn; {2}") %>%
        cols_merge(columns = c(m_MALTO, s_MALTO), 
                   pattern = "{1} &plusmn; {2}") %>%
        cols_merge(columns = c(m, s), 
                   pattern = "{1} &plusmn; {2}")
Joining with `by = join_by(variable)`

Fixing column names and footnotes

# Calculate n participants per group
labels <- hypertrophy %>%
        summarise(.by = GROUP, 
                  n = n()) %>%
        add_row(GROUP = "Total")%>%
        
        mutate(n = if_else(is.na(n), sum(n, na.rm = TRUE), n),
               label = paste0(GROUP, " (n = ",n, ")")) 

## Store labels as a list
col_labs <- as.list(labels$label)
## Name the element of the list
names(col_labs) <- c("m_WP", "m_GWP",  "m_MALTO", "m")


tbl <- tbl %>%
        # cols_label accepts a list as input
        cols_label(.list = col_labs) %>%
        # cols_label also accepts <column name> = <label>
        cols_label(variable = "Variable")

Last touch

tbl %>%
        cols_align(align = "center", 
                   columns = starts_with("m")) %>%
         cols_align(align = "left", 
                   columns = starts_with("v"))  %>%
          tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_column_labels(everything())
  ) %>% 
        
        
        tab_footnote(footnote = md("*All data presented as means &plusmn; standard deviation values.*"))
Variable WP (n = 10) GWP (n = 11) MALTO (n = 10) Total (n = 31)
Age (years) 21.40 ± 2.37 22.27 ± 2.24 20.70 ± 1.57 21.48 ± 2.13
Height (cm) 177.85 ± 5.60 183.45 ± 7.39 177.75 ± 9.56 179.81 ± 7.91
Weight (kg) 82.19 ± 8.70 85.14 ± 14.95 81.35 ± 10.70 82.89 ± 11.45
Total lean mass (kg) 63.73 ± 7.11 68.24 ± 10.84 62.06 ± 8.44 64.68 ± 9.02
Total fat mass (kg) 15.18 ± 4.15 13.39 ± 4.77 16.10 ± 3.55 14.89 ± 4.20
Squat 3RM (kg) 134.50 ± 21.42 133.50 ± 20.56 126.00 ± 17.19 131.33 ± 19.50
All data presented as means ± standard deviation values.
?tab_style
starting httpd help server ... done