Create a crosstable with integer and percentages together in the same cell

70 views Asked by At

I filter humans in the starwars dataset. Using the ‘sex’ and ‘skin_color’ columns in the humans dataset, I want to create the crosstable as follows:

enter image description here

my trial:

# Load the starwars dataset 
library(janitor)
library(dplyr)
library(tibble)
starwars_data <- as_tibble(starwars)

# Subset the humans from the starwars dataset
humans_data <- starwars_data %>%
  filter(species == "Human")

# Create the crosstab
crosstab <- table(humans_data$sex, humans_data$skin_color)

# Add row and column totals
crosstab <- addmargins(crosstab)

# Calculate row percentages
row_percentages <- prop.table(crosstab, margin = 1) * 100

# Combine the crosstab and row percentages
crosstab_with_percentages <- cbind(crosstab, row_percentages)

# Print the result
print(crosstab_with_percentages)

dark fair light none pale tan white Sum     dark     fair     light     none     pale      tan    white
female    0    3     5    1    0   0     0   9 0.000000 16.66667 27.777778 5.555556 0.000000 0.000000 0.000000
male      4   13     5    0    1   2     1  26 7.692308 25.00000  9.615385 0.000000 1.923077 3.846154 1.923077
Sum       4   16    10    1    1   2     1  35 5.714286 22.85714 14.285714 1.428571 1.428571 2.857143 1.428571
       Sum
female  50
male    50
Sum     50
3

There are 3 answers

0
Allan Cameron On BEST ANSWER

You could do this as a pedestrian data-wrangling task in tidyverse:

library(tidyverse)

starwars %>%
  filter(species == "Human") %>%
  with(table(sex, skin_color)) %>%
  as.data.frame() %>%
  bind_rows(
    bind_cols(tibble(sex = "Total"),
              summarise(., Freq = sum(Freq), .by = skin_color))) %>%
  mutate(prop = Freq/sum(Freq), .by = sex) %>%
  mutate(prop = paste0(Freq, " (", scales::percent(prop, 1), ")")) %>%
  select(-Freq) %>%
  pivot_wider(names_from = skin_color, values_from = prop) %>%
  rowwise() %>%
  mutate(Sum = sum(sapply(strsplit(c_across(-1), " "),
                          \(x) as.numeric(x[1])))) %>%
  mutate(Sum = paste(Sum, "(100%)")) %>%
  rename(`sex / skin_color` = sex) %>%
  as.data.frame(check.names = FALSE)
#>   sex / skin_color    dark     fair    light   pale    tan  white       Sum
#> 1           female  0 (0%)  3 (33%)  6 (67%) 0 (0%) 0 (0%) 0 (0%)  9 (100%)
#> 2             male 4 (15%) 13 (50%)  5 (19%) 1 (4%) 2 (8%) 1 (4%) 26 (100%)
#> 3            Total 4 (11%) 16 (46%) 11 (31%) 1 (3%) 2 (6%) 1 (3%) 35 (100%)

Created on 2024-02-01 with reprex v2.0.2

0
lotus On

The janitor package that you've already loaded in your example is designed for this. You can do:

library(janitor)
library(dplyr)

starwars %>%
  filter(species == "Human") %>%
  tabyl(sex, skin_color) %>%
  adorn_totals(where = c("row", "col")) %>%
  adorn_percentages() %>%
  adorn_pct_formatting(digits = 0) %>%
  adorn_ns("front") %>%
  rename(`sex / skin color` = sex)

 sex / skin color    dark     fair    light    none   pale    tan  white     Total
           female 0  (0%)  3 (33%)  5 (56%) 1 (11%) 0 (0%) 0 (0%) 0 (0%)  9 (100%)
             male 4 (15%) 13 (50%)  5 (19%) 0  (0%) 1 (4%) 2 (8%) 1 (4%) 26 (100%)
            Total 4 (11%) 16 (46%) 10 (29%) 1  (3%) 1 (3%) 2 (6%) 1 (3%) 35 (100%)
0
thelatemail On

A base R table/prop.table attempt for fun:

tab   <- addmargins(
    table(starwars_data[starwars_data$species == "Human", c("sex","skin_color")])
)
ptab  <- cbind(round(prop.table(tab[, -ncol(tab)], 1) * 100, 0), Sum=100)
tab[] <- paste0(tab, " (", ptab, "%)")
print(tab, right=TRUE)

##        skin_color
##sex         dark     fair    light   pale    tan  white       Sum
##  female  0 (0%)  3 (33%)  6 (67%) 0 (0%) 0 (0%) 0 (0%)  9 (100%)
##  male   4 (15%) 13 (50%)  5 (19%) 1 (4%) 2 (8%) 1 (4%) 26 (100%)
##  Sum    4 (11%) 16 (46%) 11 (31%) 1 (3%) 2 (6%) 1 (3%) 35 (100%)