Christopher M. Uyehara



Preamble

This document, knitted by RStudio under the all-knowing gaze of the great Hadley Wickham (blessed be his name, long may he reign) is a record of a great battle that took place in the year two-thousand and twenty-two of the common era between the members of two star-touched labs. We fought. We cheered. We yelled. We cursed. And then, once the day was done, we made peace. For in the end, though each began the bout with sneers and bold boasts of our acuity and wisdom, each us was ultimately forced to confront the terrors our own ignorance. Not a single one was perfect. Not a single one saw the true end!

Alas, dear reader, it is tempting, but folly, to find fault in our stars. Far greater men and woman have wrought far greater feats in the face of far greater adversity than we humble few. No! We embrace our failures as our own and, through that embrace, put aside our animosity and embrace each other as well. Competitors no longer, we stand tall as one–united in defeat.

So say we all. (So say we all).

library(conflicted)
library(dplyr)
library(magrittr)
library(ggupset)
library(ggside)
library(glue)
library(here)
library(patchwork)
library(stringr)
library(tidyr)
library(purrr)
library(forcats)
library(gtExtras)
library(gt)
library(gridExtra)
library(eulerr)
library(ggtext)

outTheme = theme_bw() + 
  theme(text = element_text(size = 14, color = 'black'),
        axis.text = element_text(color = 'black') )

theme_set(outTheme)
plotBaseBracket = function(bracket.base){
  bracket.plot = bracket.base$horiz %>%
    ggplot(aes(y = y, x = x)) +
    geom_segment(data = bracket.base$horiz, aes(x = x, xend = xend, y = y, yend = y )) + 
    geom_segment(data = bracket.base$vert, aes(xend = xend, yend = yend)) + 
    geom_label(data = bracket.base$labs, aes(label = label), label.size = NA, fill = 'white') + 
    scale_y_reverse() +
    theme_void() + 
    theme(legend.position = 'none',
          plot.title = element_text(hjust = 0.5, size = 16),
          plot.subtitle = element_text(hjust = 0.5, size = 13)) + 
    NULL  
  
 return(bracket.plot) 
}

checkPicks = function(picks, actual.df, round){
  actual.df = actual.df %>%
    select(match, id, label, winLose) %>%
    rename(actual = 'label', win.actual = 'winLose')
  
  picks = picks %>%
    left_join(actual.df, by = c('match', 'id') ) %>%
    dplyr::filter(actual != '') 
  
  if(round != 'F' & round != '3P'){
    picks = picks %>%
      mutate(check = case_when(label == actual ~ 'Correct',
                               label != actual & label %in% actual.df$actual ~ 'In round, wrong pos',
                               T ~ 'Wrong'))      
  } else{
    winner.actual = dplyr::filter(actual.df, win.actual == 'W')
    loser.actual = dplyr::filter(actual.df, win.actual != 'W')
    #print(loser.actual$actual)
    
    picks = picks %>%
      mutate(check = case_when(winLose == 'W' & label == winner.actual$actual ~ 'Correct',
                               winLose == '' & label == loser.actual$actual ~ 'Correct',
                               label %in% actual.df$actual ~ 'In round, wrong pos',
                               T ~ 'Wrong'))
  }

  return(picks) 
}

addWinLose = function(bracket.plot, bracket.df, title){
  bracket.plot +
    geom_text(data = bracket.df, aes(label = label, fontface = font, color = winLose),
              size = 14/.pt) + 
    labs(title = title, subtitle = 'World Cup 2022') + 
    scale_color_manual(values = c('black', 'magenta3')) + 
    NULL
}

addCorrectIncorrect = function(bracket.plot, bracket.df, title){
  bracket.plot +
    geom_text(data = bracket.df, aes(label = label, color = check),
              size = 14/.pt)  + 
    labs(title = title, subtitle = 'World Cup 2022') + 
    scale_color_manual(values = c('springgreen4', 'goldenrod3', 'red2')) + 
    theme(legend.position = 'bottom') + 
    guides(color = guide_legend(title.position = 'top', title.hjust = 0.5))
}
points.df = list(
  Ro16 = list(Correct = 2, `In round, wrong pos` = 1, Wrong = 0),
  QF = list(Correct = 4, `In round, wrong pos` = 2, Wrong = 0),
  SF = list(Correct = 8, `In round, wrong pos` = 4, Wrong = 0),
  `F` = list(Correct = 10, `In round, wrong pos` = 8, Wrong = 0),
  `3P` = list(Correct = 5, `In round, wrong pos` = 4, Wrong = 0)
  ) %>%
  dplyr::bind_rows(.id = 'round') %>%
  pivot_longer(-round, names_to = 'check', values_to = 'pts_per_round')

arabicNames = readxl::read_xlsx('./Assets/Data/players.xlsx')

countries = read.csv('./Assets/Data/countries.csv')

bracket.base = list(horiz = './Assets/Data/bracket_horizSeg.csv', 
               vert = './Assets/Data/bracket_vertSeg.csv',
               labs = './Assets/Data/bracket_labs.csv' ) %>%
  lapply(., read.csv)

base.plot = plotBaseBracket(bracket.base)

fifa_picks = read.csv('./Assets/Data/fifa_picks.csv') %>%
  mutate(font = ifelse(winLose == 'W', 'bold', 'plain')) %>%
  dplyr::left_join(., arabicNames, by = 'name') %>%
  dplyr::left_join(countries, by = c('label' = 'country')) %>%
  mutate(title = glue('{name} {arabic}')) %>%
  nest(dat = -c(name, title)) %>%
  mutate(dat = setNames(dat, name)) %>%
  arrange(name) %>%
  mutate(brackets = map2(dat, title, ~addWinLose(base.plot, bracket.df = .x, title = .y)))

Brackets

Note: There was a mixup with the printed brackets that were used for the competition. The semi-finals were transposed, causing some peoples’ brackets to have impossible choices.

Highlighted by predicted winner

fifa_picks %$%
  walk2(name, brackets, function(n, b){
    cat(paste0('### ', n, '\n'))
    print(b)
    cat('\n\n')
  })

Actual

Alejandra

Alex

Chris

Dylan

Effie

Kaushiki

Kritika

Ly-Sha

Matthias

Subhashini

UkJin

Highlighted by correctness

pick_check = fifa_picks %>%
  unnest(dat) %>%
  mutate(round = case_when(match <= 8 ~ 'Ro16',
                           match <= 12  ~ 'QF',
                           match <= 14 ~ 'SF',
                           match == 15 ~ 'F',
                           match == 16 ~ '3P')) %>%
  nest(dat = -c(name, title, round)) %>%
  dplyr::group_by(round) %>%
  dplyr::mutate(actual = dat[which(name == 'Actual')]) %>%
  mutate(dat = pmap(list(dat, actual, round), ~checkPicks(..1, ..2, ..3))) %>%
  select(-actual) %>%
  unnest(dat) %>%
  nest(dat = -c(name, title)) %>%
  mutate(brackets = map2(dat, title, ~addCorrectIncorrect(base.plot, bracket.df = .x, title = .y)))
pick_check %$%
  walk2(name, brackets, function(n, b){
    cat(paste0('### ', n, '\n'))
    print(b)
    cat('\n\n')
  })

Actual

Alejandra

Alex

Chris

Dylan

Effie

Kaushiki

Kritika

Ly-Sha

Matthias

Subhashini

UkJin

Summary Table

pick_check_summary = pick_check %>%
  select(-brackets) %>%
  unnest(dat) %>%
  select(name, id, label, check)  %>%
  dplyr::mutate(round = case_when(str_detect(id, 'QF') ~ 'QF',
                                  str_detect(id, 'SF') ~ 'SF',
                                  str_detect(id, 'F.[AB]') ~ 'F',
                                  str_detect(id, '3P') ~ '3P',
                                  T ~ 'Ro16'
                                  ) ) %>%
  dplyr::count(name, round, check) %>%
  left_join(points.df, by = c('round', 'check')) %>%
  mutate(points = n * pts_per_round) %>%
  group_by(name) %>%
  mutate(`Total Points` = sum(points)) %>%
  group_by(name, round) %>%
  mutate(points = sum(points)) %>%
  ungroup() %>%
  select(-pts_per_round ) %>%
  pivot_wider(names_from = check, values_from = c(n), values_fill = 0) %>%
  mutate(round = factor(round, levels = c('Ro16', 'QF', 'SF', 'F', '3P'))) %>%
  arrange(name, as.numeric(round)) %>%
  relocate(Wrong, .after = dplyr::last_col()) %>%
  relocate(c(points, `Total Points`),  .after = dplyr::last_col()) %>%
  dplyr::mutate(`Total Points` = ifelse(round == 'Ro16', `Total Points`, NA_integer_))
pick_check_summary %>%
  dplyr::rename("In Round,\nWrong Pos" = "In round, wrong pos") %>%
  group_by(name) %>%
  mutate(tp.num = sum(points)) %>%
  ungroup() %>%
  dplyr::mutate(name = fct_reorder(name, tp.num))  %>%
  arrange(-as.numeric(name)) %>%
  select(-tp.num) %>%
  group_by(name) %>%
  dplyr::rename('Round' = 'round') %>%
  gt() %>%
  gt_color_rows(., Correct, palette =  colorRampPalette(c('white', 'springgreen2'))(50)) %>%
  gt_color_rows(.,"In Round,\nWrong Pos", palette =  colorRampPalette(c('white', 'goldenrod'))(50)) %>%
  gt_color_rows(., Wrong, palette =  colorRampPalette(c('white', 'red'))(50))  %>%
  sub_missing(`Total Points`, missing_text = '') %>%
  data_color(., `Total Points`,
             colors = scales::col_numeric(palette = colorRampPalette(c('white', 'dodgerblue'))(50),
                                          domain =  range(pick_check_summary$`Total Points`, na.rm = T),
                                          na.color = 'transparent'
                                          )) %>%
  cols_width(`Wrong` ~ px(90), Correct ~ px(75)) %>%
  tab_options(table.align = 'left', row_group.as_column = T) %>%
  gt_theme_538() 
Round Correct In Round, Wrong Pos Wrong points Total Points
Actual Ro16 16 0 0 32 126
QF 8 0 0 32
SF 4 0 0 32
F 2 0 0 20
3P 2 0 0 10
Kaushiki Ro16 8 2 6 18 64
QF 5 0 3 20
SF 2 0 2 16
F 1 0 1 10
3P 0 0 2 0
Chris Ro16 8 2 6 18 64
QF 5 0 3 20
SF 2 0 2 16
F 1 0 1 10
3P 0 0 2 0
Ly-Sha Ro16 7 3 6 17 60
QF 4 1 3 18
SF 1 1 2 12
F 0 1 1 8
3P 1 0 1 5
Kritika Ro16 12 1 3 25 55
QF 6 1 1 26
SF 0 1 3 4
F 0 0 2 0
3P 0 0 2 0
Alejandra Ro16 9 3 4 21 49
QF 3 0 5 12
SF 1 0 3 8
F 0 1 1 8
3P 0 0 2 0
Subhashini Ro16 5 5 6 15 41
QF 1 2 5 8
SF 1 0 3 8
F 1 0 1 10
3P 0 0 2 0
Alex Ro16 9 1 6 19 39
QF 5 0 3 20
SF 0 0 4 0
F 0 0 2 0
3P 0 0 2 0
UkJin Ro16 8 2 6 18 32
QF 3 1 4 14
SF 0 0 4 0
F 0 0 2 0
3P 0 0 2 0
Effie Ro16 7 4 5 18 32
QF 3 1 4 14
SF 0 0 4 0
F 0 0 2 0
3P 0 0 2 0
Matthias Ro16 6 4 6 16 30
QF 3 1 4 14
SF 0 0 4 0
F 0 0 2 0
3P 0 0 2 0
Dylan Ro16 5 4 7 14 20
QF 0 3 5 6
SF 0 0 4 0
F 0 0 2 0
3P 0 0 2 0

Analysis

Composition of brackets

fifa_picks.regSummary = fifa_picks %>%
  unnest(dat) %>% 
  dplyr::mutate(person.region = ifelse(name == 'Actual', 'Actual', person.region)) %>%
  dplyr::filter(!is.na(country.region)) %>%
  select(name, match, winLose, person.region, country.region) 

actualOrd = fifa_picks.regSummary %>%
  dplyr::filter(person.region == 'Actual') %>%
  dplyr::count(country.region) %>%
  arrange(n) %>%
  extract2('country.region')

Were people more likely to fill their brackets with countries from their region of origin?

fifa_picks.regSummary %>%
  dplyr::count(person.region, country.region) %>%
  dplyr::mutate(country.region = factor(country.region, levels = actualOrd)) %>%
  ggplot(aes(x = person.region, y = n, fill = country.region)) + 
  geom_col(position = 'fill') + 
  scale_fill_manual(values = MetBrewer::met.brewer('Egypt', 5)) +
  labs(title = 'Proportion each region was found within\neach bracket',
       x = "Lab Member", y = '', fill = 'Team Region') + 
  NULL

How representative was each individual bracket?

fifa_picks.regSummary %>%
  dplyr::count(name, person.region, country.region) %>%
  dplyr::mutate(country.region = factor(country.region, levels = actualOrd)) %>%
  ggplot(aes(x = name, y = n, fill = country.region)) + 
  geom_col(position = 'fill') + 
  scale_fill_manual(values = MetBrewer::met.brewer('Egypt', 5)) +
  labs(title = 'Proportion each region was found within bracket',
    x = "Lab Member", y = '', fill = 'Team Region') + 
  NULL

Total Points

Were there any sex-specific differences in bracket accuracy?

totalPts.mf = pick_check_summary %>%
  dplyr::filter(!is.na(`Total Points`)) %>%
  dplyr::left_join(., arabicNames, by = 'name')  %>%
  dplyr::select(-arabic) %>%
  dplyr::filter(name != 'Actual') %>%
  dplyr::mutate(m.f = ifelse(m.f == 'F', 'Female', 'Male')) %>%
  dplyr::mutate(m.f = factor(m.f, levels = c('Female', 'Male'))) 

mf.tTest.vect = totalPts.mf %>%
  dplyr::select(m.f, `Total Points`) %>%
  split(., .$m.f)

mf.tTest = t.test(x = mf.tTest.vect$Female$`Total Points`, 
       y = mf.tTest.vect$Male$`Total Points`, alternative = "two.sided")

mf.test.df = data.frame(group1 = 'Male', group2 = 'Female', label = signif(mf.tTest$p.value, 2), y.position = 70)
totalPts.mf %>%
  ggplot(aes(x = m.f, y = `Total Points`)) + 
  geom_violin() + 
  geom_boxplot(width = 0.2, aes(fill = m.f)) + 
  ggprism::add_pvalue(mf.test.df) + 
  scale_fill_manual(values = c('#5dbdfc', '#f573f2' )) + 
  labs(
    x = '', 
    fill = 'Female / Male',
    title = "<span style = 'color:#5dbdfc'>**Females**</span> appeared to have higher point totals<br>than <span style = 'color:#f573f2'>**Males**</span>,
    but the result is **not** statistically<br>significant.",
    ) +
  theme(plot.title = element_textbox(), plot.caption = element_text(hjust = 0))