Christopher M. Uyehara

Preamble

This document, knitted by RStudio under the all-knowing gaze of the great Hadley Wickham (blessed be his name, long may he reign) is a record of a great battle that took place in the year two-thousand and twenty-two of the common era between the members of two star-touched labs. We fought. We cheered. We yelled. We cursed. And then, once the day was done, we made peace. For in the end, though each began the bout with sneers and bold boasts of our acuity and wisdom, each us was ultimately forced to confront the terrors our own ignorance. Not a single one was perfect. Not a single one saw the true end!

Alas, dear reader, it is tempting, but folly, to find fault in our stars. Far greater men and woman have wrought far greater feats in the face of far greater adversity than we humble few. No! We embrace our failures as our own and, through that embrace, put aside our animosity and embrace each other as well. Competitors no longer, we stand tall as one–united in defeat.

So say we all. (So say we all).

library(conflicted)
library(dplyr)
library(magrittr)
library(ggupset)
library(ggside)
library(glue)
library(here)
library(patchwork)
library(stringr)
library(tidyr)
library(purrr)
library(forcats)
library(gtExtras)
library(gt)
library(gridExtra)
library(eulerr)
library(ggtext)

outTheme = theme_bw() + 
  theme(text = element_text(size = 14, color = 'black'),
        axis.text = element_text(color = 'black') )

theme_set(outTheme)

plotBaseBracket = function(bracket.base){
  bracket.plot = bracket.base$horiz %>%
    ggplot(aes(y = y, x = x)) +
    geom_segment(data = bracket.base$horiz, aes(x = x, xend = xend, y = y, yend = y )) + 
    geom_segment(data = bracket.base$vert, aes(xend = xend, yend = yend)) + 
    geom_label(data = bracket.base$labs, aes(label = label), label.size = NA, fill = 'white') + 
    scale_y_reverse() +
    theme_void() + 
    theme(legend.position = 'none',
          plot.title = element_text(hjust = 0.5, size = 16),
          plot.subtitle = element_text(hjust = 0.5, size = 13)) + 
    NULL  
  
 return(bracket.plot) 
}

checkPicks = function(picks, actual.df, round){
  actual.df = actual.df %>%
    select(match, id, label, winLose) %>%
    rename(actual = 'label', win.actual = 'winLose')
  
  picks = picks %>%
    left_join(actual.df, by = c('match', 'id') ) %>%
    dplyr::filter(actual != '') 
  
  if(round != 'F' & round != '3P'){
    picks = picks %>%
      mutate(check = case_when(label == actual ~ 'Correct',
                               label != actual & label %in% actual.df$actual ~ 'In round, wrong pos',
                               T ~ 'Wrong'))      
  } else{
    winner.actual = dplyr::filter(actual.df, win.actual == 'W')
    loser.actual = dplyr::filter(actual.df, win.actual != 'W')
    #print(loser.actual$actual)
    
    picks = picks %>%
      mutate(check = case_when(winLose == 'W' & label == winner.actual$actual ~ 'Correct',
                               winLose == '' & label == loser.actual$actual ~ 'Correct',
                               label %in% actual.df$actual ~ 'In round, wrong pos',
                               T ~ 'Wrong'))
  }

  return(picks) 
}

addWinLose = function(bracket.plot, bracket.df, title){
  bracket.plot +
    geom_text(data = bracket.df, aes(label = label, fontface = font, color = winLose),
              size = 14/.pt) + 
    labs(title = title, subtitle = 'World Cup 2022') + 
    scale_color_manual(values = c('black', 'magenta3')) + 
    NULL
}

addCorrectIncorrect = function(bracket.plot, bracket.df, title){
  bracket.plot +
    geom_text(data = bracket.df, aes(label = label, color = check),
              size = 14/.pt)  + 
    labs(title = title, subtitle = 'World Cup 2022') + 
    scale_color_manual(values = c('springgreen4', 'goldenrod3', 'red2')) + 
    theme(legend.position = 'bottom') + 
    guides(color = guide_legend(title.position = 'top', title.hjust = 0.5))
}

points.df = list(
  Ro16 = list(Correct = 2, `In round, wrong pos` = 1, Wrong = 0),
  QF = list(Correct = 4, `In round, wrong pos` = 2, Wrong = 0),
  SF = list(Correct = 8, `In round, wrong pos` = 4, Wrong = 0),
  `F` = list(Correct = 10, `In round, wrong pos` = 8, Wrong = 0),
  `3P` = list(Correct = 5, `In round, wrong pos` = 4, Wrong = 0)
  ) %>%
  dplyr::bind_rows(.id = 'round') %>%
  pivot_longer(-round, names_to = 'check', values_to = 'pts_per_round')

arabicNames = readxl::read_xlsx('./Assets/Data/players.xlsx')

countries = read.csv('./Assets/Data/countries.csv')

bracket.base = list(horiz = './Assets/Data/bracket_horizSeg.csv', 
               vert = './Assets/Data/bracket_vertSeg.csv',
               labs = './Assets/Data/bracket_labs.csv' ) %>%
  lapply(., read.csv)

base.plot = plotBaseBracket(bracket.base)

fifa_picks = read.csv('./Assets/Data/fifa_picks.csv') %>%
  mutate(font = ifelse(winLose == 'W', 'bold', 'plain')) %>%
  dplyr::left_join(., arabicNames, by = 'name') %>%
  dplyr::left_join(countries, by = c('label' = 'country')) %>%
  mutate(title = glue('{name} {arabic}')) %>%
  nest(dat = -c(name, title)) %>%
  mutate(dat = setNames(dat, name)) %>%
  arrange(name) %>%
  mutate(brackets = map2(dat, title, ~addWinLose(base.plot, bracket.df = .x, title = .y)))

Brackets

Note: There was a mixup with the printed brackets that were used for the competition. The semi-finals were transposed, causing some peoples’ brackets to have impossible choices.

Highlighted by predicted winner

fifa_picks %$%
  walk2(name, brackets, function(n, b){
    cat(paste0('### ', n, '\n'))
    print(b)
    cat('\n\n')
  })

Actual

Alejandra

Alex

Chris

Dylan

Effie

Kaushiki

Kritika

Ly-Sha

Matthias

Subhashini

UkJin

Highlighted by correctness

pick_check = fifa_picks %>%
  unnest(dat) %>%
  mutate(round = case_when(match <= 8 ~ 'Ro16',
                           match <= 12  ~ 'QF',
                           match <= 14 ~ 'SF',
                           match == 15 ~ 'F',
                           match == 16 ~ '3P')) %>%
  nest(dat = -c(name, title, round)) %>%
  dplyr::group_by(round) %>%
  dplyr::mutate(actual = dat[which(name == 'Actual')]) %>%
  mutate(dat = pmap(list(dat, actual, round), ~checkPicks(..1, ..2, ..3))) %>%
  select(-actual) %>%
  unnest(dat) %>%
  nest(dat = -c(name, title)) %>%
  mutate(brackets = map2(dat, title, ~addCorrectIncorrect(base.plot, bracket.df = .x, title = .y)))

pick_check %$%
  walk2(name, brackets, function(n, b){
    cat(paste0('### ', n, '\n'))
    print(b)
    cat('\n\n')
  })

Actual

Alejandra

Alex

Chris

Dylan

Effie

Kaushiki

Kritika

Ly-Sha

Matthias

Subhashini

UkJin

Summary Table

pick_check_summary = pick_check %>%
  select(-brackets) %>%
  unnest(dat) %>%
  select(name, id, label, check)  %>%
  dplyr::mutate(round = case_when(str_detect(id, 'QF') ~ 'QF',
                                  str_detect(id, 'SF') ~ 'SF',
                                  str_detect(id, 'F.[AB]') ~ 'F',
                                  str_detect(id, '3P') ~ '3P',
                                  T ~ 'Ro16'
                                  ) ) %>%
  dplyr::count(name, round, check) %>%
  left_join(points.df, by = c('round', 'check')) %>%
  mutate(points = n * pts_per_round) %>%
  group_by(name) %>%
  mutate(`Total Points` = sum(points)) %>%
  group_by(name, round) %>%
  mutate(points = sum(points)) %>%
  ungroup() %>%
  select(-pts_per_round ) %>%
  pivot_wider(names_from = check, values_from = c(n), values_fill = 0) %>%
  mutate(round = factor(round, levels = c('Ro16', 'QF', 'SF', 'F', '3P'))) %>%
  arrange(name, as.numeric(round)) %>%
  relocate(Wrong, .after = dplyr::last_col()) %>%
  relocate(c(points, `Total Points`),  .after = dplyr::last_col()) %>%
  dplyr::mutate(`Total Points` = ifelse(round == 'Ro16', `Total Points`, NA_integer_))

pick_check_summary %>%
  dplyr::rename("In Round,\nWrong Pos" = "In round, wrong pos") %>%
  group_by(name) %>%
  mutate(tp.num = sum(points)) %>%
  ungroup() %>%
  dplyr::mutate(name = fct_reorder(name, tp.num))  %>%
  arrange(-as.numeric(name)) %>%
  select(-tp.num) %>%
  group_by(name) %>%
  dplyr::rename('Round' = 'round') %>%
  gt() %>%
  gt_color_rows(., Correct, palette =  colorRampPalette(c('white', 'springgreen2'))(50)) %>%
  gt_color_rows(.,"In Round,\nWrong Pos", palette =  colorRampPalette(c('white', 'goldenrod'))(50)) %>%
  gt_color_rows(., Wrong, palette =  colorRampPalette(c('white', 'red'))(50))  %>%
  sub_missing(`Total Points`, missing_text = '') %>%
  data_color(., `Total Points`,
             colors = scales::col_numeric(palette = colorRampPalette(c('white', 'dodgerblue'))(50),
                                          domain =  range(pick_check_summary$`Total Points`, na.rm = T),
                                          na.color = 'transparent'
                                          )) %>%
  cols_width(`Wrong` ~ px(90), Correct ~ px(75)) %>%
  tab_options(table.align = 'left', row_group.as_column = T) %>%
  gt_theme_538()

	Round	Correct	In Round, Wrong Pos	Wrong	points	Total Points
Actual	Ro16	16	0	0	32	126
	QF	8	0	0	32
	SF	4	0	0	32
	F	2	0	0	20
	3P	2	0	0	10
Kaushiki	Ro16	8	2	6	18	64
	QF	5	0	3	20
	SF	2	0	2	16
	F	1	0	1	10
	3P	0	0	2	0
Chris	Ro16	8	2	6	18	64
	QF	5	0	3	20
	SF	2	0	2	16
	F	1	0	1	10
	3P	0	0	2	0
Ly-Sha	Ro16	7	3	6	17	60
	QF	4	1	3	18
	SF	1	1	2	12
	F	0	1	1	8
	3P	1	0	1	5
Kritika	Ro16	12	1	3	25	55
	QF	6	1	1	26
	SF	0	1	3	4
	F	0	0	2	0
	3P	0	0	2	0
Alejandra	Ro16	9	3	4	21	49
	QF	3	0	5	12
	SF	1	0	3	8
	F	0	1	1	8
	3P	0	0	2	0
Subhashini	Ro16	5	5	6	15	41
	QF	1	2	5	8
	SF	1	0	3	8
	F	1	0	1	10
	3P	0	0	2	0
Alex	Ro16	9	1	6	19	39
	QF	5	0	3	20
	SF	0	0	4	0
	F	0	0	2	0
	3P	0	0	2	0
UkJin	Ro16	8	2	6	18	32
	QF	3	1	4	14
	SF	0	0	4	0
	F	0	0	2	0
	3P	0	0	2	0
Effie	Ro16	7	4	5	18	32
	QF	3	1	4	14
	SF	0	0	4	0
	F	0	0	2	0
	3P	0	0	2	0
Matthias	Ro16	6	4	6	16	30
	QF	3	1	4	14
	SF	0	0	4	0
	F	0	0	2	0
	3P	0	0	2	0
Dylan	Ro16	5	4	7	14	20
	QF	0	3	5	6
	SF	0	0	4	0
	F	0	0	2	0
	3P	0	0	2	0

Analysis

Composition of brackets

fifa_picks.regSummary = fifa_picks %>%
  unnest(dat) %>% 
  dplyr::mutate(person.region = ifelse(name == 'Actual', 'Actual', person.region)) %>%
  dplyr::filter(!is.na(country.region)) %>%
  select(name, match, winLose, person.region, country.region) 

actualOrd = fifa_picks.regSummary %>%
  dplyr::filter(person.region == 'Actual') %>%
  dplyr::count(country.region) %>%
  arrange(n) %>%
  extract2('country.region')

Were people more likely to fill their brackets with countries from their region of origin?

fifa_picks.regSummary %>%
  dplyr::count(person.region, country.region) %>%
  dplyr::mutate(country.region = factor(country.region, levels = actualOrd)) %>%
  ggplot(aes(x = person.region, y = n, fill = country.region)) + 
  geom_col(position = 'fill') + 
  scale_fill_manual(values = MetBrewer::met.brewer('Egypt', 5)) +
  labs(title = 'Proportion each region was found within\neach bracket',
       x = "Lab Member", y = '', fill = 'Team Region') + 
  NULL

How representative was each individual bracket?

fifa_picks.regSummary %>%
  dplyr::count(name, person.region, country.region) %>%
  dplyr::mutate(country.region = factor(country.region, levels = actualOrd)) %>%
  ggplot(aes(x = name, y = n, fill = country.region)) + 
  geom_col(position = 'fill') + 
  scale_fill_manual(values = MetBrewer::met.brewer('Egypt', 5)) +
  labs(title = 'Proportion each region was found within bracket',
    x = "Lab Member", y = '', fill = 'Team Region') + 
  NULL

Total Points

Were there any sex-specific differences in bracket accuracy?

totalPts.mf = pick_check_summary %>%
  dplyr::filter(!is.na(`Total Points`)) %>%
  dplyr::left_join(., arabicNames, by = 'name')  %>%
  dplyr::select(-arabic) %>%
  dplyr::filter(name != 'Actual') %>%
  dplyr::mutate(m.f = ifelse(m.f == 'F', 'Female', 'Male')) %>%
  dplyr::mutate(m.f = factor(m.f, levels = c('Female', 'Male'))) 

mf.tTest.vect = totalPts.mf %>%
  dplyr::select(m.f, `Total Points`) %>%
  split(., .$m.f)

mf.tTest = t.test(x = mf.tTest.vect$Female$`Total Points`, 
       y = mf.tTest.vect$Male$`Total Points`, alternative = "two.sided")

mf.test.df = data.frame(group1 = 'Male', group2 = 'Female', label = signif(mf.tTest$p.value, 2), y.position = 70)

totalPts.mf %>%
  ggplot(aes(x = m.f, y = `Total Points`)) + 
  geom_violin() + 
  geom_boxplot(width = 0.2, aes(fill = m.f)) + 
  ggprism::add_pvalue(mf.test.df) + 
  scale_fill_manual(values = c('#5dbdfc', '#f573f2' )) + 
  labs(
    x = '', 
    fill = 'Female / Male',
    title = "<span style = 'color:#5dbdfc'>**Females**</span> appeared to have higher point totals<br>than <span style = 'color:#f573f2'>**Males**</span>,
    but the result is **not** statistically<br>significant.",
    ) +
  theme(plot.title = element_textbox(), plot.caption = element_text(hjust = 0))

Christopher M. Uyehara

World Cup 2022

Apostolou / Stadtfeld Labs

CMUyehara

17 December, 2025

Preamble

Brackets

Highlighted by predicted winner

Actual

Alejandra

Alex

Chris

Dylan

Effie

Kaushiki

Kritika

Ly-Sha

Matthias

Subhashini

UkJin

Highlighted by correctness

Actual

Alejandra

Alex

Chris

Dylan

Effie

Kaushiki

Kritika

Ly-Sha

Matthias

Subhashini

UkJin

Summary Table

Analysis

Composition of brackets

Were people more likely to fill their brackets with countries from their region of origin?

How representative was each individual bracket?

Total Points

Were there any sex-specific differences in bracket accuracy?