This document, knitted by RStudio under the all-knowing gaze of the great Hadley Wickham (blessed be his name, long may he reign) is a record of a great battle that took place in the year two-thousand and twenty-two of the common era between the members of two star-touched labs. We fought. We cheered. We yelled. We cursed. And then, once the day was done, we made peace. For in the end, though each began the bout with sneers and bold boasts of our acuity and wisdom, each us was ultimately forced to confront the terrors our own ignorance. Not a single one was perfect. Not a single one saw the true end!
Alas, dear reader, it is tempting, but folly, to find fault in our stars. Far greater men and woman have wrought far greater feats in the face of far greater adversity than we humble few. No! We embrace our failures as our own and, through that embrace, put aside our animosity and embrace each other as well. Competitors no longer, we stand tall as one–united in defeat.
So say we all. (So say we all).
library(conflicted)
library(dplyr)
library(magrittr)
library(ggupset)
library(ggside)
library(glue)
library(here)
library(patchwork)
library(stringr)
library(tidyr)
library(purrr)
library(forcats)
library(gtExtras)
library(gt)
library(gridExtra)
library(eulerr)
library(ggtext)
outTheme = theme_bw() +
theme(text = element_text(size = 14, color = 'black'),
axis.text = element_text(color = 'black') )
theme_set(outTheme)
plotBaseBracket = function(bracket.base){
bracket.plot = bracket.base$horiz %>%
ggplot(aes(y = y, x = x)) +
geom_segment(data = bracket.base$horiz, aes(x = x, xend = xend, y = y, yend = y )) +
geom_segment(data = bracket.base$vert, aes(xend = xend, yend = yend)) +
geom_label(data = bracket.base$labs, aes(label = label), label.size = NA, fill = 'white') +
scale_y_reverse() +
theme_void() +
theme(legend.position = 'none',
plot.title = element_text(hjust = 0.5, size = 16),
plot.subtitle = element_text(hjust = 0.5, size = 13)) +
NULL
return(bracket.plot)
}
checkPicks = function(picks, actual.df, round){
actual.df = actual.df %>%
select(match, id, label, winLose) %>%
rename(actual = 'label', win.actual = 'winLose')
picks = picks %>%
left_join(actual.df, by = c('match', 'id') ) %>%
dplyr::filter(actual != '')
if(round != 'F' & round != '3P'){
picks = picks %>%
mutate(check = case_when(label == actual ~ 'Correct',
label != actual & label %in% actual.df$actual ~ 'In round, wrong pos',
T ~ 'Wrong'))
} else{
winner.actual = dplyr::filter(actual.df, win.actual == 'W')
loser.actual = dplyr::filter(actual.df, win.actual != 'W')
#print(loser.actual$actual)
picks = picks %>%
mutate(check = case_when(winLose == 'W' & label == winner.actual$actual ~ 'Correct',
winLose == '' & label == loser.actual$actual ~ 'Correct',
label %in% actual.df$actual ~ 'In round, wrong pos',
T ~ 'Wrong'))
}
return(picks)
}
addWinLose = function(bracket.plot, bracket.df, title){
bracket.plot +
geom_text(data = bracket.df, aes(label = label, fontface = font, color = winLose),
size = 14/.pt) +
labs(title = title, subtitle = 'World Cup 2022') +
scale_color_manual(values = c('black', 'magenta3')) +
NULL
}
addCorrectIncorrect = function(bracket.plot, bracket.df, title){
bracket.plot +
geom_text(data = bracket.df, aes(label = label, color = check),
size = 14/.pt) +
labs(title = title, subtitle = 'World Cup 2022') +
scale_color_manual(values = c('springgreen4', 'goldenrod3', 'red2')) +
theme(legend.position = 'bottom') +
guides(color = guide_legend(title.position = 'top', title.hjust = 0.5))
}
points.df = list(
Ro16 = list(Correct = 2, `In round, wrong pos` = 1, Wrong = 0),
QF = list(Correct = 4, `In round, wrong pos` = 2, Wrong = 0),
SF = list(Correct = 8, `In round, wrong pos` = 4, Wrong = 0),
`F` = list(Correct = 10, `In round, wrong pos` = 8, Wrong = 0),
`3P` = list(Correct = 5, `In round, wrong pos` = 4, Wrong = 0)
) %>%
dplyr::bind_rows(.id = 'round') %>%
pivot_longer(-round, names_to = 'check', values_to = 'pts_per_round')
arabicNames = readxl::read_xlsx('./Assets/Data/players.xlsx')
countries = read.csv('./Assets/Data/countries.csv')
bracket.base = list(horiz = './Assets/Data/bracket_horizSeg.csv',
vert = './Assets/Data/bracket_vertSeg.csv',
labs = './Assets/Data/bracket_labs.csv' ) %>%
lapply(., read.csv)
base.plot = plotBaseBracket(bracket.base)
fifa_picks = read.csv('./Assets/Data/fifa_picks.csv') %>%
mutate(font = ifelse(winLose == 'W', 'bold', 'plain')) %>%
dplyr::left_join(., arabicNames, by = 'name') %>%
dplyr::left_join(countries, by = c('label' = 'country')) %>%
mutate(title = glue('{name} {arabic}')) %>%
nest(dat = -c(name, title)) %>%
mutate(dat = setNames(dat, name)) %>%
arrange(name) %>%
mutate(brackets = map2(dat, title, ~addWinLose(base.plot, bracket.df = .x, title = .y)))
Note: There was a mixup with the printed brackets that were used for the competition. The semi-finals were transposed, causing some peoples’ brackets to have impossible choices.
fifa_picks %$%
walk2(name, brackets, function(n, b){
cat(paste0('### ', n, '\n'))
print(b)
cat('\n\n')
})
pick_check = fifa_picks %>%
unnest(dat) %>%
mutate(round = case_when(match <= 8 ~ 'Ro16',
match <= 12 ~ 'QF',
match <= 14 ~ 'SF',
match == 15 ~ 'F',
match == 16 ~ '3P')) %>%
nest(dat = -c(name, title, round)) %>%
dplyr::group_by(round) %>%
dplyr::mutate(actual = dat[which(name == 'Actual')]) %>%
mutate(dat = pmap(list(dat, actual, round), ~checkPicks(..1, ..2, ..3))) %>%
select(-actual) %>%
unnest(dat) %>%
nest(dat = -c(name, title)) %>%
mutate(brackets = map2(dat, title, ~addCorrectIncorrect(base.plot, bracket.df = .x, title = .y)))
pick_check %$%
walk2(name, brackets, function(n, b){
cat(paste0('### ', n, '\n'))
print(b)
cat('\n\n')
})
pick_check_summary = pick_check %>%
select(-brackets) %>%
unnest(dat) %>%
select(name, id, label, check) %>%
dplyr::mutate(round = case_when(str_detect(id, 'QF') ~ 'QF',
str_detect(id, 'SF') ~ 'SF',
str_detect(id, 'F.[AB]') ~ 'F',
str_detect(id, '3P') ~ '3P',
T ~ 'Ro16'
) ) %>%
dplyr::count(name, round, check) %>%
left_join(points.df, by = c('round', 'check')) %>%
mutate(points = n * pts_per_round) %>%
group_by(name) %>%
mutate(`Total Points` = sum(points)) %>%
group_by(name, round) %>%
mutate(points = sum(points)) %>%
ungroup() %>%
select(-pts_per_round ) %>%
pivot_wider(names_from = check, values_from = c(n), values_fill = 0) %>%
mutate(round = factor(round, levels = c('Ro16', 'QF', 'SF', 'F', '3P'))) %>%
arrange(name, as.numeric(round)) %>%
relocate(Wrong, .after = dplyr::last_col()) %>%
relocate(c(points, `Total Points`), .after = dplyr::last_col()) %>%
dplyr::mutate(`Total Points` = ifelse(round == 'Ro16', `Total Points`, NA_integer_))
pick_check_summary %>%
dplyr::rename("In Round,\nWrong Pos" = "In round, wrong pos") %>%
group_by(name) %>%
mutate(tp.num = sum(points)) %>%
ungroup() %>%
dplyr::mutate(name = fct_reorder(name, tp.num)) %>%
arrange(-as.numeric(name)) %>%
select(-tp.num) %>%
group_by(name) %>%
dplyr::rename('Round' = 'round') %>%
gt() %>%
gt_color_rows(., Correct, palette = colorRampPalette(c('white', 'springgreen2'))(50)) %>%
gt_color_rows(.,"In Round,\nWrong Pos", palette = colorRampPalette(c('white', 'goldenrod'))(50)) %>%
gt_color_rows(., Wrong, palette = colorRampPalette(c('white', 'red'))(50)) %>%
sub_missing(`Total Points`, missing_text = '') %>%
data_color(., `Total Points`,
colors = scales::col_numeric(palette = colorRampPalette(c('white', 'dodgerblue'))(50),
domain = range(pick_check_summary$`Total Points`, na.rm = T),
na.color = 'transparent'
)) %>%
cols_width(`Wrong` ~ px(90), Correct ~ px(75)) %>%
tab_options(table.align = 'left', row_group.as_column = T) %>%
gt_theme_538()
| Round | Correct | In Round, Wrong Pos | Wrong | points | Total Points | |
|---|---|---|---|---|---|---|
| Actual | Ro16 | 16 | 0 | 0 | 32 | 126 |
| QF | 8 | 0 | 0 | 32 | ||
| SF | 4 | 0 | 0 | 32 | ||
| F | 2 | 0 | 0 | 20 | ||
| 3P | 2 | 0 | 0 | 10 | ||
| Kaushiki | Ro16 | 8 | 2 | 6 | 18 | 64 |
| QF | 5 | 0 | 3 | 20 | ||
| SF | 2 | 0 | 2 | 16 | ||
| F | 1 | 0 | 1 | 10 | ||
| 3P | 0 | 0 | 2 | 0 | ||
| Chris | Ro16 | 8 | 2 | 6 | 18 | 64 |
| QF | 5 | 0 | 3 | 20 | ||
| SF | 2 | 0 | 2 | 16 | ||
| F | 1 | 0 | 1 | 10 | ||
| 3P | 0 | 0 | 2 | 0 | ||
| Ly-Sha | Ro16 | 7 | 3 | 6 | 17 | 60 |
| QF | 4 | 1 | 3 | 18 | ||
| SF | 1 | 1 | 2 | 12 | ||
| F | 0 | 1 | 1 | 8 | ||
| 3P | 1 | 0 | 1 | 5 | ||
| Kritika | Ro16 | 12 | 1 | 3 | 25 | 55 |
| QF | 6 | 1 | 1 | 26 | ||
| SF | 0 | 1 | 3 | 4 | ||
| F | 0 | 0 | 2 | 0 | ||
| 3P | 0 | 0 | 2 | 0 | ||
| Alejandra | Ro16 | 9 | 3 | 4 | 21 | 49 |
| QF | 3 | 0 | 5 | 12 | ||
| SF | 1 | 0 | 3 | 8 | ||
| F | 0 | 1 | 1 | 8 | ||
| 3P | 0 | 0 | 2 | 0 | ||
| Subhashini | Ro16 | 5 | 5 | 6 | 15 | 41 |
| QF | 1 | 2 | 5 | 8 | ||
| SF | 1 | 0 | 3 | 8 | ||
| F | 1 | 0 | 1 | 10 | ||
| 3P | 0 | 0 | 2 | 0 | ||
| Alex | Ro16 | 9 | 1 | 6 | 19 | 39 |
| QF | 5 | 0 | 3 | 20 | ||
| SF | 0 | 0 | 4 | 0 | ||
| F | 0 | 0 | 2 | 0 | ||
| 3P | 0 | 0 | 2 | 0 | ||
| UkJin | Ro16 | 8 | 2 | 6 | 18 | 32 |
| QF | 3 | 1 | 4 | 14 | ||
| SF | 0 | 0 | 4 | 0 | ||
| F | 0 | 0 | 2 | 0 | ||
| 3P | 0 | 0 | 2 | 0 | ||
| Effie | Ro16 | 7 | 4 | 5 | 18 | 32 |
| QF | 3 | 1 | 4 | 14 | ||
| SF | 0 | 0 | 4 | 0 | ||
| F | 0 | 0 | 2 | 0 | ||
| 3P | 0 | 0 | 2 | 0 | ||
| Matthias | Ro16 | 6 | 4 | 6 | 16 | 30 |
| QF | 3 | 1 | 4 | 14 | ||
| SF | 0 | 0 | 4 | 0 | ||
| F | 0 | 0 | 2 | 0 | ||
| 3P | 0 | 0 | 2 | 0 | ||
| Dylan | Ro16 | 5 | 4 | 7 | 14 | 20 |
| QF | 0 | 3 | 5 | 6 | ||
| SF | 0 | 0 | 4 | 0 | ||
| F | 0 | 0 | 2 | 0 | ||
| 3P | 0 | 0 | 2 | 0 |
fifa_picks.regSummary = fifa_picks %>%
unnest(dat) %>%
dplyr::mutate(person.region = ifelse(name == 'Actual', 'Actual', person.region)) %>%
dplyr::filter(!is.na(country.region)) %>%
select(name, match, winLose, person.region, country.region)
actualOrd = fifa_picks.regSummary %>%
dplyr::filter(person.region == 'Actual') %>%
dplyr::count(country.region) %>%
arrange(n) %>%
extract2('country.region')
fifa_picks.regSummary %>%
dplyr::count(person.region, country.region) %>%
dplyr::mutate(country.region = factor(country.region, levels = actualOrd)) %>%
ggplot(aes(x = person.region, y = n, fill = country.region)) +
geom_col(position = 'fill') +
scale_fill_manual(values = MetBrewer::met.brewer('Egypt', 5)) +
labs(title = 'Proportion each region was found within\neach bracket',
x = "Lab Member", y = '', fill = 'Team Region') +
NULL
fifa_picks.regSummary %>%
dplyr::count(name, person.region, country.region) %>%
dplyr::mutate(country.region = factor(country.region, levels = actualOrd)) %>%
ggplot(aes(x = name, y = n, fill = country.region)) +
geom_col(position = 'fill') +
scale_fill_manual(values = MetBrewer::met.brewer('Egypt', 5)) +
labs(title = 'Proportion each region was found within bracket',
x = "Lab Member", y = '', fill = 'Team Region') +
NULL
totalPts.mf = pick_check_summary %>%
dplyr::filter(!is.na(`Total Points`)) %>%
dplyr::left_join(., arabicNames, by = 'name') %>%
dplyr::select(-arabic) %>%
dplyr::filter(name != 'Actual') %>%
dplyr::mutate(m.f = ifelse(m.f == 'F', 'Female', 'Male')) %>%
dplyr::mutate(m.f = factor(m.f, levels = c('Female', 'Male')))
mf.tTest.vect = totalPts.mf %>%
dplyr::select(m.f, `Total Points`) %>%
split(., .$m.f)
mf.tTest = t.test(x = mf.tTest.vect$Female$`Total Points`,
y = mf.tTest.vect$Male$`Total Points`, alternative = "two.sided")
mf.test.df = data.frame(group1 = 'Male', group2 = 'Female', label = signif(mf.tTest$p.value, 2), y.position = 70)
totalPts.mf %>%
ggplot(aes(x = m.f, y = `Total Points`)) +
geom_violin() +
geom_boxplot(width = 0.2, aes(fill = m.f)) +
ggprism::add_pvalue(mf.test.df) +
scale_fill_manual(values = c('#5dbdfc', '#f573f2' )) +
labs(
x = '',
fill = 'Female / Male',
title = "<span style = 'color:#5dbdfc'>**Females**</span> appeared to have higher point totals<br>than <span style = 'color:#f573f2'>**Males**</span>,
but the result is **not** statistically<br>significant.",
) +
theme(plot.title = element_textbox(), plot.caption = element_text(hjust = 0))