Für Vorlesungen bitte die Webseite verwenden: https://flavigny.de/lecture
Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

96 lines
3.0KB

  1. library(tidyverse)
  # --- Load athlete data
  # Running disciplines kept for the analysis, listed from shortest to longest.
  std_runs <- c(
    "100 metres", "200 metres", "400 metres", "800 metres",
    "1,500 metres", "5,000 metres", "10,000 metres", "Marathon"
  )

  # Keep the Athletics rows and the columns used below, strip the
  # "Athletics Women's " / "Athletics Men's " text from the event name so it
  # can be matched against `std_runs`, then keep only those disciplines.
  athletes <- read_csv("athlete_events.csv") %>%
    filter(Sport == "Athletics") %>%
    select(Name, Sex, Age, Height, Weight, Year, Event, Medal) %>%
    mutate(
      Event = Event %>%
        str_remove("Athletics Women's ") %>%
        str_remove("Athletics Men's ")
    ) %>%
    filter(Event %in% std_runs)
  # --- Top Medalists
  # One row per athlete name with the number of gold, silver and bronze
  # medals, sorted by gold, then silver, then bronze (most medals first).
  #
  # Medals are counted straight from the `Medal` column instead of pivoting it
  # wide: the pivot produced list-columns when an athlete had duplicated rows,
  # created a spurious `NA` column, and failed when a medal class was absent.
  # Rows without a medal (NA) count as zero thanks to `na.rm = TRUE`.
  # NOTE(review): athletes are identified by `Name` alone, so different people
  # who share a name are merged -- confirm whether an ID column should be kept.
  medal_ranking <- athletes %>%
    group_by(Name) %>%
    summarize(
      Gold = sum(Medal == "Gold", na.rm = TRUE),
      Silver = sum(Medal == "Silver", na.rm = TRUE),
      Bronze = sum(Medal == "Bronze", na.rm = TRUE)
    ) %>%
    arrange(desc(Gold), desc(Silver), desc(Bronze))
  # --- Age distribution among Women's Marathon participants
  # Point colours for the medal overlay, keyed by the values of `Medal`.
  medal_color <- c(Bronze = "#6A3805", Silver = "#B4B4B4", Gold = "#AF9500")

  # Female marathon runners after 1980; Year becomes a factor so that the
  # box plot draws one box per year.
  d <- athletes %>%
    filter(Event == "Marathon", Sex == "F", Year > 1980) %>%
    mutate(Year = as.factor(Year))

  # Box plot of every participant's age, with medal winners drawn on top.
  # Only rows lacking `Medal` or `Age` are dropped for the overlay: a bare
  # drop_na(d) would also discard medalists whose Height or Weight is missing.
  ggplot(d, aes(x = Year, y = Age)) +
    geom_boxplot(na.rm = TRUE) +
    geom_point(data = drop_na(d, Medal, Age), mapping = aes(color = Medal)) +
    scale_color_manual(values = medal_color) +
    ggtitle("Age distribution among Women's Marathon participants")
  # --- Change in height of male runners
  # Mean height of male participants per discipline and year (after 1900),
  # drawn as points with a smoothed trend line per discipline.
  # `std_runs` is reassigned further down for results.csv, so this section
  # has to run while it still holds the athlete_events.csv event names.
  athletes %>%
    filter(Sex == "M", Year > 1900) %>%
    mutate(Event = factor(Event, levels = std_runs)) %>% # use factor for ordering
    group_by(Event, Year) %>%
    # .groups = "drop" returns an ungrouped tibble and silences the
    # regrouping message that summarize() otherwise prints.
    summarize(MeanHeight = mean(Height, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = Year, y = MeanHeight, color = Event)) +
    geom_point() +
    geom_smooth(se = FALSE) +
    ggtitle("Men's runs - mean across participants")
  # --- Medalist times
  # Times are given as strings with an inconsistent format, so a custom
  # function is needed to convert them to seconds.

  # Convert time strings to seconds.
  #
  # Each string is split on ":", "h" or "-" into at most three numeric fields,
  # read right-aligned as hours, minutes, seconds (a single field is seconds,
  # two fields are minutes and seconds).
  #
  # s: character vector of times.
  # Returns a double vector of the same length as `s`. An element is NA when
  # the string is NA or empty, ends with a separator, has more than three
  # fields, or contains a field that as.double() cannot parse (the usual
  # coercion warning is still emitted in that last case).
  str2sec <- function(s) {
    s <- as.character(s)
    fields_by_string <- strsplit(s, ":|h|-")
    # strsplit() silently drops a trailing empty field, so strings ending in a
    # separator are flagged here and set to NA below.
    trailing_sep <- grepl("(:|h|-)$", s)

    secs <- vapply(
      fields_by_string,
      function(fields) {
        n <- length(fields)
        # Zero fields (empty string) or more than hours:minutes:seconds
        # cannot be interpreted as a time.
        if (n < 1L || n > 3L) {
          return(NA_real_)
        }
        # Left-pad with zeros so the last field is always the seconds.
        hms <- c(rep(0, 3L - n), as.double(fields))
        hms[1] * 3600 + hms[2] * 60 + hms[3]
      },
      numeric(1),
      USE.NAMES = FALSE
    )
    secs[trailing_sep] <- NA_real_
    secs
  }
  # Only the following disciplines are of interest, written the way the
  # `Event` column reads once " Men" / " Women" has been removed from it.
  std_runs <- c(
    "100M", "200M", "400M", "800M", "1500M", "5000M", "10000M", "Marathon"
  )

  # Load the results, reduce event names to the bare discipline, keep the
  # standard runs, convert the result strings to seconds and discard every
  # row that still contains a missing value.
  runs <- read_csv("results.csv") %>%
    mutate(
      Event = Event %>%
        str_remove(" Men") %>%
        str_remove(" Women")
    ) %>%
    filter(Event %in% std_runs) %>%
    mutate(Result = str2sec(Result)) %>%
    drop_na()
  # Point colours keyed by B / S / G -- presumably the medal codes used in
  # results.csv; verify against the data.
  medal_color <- c(B = "#6A3805", S = "#B4B4B4", G = "#AF9500")

  # One panel per discipline (in `std_runs` order, each with its own y axis):
  # results over the years as points coloured by medal and shaped by gender,
  # plus a smoothed trend with its confidence band.
  ggplot(runs, aes(x = Year, y = Result, shape = Gender)) +
    geom_point(aes(color = Medal)) +
    geom_smooth(se = TRUE) +
    facet_wrap(vars(factor(Event, levels = std_runs)), scales = "free_y") +
    scale_color_manual(values = medal_color) +
    labs(
      title = "Times of medal winners in different running disciplines",
      x = "Year",
      y = "Time in seconds"
    )