Für Vorlesungen, bitte die Webseite verwenden. https://flavigny.de/lecture
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

113 rindas
3.2KB

  1. # Josua Kugler, Christian Merten
  2. # install.packages("babynames")
  3. library(tidyverse)
  ## Create some data ----------------------------------------------------------
  # Fix the RNG state so the generated data set is reproducible.
  set.seed(1)

  # `baseset` holds the value pools the observation sampler draws from, plus a
  # numeric "boost" for each grade, class letter and distance.
  baseset <- list()
  baseset$grade <- as.integer(c(5, 6, 7, 8, 9, 10, 11))
  baseset$grade_boost <- c(1, 3, 5, 7, 8, 9, 10)
  baseset$letter <- letters[1:4]
  baseset$letter_boost <- sample(1:5, 4, replace = TRUE)

  # Sum the counts per (sex, name) and rank the names by total count.
  # `.groups = "drop_last"` keeps the summary grouped by `sex`, so `min_rank()`
  # ranks within each sex (rank 1 = largest total); ranks up to 3000 are kept.
  ranked_names <- babynames::babynames %>%
    group_by(sex, name) %>%
    summarise(n = sum(n), .groups = "drop_last") %>%
    arrange(desc(n)) %>%
    mutate(rank = min_rank(-n)) %>%
    filter(rank <= 3000)

  baseset$name <- ranked_names$name
  baseset$distance <- c(100, 200, 400, 1000)
  baseset$distance_boost <- c(14, 12, 10, 8)
  # Draw `n` random student records from the pools in the global `baseset`.
  #
  # Name, grade and class letter are each sampled with replacement. The boosts
  # belonging to the drawn grade and letter are summed and, together with the
  # boost of the respective distance, passed to `sample_time()` to generate one
  # time per student and distance.
  #
  # n: number of records to generate.
  # Returns a tibble with the columns name, grade, letter, time100, time200,
  # time400 and time1000.
  sample_observation <- function(n) {
    res <- list()
    res$name <- sample(baseset$name, n, replace = TRUE)
    res$grade <- sample(baseset$grade, n, replace = TRUE)
    res$letter <- sample(baseset$letter, n, replace = TRUE)
    # `match()` yields the pool position of each drawn value, which indexes the
    # parallel boost vector.
    boost_base <-
      baseset$grade_boost[match(res$grade, baseset$grade)] +
      baseset$letter_boost[match(res$letter, baseset$letter)]
    res$time100 <- sample_time(100, baseset$distance_boost[1] + boost_base)
    res$time200 <- sample_time(200, baseset$distance_boost[2] + boost_base)
    res$time400 <- sample_time(400, baseset$distance_boost[3] + boost_base)
    res$time1000 <- sample_time(1000, baseset$distance_boost[4] + boost_base)
    as_tibble(res)
  }
  # Simulate one random time per element of `boost` for the distance `dist`.
  #
  # A uniform draw between 0 and 1 is mapped to the range 2.5 to 3, divided by
  # the boost and scaled by twice the distance; for positive boosts a larger
  # boost therefore gives a smaller time.
  #
  # dist:  distance used as scale factor.
  # boost: numeric vector; the result has the same length.
  sample_time <- function(dist, boost) {
    noise <- runif(length(boost))
    pace <- noise / 2 + 2.5
    pace / boost * dist * 2
  }
  # Simulated results for 1000 students.
  sports <- sample_observation(1000)

  # Lookup table with one row per level (1 to 11). `min100` runs in eleven
  # evenly spaced steps from 43 down to 23, `min1000` from 500 down to 300.
  # `length.out` is spelled out instead of relying on partial matching of `len`.
  requirements <- tibble(
    level = 1:11,
    min100 = seq(43, 23, length.out = 11),
    min1000 = seq(500, 300, length.out = 11)
  )
  ## Exercises -----------------------------------------------------------------
  # a)
  # Order the students alphabetically by `name`.
  sports %>%
    arrange(name)
  # b)
  # Sort sports by 'grade' in descending order (11, 10, ..., 5),
  # in case of ties by 'letter' (a, b, c, d),
  # in case of ties by 'name' (A-Z).
  # `desc()` is required because `arrange()` sorts ascending by default.
  arrange(sports, desc(grade), letter, name)
  # c)
  # Count the number of students per class, i.e. per (grade, letter) pair.
  # `count()` returns an ungrouped tibble, whereas group_by() + summarize()
  # would leave the result grouped by `grade`.
  sports %>%
    count(grade, letter, name = "student_count")
  # d)
  # What is the mean, max and min class size?
  # First count the students per (grade, letter) class, then summarise those
  # counts into a single row.
  sports %>%
    count(grade, letter, name = "student_count") %>%
    summarise(
      mean_class_size = mean(student_count),
      max_class_size = max(student_count),
      min_class_size = min(student_count)
    )
  # e)
  # Get all students with a non-unique name, i.e. every row whose `name`
  # occurs more than once. The helper grouping is dropped again afterwards so
  # the result is a plain, ungrouped tibble.
  sports %>%
    group_by(name) %>%
    filter(n() > 1) %>%
    ungroup()
  # f)
  # Get the top 10 sprinters (100m): the ten smallest `time100` values.
  # `slice_min()` replaces the superseded `top_n(10, -time100)`; like `top_n()`
  # it keeps ties, and it returns the rows ordered from fastest to slowest.
  sports %>%
    slice_min(time100, n = 10)
  # g)
  # Get the slowest 10 sprinters (100m): the ten largest `time100` values.
  # `slice_max()` replaces the superseded `top_n(10, time100)`; like `top_n()`
  # it keeps ties, and it returns the rows ordered from slowest to fastest.
  sports %>%
    slice_max(time100, n = 10)
  # h)
  # Remove 100m, 200m, and 400m, and add velocity in km/h for 1000m.
  # 1000 m is 1 km, so the velocity is 3600 / time1000 (assuming the times are
  # in seconds). Only time100 to time400 are dropped; time1000 stays, as the
  # task asks to remove the three shorter distances only.
  sports %>%
    mutate(velocity = 60 * 60 / time1000) %>%
    select(-(time100:time400))
  # i)
  # Give the `grade` column the new name `level`.
  rename(sports, level = grade)
  # j)
  # Per grade, compute the mean and the minimum of the 200m times.
  sports %>%
    group_by(grade) %>%
    summarise(
      average200 = mean(time200),
      min200 = min(time200)
    )
  # k)
  # Keep only the time columns, i.e. the column range time100 to time1000.
  select(sports, time100:time1000)
  # l)
  # Show the fastest sprinter (100m) in each class,
  # sorted by class (i.e. by grade and letter).
  # `slice_min()` replaces the superseded `top_n(1, -time100)` and keeps ties;
  # the grouping is dropped before sorting so a plain tibble is returned.
  sports %>%
    group_by(grade, letter) %>%
    slice_min(time100, n = 1) %>%
    ungroup() %>%
    arrange(grade, letter)