diff --git a/sose2021/algebra/alg4.pdf b/sose2021/algebra/alg4.pdf new file mode 100644 index 0000000..e3aeb64 Binary files /dev/null and b/sose2021/algebra/alg4.pdf differ diff --git a/sose2021/algebra/alg4.xopp b/sose2021/algebra/alg4.xopp new file mode 100644 index 0000000..5803fa4 Binary files /dev/null and b/sose2021/algebra/alg4.xopp differ diff --git a/sose2021/funktheo/funk4.pdf b/sose2021/funktheo/funk4.pdf new file mode 100644 index 0000000..841f907 Binary files /dev/null and b/sose2021/funktheo/funk4.pdf differ diff --git a/sose2021/funktheo/funk4.xopp b/sose2021/funktheo/funk4.xopp new file mode 100644 index 0000000..50cea6b Binary files /dev/null and b/sose2021/funktheo/funk4.xopp differ diff --git a/sose2021/r/w05/P05-1.R b/sose2021/r/w05/P05-1.R new file mode 100644 index 0000000..29064c4 --- /dev/null +++ b/sose2021/r/w05/P05-1.R @@ -0,0 +1,79 @@ +# Josua Kugler, Christian Merten + +# install.packages("babynames") +library(tidyverse) + +## Create some data----------------------------------------------------------- + +set.seed(1) + +baseset <- list() +baseset$grade <- as.integer(c(5,6,7,8,9,10,11)) +baseset$grade_boost <- c(1,3,5,7,8,9,10) +baseset$letter <- letters[1:4] +baseset$letter_boost <- sample(1:5, 4, replace=T) +babynames::babynames %>% + group_by(sex, name) %>% + summarise(n = sum(n)) %>% + arrange(desc(n)) %>% + mutate(rank = min_rank(-n)) %>% + filter (rank <= 3000) -> + ranked_names +baseset$name <- ranked_names$name +baseset$distance <- c(100,200,400,1000) +baseset$distance_boost <- c(14,12,10,8) + +sample_observation <- function(n) { + res <- list() + res$name <- sample(baseset$name, n, replace=T) + res$grade <- sample(baseset$grade, n, replace=T) + res$letter <- sample(baseset$letter, n, replace=T) + boost_base <- + baseset$grade_boost[match(res$grade,baseset$grade)] + + baseset$letter_boost[match(res$letter,baseset$letter)] + res$time100 <- sample_time(100, baseset$distance_boost[1] + boost_base) + res$time200 <- sample_time(200, baseset$distance_boost[2] + boost_base) + res$time400 <- sample_time(400, baseset$distance_boost[3] + boost_base) + res$time1000 <- sample_time(1000, baseset$distance_boost[4] + boost_base) + as_tibble(res) +} + +sample_time <- function(dist, boost) { + (runif(length(boost))/2+2.5)/boost*dist*2 +} + +sports <- sample_observation(1000) + +requirements <- tibble( + level = 1:11, + min100 = seq(43,23,len=11), + min1000 = seq(500,300,len=11) +) + + + +## Exercises ----------------------------------------------------------------- + + +# a) +# get all students who failed in 100m or 1000m +sports %>% left_join(requirements, by = c("grade" = "level")) %>% + filter(time100 <= min100, time1000 <= min1000) + +# b) +# get names, grade and letter of all students who failed 1000m by less than 1s +# so you can still let them pass :) +sports %>% left_join(requirements, by = c("grade" = "level")) %>% + filter((time1000 - min1000) > 0 & (time1000 - min1000) < 1) %>% + select(name, grade, letter) + +# c) +# tidy the data: +# create two columns from all timeXXX-columns: +# a column "time" with the entries from all timeXXX-columns +# a column "distance" of the distance the time refers to +# make sure all columns have a suitable type +sports %>% pivot_longer(c("time100", "time200", "time400", "time1000"), + names_to="distanceRaw", + values_to="time") %>% + extract(distanceRaw, into="distance", regex="time(.*)", convert=T) diff --git a/sose2021/r/w05/P05-2.R b/sose2021/r/w05/P05-2.R new file mode 100644 index 0000000..0b6ace9 --- /dev/null +++ b/sose2021/r/w05/P05-2.R @@ -0,0 +1,65 @@ +library(tidyverse) +library(babynames) + +# 1) +babynames %>% + filter(year >= 2000) %>% + group_by(sex, name) %>% + summarize(n=sum(n), .groups="drop") -> + bn20 + +# 2) +bn20 %>% + group_by(name) %>% + pivot_wider(names_from="sex", values_from="n") %>% + rename(female = "F", male = "M") %>% + filter(female > 0 & male > 0 & (female+male) > 1e5) %>% + arrange(abs(female - male) / (male + female)) -> + bn20_sim + +# 3) +bn20 %>% group_by(name_length = nchar(name)) %>% summarize(n=sum(n)) + +# 4) +min_len <- min(nchar(bn20$name)) +bn20 %>% filter(nchar(name) == min_len) %>% arrange(desc(n)) + +# 5) +babynames %>% + group_by(year, sex) %>% + summarize(avg_len = mean(nchar(name))) -> + bn_avg_len + +bn_avg_len %>% + ggplot() + + geom_line(aes(x=year, y=avg_len, color=sex)) + +# 6) +babynames %>% + select(-prop) %>% + filter(year >= 2000) -> + bn + +# Annahme: "Neue" Namen die im Vorjahr noch nicht in bn auftauchen +# werden ausgewertet mit n_prev = 0 +replace_with_zero <- function(x) { + x[is.na(x)] <- 0 + x +} + +bn %>% + filter(year > 2000) %>% + left_join(mutate(bn, year = year + 1, n_prev = n, n = NULL)) %>% + mutate(n_prev = replace_with_zero(n_prev)) -> + bn_prev + +# Wenn das nicht erwünscht, führe man diese Zeile aus +# bn %>% inner_join(mutate(bn, year = year + 1, n_prev = n, n = NULL)) -> bn_prev + +n_mean <- mean(bn$n) + +bn_prev %>% + mutate(s_incr = (n-n_prev)/(n_prev + n_mean)) %>% + group_by(year, sex) %>% + slice_max(s_incr, n = 3) -> + bn_trending diff --git a/sose2021/r/w05/P05-3.pdf b/sose2021/r/w05/P05-3.pdf new file mode 100644 index 0000000..8557c75 Binary files /dev/null and b/sose2021/r/w05/P05-3.pdf differ diff --git a/sose2021/r/w05/P05-3.xopp b/sose2021/r/w05/P05-3.xopp new file mode 100644 index 0000000..7a96144 Binary files /dev/null and b/sose2021/r/w05/P05-3.xopp differ diff --git a/sose2021/tut-ana/praesenz-4.pdf b/sose2021/tut-ana/praesenz-4.pdf new file mode 100644 index 0000000..5ed12b8 Binary files /dev/null and b/sose2021/tut-ana/praesenz-4.pdf differ diff --git a/sose2021/tut-ana/praesenz-4.xopp b/sose2021/tut-ana/praesenz-4.xopp new file mode 100644 index 0000000..95f704c Binary files /dev/null and b/sose2021/tut-ana/praesenz-4.xopp differ