|
- library(tidyverse)
- library(babynames)
-
- # 1) Total number of babies per (sex, name) combination, counting only
- #    the years from 2000 onwards.
- bn20 <- babynames %>%
-   filter(year >= 2000) %>%
-   group_by(sex, name) %>%
-   summarise(n = sum(n), .groups = "drop")
-
- # 2) Names given to both sexes in similar numbers.
- #    The per-sex totals are placed side by side (one row per name); only
- #    names with more than 1e5 babies overall are kept, ordered by the
- #    relative difference between the female and male totals (most
- #    balanced first).
- bn20_sim <- bn20 %>%
-   # No group_by() here: bn20 has one row per (sex, name), so pivot_wider()
-   # already yields one row per name, and a leftover grouping by name
-   # would only slow down later steps.
-   pivot_wider(names_from = "sex", values_from = "n") %>%
-   rename(female = "F", male = "M") %>%
-   # Names seen for only one sex have NA in the other column; filter()
-   # drops rows whose condition evaluates to NA.
-   filter(female > 0 & male > 0 & (female + male) > 1e5) %>%
-   arrange(abs(female - male) / (male + female))
-
- # 3) Total number of babies by name length (in characters).
- #    The result is only printed, not stored.
- bn20 %>%
-   group_by(name_length = nchar(name)) %>%
-   summarise(n = sum(n))
-
- # 4) Rows of bn20 whose name has the minimal length, most frequent first.
- min_len <- min(nchar(bn20[["name"]]))
- bn20 %>%
-   filter(nchar(name) == min_len) %>%
-   arrange(desc(n))
-
- # 5) Average name length per year and sex, drawn as one line per sex.
- #    The mean is taken over the rows of each (year, sex) group, i.e. it
- #    is not weighted by the number of babies n.
- bn_avg_len <- babynames %>%
-   group_by(year, sex) %>%
-   # .groups = "drop" returns an ungrouped tibble and avoids the
-   # regrouping message summarise() prints otherwise.
-   summarise(avg_len = mean(nchar(name)), .groups = "drop")
-
- ggplot(bn_avg_len, aes(x = year, y = avg_len, color = sex)) +
-   geom_line()
-
- # 6) Working copy for the trend analysis: rows from 2000 onwards,
- #    without the `prop` column.
- bn <- babynames %>%
-   filter(year >= 2000) %>%
-   select(-prop)
-
- # Assumption: "new" names that do not yet appear in bn for the previous
- # year are evaluated with n_prev = 0.
- #
- # Returns x with every NA element replaced by 0.
- replace_with_zero <- function(x) {
-   replace(x, is.na(x), 0)
- }
-
- # Attach the previous year's count to every row after 2000. A copy of bn
- # with year shifted forward by one carries each count as n_prev into the
- # following year; rows without a match get n_prev = 0.
- bn_prev <- bn %>%
-   filter(year > 2000) %>%
-   left_join(
-     mutate(bn, year = year + 1, n_prev = n, n = NULL),
-     # Explicit keys: the columns both tables share, stated so the join
-     # prints no message and does not change if bn gains columns.
-     by = c("year", "sex", "name")
-   ) %>%
-   mutate(n_prev = replace_with_zero(n_prev))
-
- # If that is not desired, run this line instead:
- # bn %>% inner_join(mutate(bn, year = year + 1, n_prev = n, n = NULL)) -> bn_prev
-
- # Mean count over all rows of bn; used below as a smoothing term.
- n_mean <- mean(bn$n)
-
- # Top three names per year and sex by smoothed relative increase over the
- # previous year. Adding n_mean to the denominator keeps names with a very
- # small n_prev from dominating the ranking. slice_max() keeps ties by
- # default, so a group can contain more than three rows.
- bn_trending <- bn_prev %>%
-   mutate(s_incr = (n - n_prev) / (n_prev + n_mean)) %>%
-   group_by(year, sex) %>%
-   slice_max(s_incr, n = 3) %>%
-   # Drop the grouping so later steps do not silently run per (year, sex).
-   ungroup()
|