Procházet zdrojové kódy

adapt bar plot syntax, improve labels, improve fraction specific vocabulary selection

genderequality-alternative
JosuaKugler před 4 roky
rodič
revize
354d18050b
1 změnil soubory, kde provedl 11 přidání a 11 odebrání
  1. +11
    -11
      vignettes/hitlercomparison.Rmd

+ 11
- 11
vignettes/hitlercomparison.Rmd Zobrazit soubor

@@ -120,31 +120,31 @@ all_words %>% group_by(Worte) %>% summarize(n = sum(n), part= sum(n)/total) -> a
Now we want to extract the words that are more frequently used by a specific `fraktion`.
```{r}
afd_words %>% transmute(freq, fraktion_n = n) %>% left_join(all_words) %>% transmute(fraktion_freq = freq, total_freq = part, fraktion_n, total_n = n, rel_quotient = fraktion_freq/total_freq, abs_quotient = fraktion_n/total_n) %>% arrange(-abs_quotient, -fraktion_n) %>% filter(rel_quotient > 1) -> afd_high_frequent
select(afd_high_frequent, fraktion_n, total_n)
select(afd_high_frequent, fraktion_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)

afdundfraktionslos_words %>% transmute(freq, fraktion_n = n) %>% left_join(all_words) %>% transmute(fraktion_freq = freq, total_freq = part, fraktion_n, total_n = n, rel_quotient = fraktion_freq/total_freq, abs_quotient = fraktion_n/total_n) %>% arrange(-abs_quotient, -fraktion_n) %>% filter(rel_quotient > 1) -> afdundfraktionslos_high_frequent
select(afdundfraktionslos_high_frequent, fraktion_n, total_n)
select(afdundfraktionslos_high_frequent, fraktion_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)

grüne_words %>% transmute(freq, fraktion_n = n) %>% left_join(all_words) %>% transmute(fraktion_freq = freq, total_freq = part, fraktion_n, total_n = n, rel_quotient = fraktion_freq/total_freq, abs_quotient = fraktion_n/total_n) %>% arrange(-abs_quotient, -fraktion_n) %>% filter(rel_quotient > 1) -> grüne_high_frequent
select(grüne_high_frequent, fraktion_n, total_n)
select(grüne_high_frequent, fraktion_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)

cdu_words %>% transmute(freq, fraktion_n = n) %>% left_join(all_words) %>% transmute(fraktion_freq = freq, total_freq = part, fraktion_n, total_n = n, rel_quotient = fraktion_freq/total_freq, abs_quotient = fraktion_n/total_n) %>% arrange(-abs_quotient, -fraktion_n) %>% filter(rel_quotient > 1) -> cdu_high_frequent
select(cdu_high_frequent, fraktion_n, total_n)
select(cdu_high_frequent, fraktion_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)

linke_words %>% transmute(freq, fraktion_n = n) %>% left_join(all_words) %>% transmute(fraktion_freq = freq, total_freq = part, fraktion_n, total_n = n, rel_quotient = fraktion_freq/total_freq, abs_quotient = fraktion_n/total_n) %>% arrange(-abs_quotient, -fraktion_n) %>% filter(rel_quotient > 1) -> linke_high_frequent
select(linke_high_frequent, fraktion_n, total_n)
select(linke_high_frequent, fraktion_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)

fdp_words %>% transmute(freq, fraktion_n = n) %>% left_join(all_words) %>% transmute(fraktion_freq = freq, total_freq = part, fraktion_n, total_n = n, rel_quotient = fraktion_freq/total_freq, abs_quotient = fraktion_n/total_n) %>% arrange(-abs_quotient, -fraktion_n) %>% filter(rel_quotient > 1) -> fdp_high_frequent
select(fdp_high_frequent, fraktion_n, total_n)
select(fdp_high_frequent, fraktion_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)

fraktionslos_words %>% transmute(freq, fraktion_n = n) %>% left_join(all_words) %>% transmute(fraktion_freq = freq, total_freq = part, fraktion_n, total_n = n, rel_quotient = fraktion_freq/total_freq, abs_quotient = fraktion_n/total_n) %>% arrange(-abs_quotient, -fraktion_n) %>% filter(rel_quotient > 1) -> fraktionslos_high_frequent
select(fraktionslos_high_frequent, fraktion_n, total_n)
select(fraktionslos_high_frequent, fraktion_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)

spd_words %>% transmute(freq, fraktion_n = n) %>% left_join(all_words) %>% transmute(fraktion_freq = freq, total_freq = part, fraktion_n, total_n = n, rel_quotient = fraktion_freq/total_freq, abs_quotient = fraktion_n/total_n) %>% arrange(-abs_quotient, -fraktion_n) %>% filter(rel_quotient > 1) -> spd_high_frequent
select(spd_high_frequent, fraktion_n, total_n)
select(spd_high_frequent, fraktion_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)

na_words %>% transmute(freq, fraktion_n = n) %>% left_join(all_words) %>% transmute(fraktion_freq = freq, total_freq = part, fraktion_n, total_n = n, rel_quotient = fraktion_freq/total_freq, abs_quotient = fraktion_n/total_n) %>% arrange(-abs_quotient, -fraktion_n) %>% filter(rel_quotient > 1) -> na_high_frequent
select(na_high_frequent, fraktion_n, total_n)
select(na_high_frequent, fraktion_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)
```

We compare these words with `hitlerwords`.
@@ -164,10 +164,10 @@ na_high_frequent %>% mutate(Worte = str_to_lower(Worte)) %>% inner_join(hitlerwo
tibble(fraktion = c("AfD", "AfD&Fraktionslos", "BÜNDNIS 90 / DIE GRÜNEN", "CDU/CSU", "DIE LINKE", "FDP", "Fraktionslos", "SPD"),
absolute = c(nrow(afd_hitler_comparison), nrow(afdundfraktionslos_hitler_comparison), nrow(grüne_hitler_comparison), nrow(cdu_hitler_comparison), nrow(linke_hitler_comparison), nrow(fdp_hitler_comparison), nrow(fraktionslos_hitler_comparison), nrow(spd_hitler_comparison)),
total = c(nrow(afd_words), nrow(afdundfraktionslos_words), nrow(grüne_words), nrow(cdu_words), nrow(linke_words), nrow(fdp_words), nrow(fraktionslos_words), nrow(spd_words))
) %>% mutate(n = absolute/total) -> hitler_comparison
) %>% mutate(percent = 100*absolute/total) -> hitler_comparison
hitler_comparison
```
Finally, we want to plot our results:
```{r, fig.width=7}
bar_plot_fraktionen(hitler_comparison)
bar_plot_fraktionen(hitler_comparison, percent, fill=fraktion, title="Coincidence of party vocabulary with nazi vocabulary", ylab="unique 'nazi' words per total (unique) fraction words [%]")
```

Načítá se…
Zrušit
Uložit