From 1db9e3f59a9821eb50a8c2e5df3c4722e5244333 Mon Sep 17 00:00:00 2001 From: JosuaKugler Date: Sun, 8 Aug 2021 22:35:08 +0200 Subject: [PATCH] add plots in genderequality and clean up hitlercomparison --- vignettes/genderequality.Rmd | 99 ++++++++++++++-------------------- vignettes/hitlercomparison.Rmd | 18 ++++++- 2 files changed, 56 insertions(+), 61 deletions(-) diff --git a/vignettes/genderequality.Rmd b/vignettes/genderequality.Rmd index 9e44994..2b16188 100644 --- a/vignettes/genderequality.Rmd +++ b/vignettes/genderequality.Rmd @@ -97,7 +97,7 @@ speaker %>% speaker_with_gender ``` -#Analyse +## Analyse First, let's look at the relative distribution of the sexes throughout the whole Bundestag. @@ -120,9 +120,9 @@ pie + ylab("") ``` -Next we look at the individual distributions between men and women in the different fractions. +Next, we look at the individual distributions between men and women in the different fractions. -```{r} +```{r, fig.width=7} speaker_with_gender %>% group_by(fraction) %>% summarize(n = n()) -> @@ -137,58 +137,41 @@ speaker_with_gender %>% bar_plot_fractions(women_per_fraction, x_variable=fraction, y_variable=q, title="Frauenanteil nach Partei") ``` -```r -speaker_with_gender %>% - select(fraction, gender) %>% - group_by(fraction, gender) %>% - summarise("count" = n()) %>% - filter(gender %in% c("male", "female")) %>% - filter(!is.na(fraction)) %>% - group_by(fraction) %>% - mutate(portion = 100*count/sum(count)) -> - plot2 - -plot2 %>% - filter(fraction == "AfD") %>% - ggplot(aes(x = "", y = portion, fill = gender))+ - geom_bar(width = 1, stat = "identity") -> - bp -pie1 <- bp + coord_polar("y", start=0) + ggtitle("AfD") + xlab("") + ylab("") -plot2 %>% - filter(fraction == "BÜNDNIS 90 / DIE GRÜNEN") %>% - ggplot(aes(x = "", y = portion, fill = gender))+ - geom_bar(width = 1, stat = "identity") -> - bp -pie2 <- bp + coord_polar("y", start=0) + ggtitle("DIE GRÜNEN") + xlab("") + ylab("") -plot2 %>% - filter(fraction == "CDU/CSU") %>% - ggplot(aes(x = "", y = portion, fill = gender))+ - geom_bar(width = 1, stat = "identity") -> - bp -pie3 <- bp + coord_polar("y", start=0) + ggtitle("CDU/CSU") + xlab("") + ylab("") -plot2 %>% - filter(fraction == "DIE LINKE") %>% - ggplot(aes(x = "", y = portion, fill = gender))+ - geom_bar(width = 1, stat = "identity") -> - bp -pie4 <- bp + coord_polar("y", start=0) + ggtitle("DIE LINKE") + xlab("") + ylab("") -plot2 %>% - filter(fraction == "FDP") %>% - ggplot(aes(x = "", y = portion, fill = gender))+ - geom_bar(width = 1, stat = "identity") -> - bp -pie5 <- bp + coord_polar("y", start=0) + ggtitle("FDP") + xlab("") + ylab("") -plot2 %>% - filter(fraction == "SPD") %>% - ggplot(aes(x = "", y = portion, fill = gender))+ - geom_bar(width = 1, stat = "identity") -> - bp -pie6 <- bp + coord_polar("y", start=0) + ggtitle("SPD") + xlab("") + ylab("") - -gridExtra::grid.arrange(pie1,pie2,pie3,pie4,pie5,pie6,nrow=2) +Prepared with this knowledge, we can now analyse the relative amount of speeches by gender and fraction. + +```{r, fig.width=7} +speaker_with_gender %>% transmute(speaker_id = id, gender, fraction) -> simple_speaker_with_gender +speeches %>% + transmute(id, speaker_id = speaker) %>% + inner_join(simple_speaker_with_gender) %>% + group_by(fraction) %>% + summarize(speeches=n()) -> + fraction_speeches_size + +speeches %>% + transmute(id, speaker_id = speaker) %>% + inner_join(simple_speaker_with_gender) %>% + filter(gender=='female') %>% + group_by(fraction) %>% + summarize(female_speeches=n()) %>% + left_join(fraction_speeches_size) %>% + left_join(women_per_fraction) %>% + mutate(q_speeches = female_speeches/speeches) -> speech_distribution + #bar_plot_fractions(speech_distribution, x_variable=fraction, y_variable=q_speeches, title="Redeanteil Frauen nach Partei") + + +party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos", + "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU", + "FDP", "AfD", NA_character_)) + +speech_distribution %>% + mutate("Frauenanteil" = q, "Redenanteil Frauen" = q_speeches) %>% + pivot_longer(c(Frauenanteil, "Redenanteil Frauen"), "type") %>% + ggplot(aes(x=factor(fraction, levels = party_order), y=value, fill=factor(type, levels = factor(c("Frauenanteil", "Redenanteil Frauen"))))) + scale_fill_manual(values= c("Frauenanteil"="gray", "Redenanteil Frauen"="red")) + coord_flip() + geom_bar(stat="identity", position="dodge") + labs(fill="Kategorie") + ``` -Now let's analyze whether there are any differences in the amount of speeches given. +For comparison, let's analyze the total differences in the amount of speeches given. ```{r} speeches %>% @@ -208,8 +191,9 @@ speeches %>% mutate(relative2=relative/sum(relative)) -> plot3 ``` + At first lets take a look at the absolute difference in the amount of speeches by the two sexes. -```{r} +```{r,fig.width=7} barplot(plot3$absolute2, ylab = "amount of speeches", main = "Absolute comparison of speech shares", @@ -219,8 +203,9 @@ barplot(plot3$absolute2, font.main = 4, cex.axis = 0.7) ``` + Since there are more men represented in the German Bundestag, we now consider the relative proportions of speeches, depending on the ratio of men and women. -```{r} +```{r, fig.width=7} barplot(plot3$relative2, ylab = "amount of speeches", main = "Relative comparison of speech shares", @@ -230,7 +215,3 @@ barplot(plot3$relative2, font.main = 4, cex.axis = 0.7) ``` - - - - diff --git a/vignettes/hitlercomparison.Rmd b/vignettes/hitlercomparison.Rmd index 3ac5846..f7b2999 100644 --- a/vignettes/hitlercomparison.Rmd +++ b/vignettes/hitlercomparison.Rmd @@ -119,8 +119,22 @@ all_words %>% group_by(Worte) %>% summarize(n = sum(n), part= sum(n)/total) -> a Now we want to extract the words that are more frequently used by a specific fraction. ```{r} -afd_words %>% transmute(freq, fraction_n = n) %>% left_join(all_words) %>% transmute(fraction_freq = freq, total_freq = part, fraction_n, total_n = n, rel_quotient = fraction_freq/total_freq, abs_quotient = fraction_n/total_n) %>% arrange(-abs_quotient, -fraction_n) %>% filter(rel_quotient > 1) -> afd_high_frequent -select(afd_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80) +afd_words %>% + transmute(freq, fraction_n = n) %>% + left_join(all_words) %>% + transmute( + fraction_freq = freq, + total_freq = part, + fraction_n, + total_n = n, + rel_quotient = fraction_freq/total_freq, + abs_quotient = fraction_n/total_n) %>% + arrange(-abs_quotient, -fraction_n) %>% + filter(rel_quotient > 1) -> + afd_high_frequent + +select(afd_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% + filter(total_n > 80) afdundfraktionslos_words %>% transmute(freq, fraction_n = n) %>% left_join(all_words) %>% transmute(fraction_freq = freq, total_freq = part, fraction_n, total_n = n, rel_quotient = fraction_freq/total_freq, abs_quotient = fraction_n/total_n) %>% arrange(-abs_quotient, -fraction_n) %>% filter(rel_quotient > 1) -> afdundfraktionslos_high_frequent select(afdundfraktionslos_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)