From 1db9e3f59a9821eb50a8c2e5df3c4722e5244333 Mon Sep 17 00:00:00 2001
From: JosuaKugler <josua.kugler@gmail.com>
Date: Sun, 8 Aug 2021 22:35:08 +0200
Subject: [PATCH] add plots in genderequality and clean up hitlercomparison

---
 vignettes/genderequality.Rmd   | 99 ++++++++++++++--------------------
 vignettes/hitlercomparison.Rmd | 18 ++++++-
 2 files changed, 56 insertions(+), 61 deletions(-)

diff --git a/vignettes/genderequality.Rmd b/vignettes/genderequality.Rmd
index 9e44994..2b16188 100644
--- a/vignettes/genderequality.Rmd
+++ b/vignettes/genderequality.Rmd
@@ -97,7 +97,7 @@ speaker %>%
   speaker_with_gender
 ```
 
-#Analyse
+## Analyse
 
 First, let's look at the relative distribution of the sexes throughout the whole Bundestag.
 
@@ -120,9 +120,9 @@ pie +
   ylab("")
 ```
 
-Next we look at the individual distributions between men and women in the different fractions.
+Next, we look at the distribution of men and women within each of the fractions.
 
-```{r}
+```{r, fig.width=7}
 speaker_with_gender %>%
   group_by(fraction) %>%
   summarize(n = n()) ->
@@ -137,58 +137,41 @@ speaker_with_gender %>%
   bar_plot_fractions(women_per_fraction, x_variable=fraction, y_variable=q, title="Frauenanteil nach Partei")
 ```
 
-```r
-speaker_with_gender %>% 
-  select(fraction, gender) %>% 
-  group_by(fraction, gender) %>% 
-  summarise("count" = n()) %>% 
-  filter(gender %in% c("male", "female")) %>% 
-  filter(!is.na(fraction)) %>% 
-  group_by(fraction) %>% 
-  mutate(portion = 100*count/sum(count)) -> 
-  plot2
-
-plot2 %>% 
-  filter(fraction == "AfD") %>% 
-  ggplot(aes(x = "", y = portion, fill = gender))+
-  geom_bar(width = 1, stat = "identity") -> 
-  bp
-pie1 <- bp + coord_polar("y", start=0) + ggtitle("AfD") + xlab("") + ylab("")
-plot2 %>% 
-  filter(fraction == "BÜNDNIS 90 / DIE GRÜNEN") %>% 
-  ggplot(aes(x = "", y = portion, fill = gender))+
-  geom_bar(width = 1, stat = "identity") -> 
-  bp
-pie2 <- bp + coord_polar("y", start=0) + ggtitle("DIE GRÜNEN") + xlab("") + ylab("")
-plot2 %>% 
-  filter(fraction == "CDU/CSU") %>% 
-  ggplot(aes(x = "", y = portion, fill = gender))+
-  geom_bar(width = 1, stat = "identity") -> 
-  bp
-pie3 <- bp + coord_polar("y", start=0) + ggtitle("CDU/CSU") + xlab("") + ylab("")
-plot2 %>% 
-  filter(fraction == "DIE LINKE") %>% 
-  ggplot(aes(x = "", y = portion, fill = gender))+
-  geom_bar(width = 1, stat = "identity") -> 
-  bp
-pie4 <- bp + coord_polar("y", start=0) + ggtitle("DIE LINKE") + xlab("") + ylab("")
-plot2 %>% 
-  filter(fraction == "FDP") %>% 
-  ggplot(aes(x = "", y = portion, fill = gender))+
-  geom_bar(width = 1, stat = "identity") -> 
-  bp
-pie5 <- bp + coord_polar("y", start=0) + ggtitle("FDP") + xlab("") + ylab("")
-plot2 %>% 
-  filter(fraction == "SPD") %>% 
-  ggplot(aes(x = "", y = portion, fill = gender))+
-  geom_bar(width = 1, stat = "identity") -> 
-  bp
-pie6 <- bp + coord_polar("y", start=0) + ggtitle("SPD") + xlab("") + ylab("")
-
-gridExtra::grid.arrange(pie1,pie2,pie3,pie4,pie5,pie6,nrow=2)
+With this knowledge, we can now analyse the relative share of speeches by gender and fraction.
+
+```{r, fig.width=7}
+# Count speeches per fraction, overall and for women, keyed by speaker_id.
+speaker_with_gender %>%
+  transmute(speaker_id = id, gender, fraction) -> simple_speaker_with_gender
+speeches %>%
+  transmute(id, speaker_id = speaker) %>%
+  inner_join(simple_speaker_with_gender, by = "speaker_id") %>%
+  group_by(fraction) %>%
+  summarize(speeches = n()) -> fraction_speeches_size
+speeches %>%
+  transmute(id, speaker_id = speaker) %>%
+  inner_join(simple_speaker_with_gender, by = "speaker_id") %>%
+  filter(gender == "female") %>%
+  group_by(fraction) %>%
+  summarize(female_speeches = n()) %>%
+  left_join(fraction_speeches_size, by = "fraction") %>%
+  left_join(women_per_fraction) %>%
+  mutate(q_speeches = female_speeches / speeches) -> speech_distribution
+# Plain character vectors fix the axis and legend order; no factor() needed.
+party_order <- c("Fraktionslos", "AfD&Fraktionslos", "DIE LINKE",
+                 "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU", "FDP", "AfD")
+type_colours <- c("Frauenanteil" = "gray", "Redenanteil Frauen" = "red")
+speech_distribution %>%
+  mutate("Frauenanteil" = q, "Redenanteil Frauen" = q_speeches) %>%
+  pivot_longer(c(Frauenanteil, "Redenanteil Frauen"), names_to = "type") %>%
+  ggplot(aes(x = factor(fraction, levels = party_order), y = value,
+             fill = factor(type, levels = names(type_colours)))) +
+  geom_bar(stat = "identity", position = "dodge") +
+  scale_fill_manual(values = type_colours) +
+  coord_flip() + labs(fill = "Kategorie")
 ```
 
-Now let's analyze whether there are any differences in the amount of speeches given.
+For comparison, let's analyze the overall difference in the number of speeches given.
 ```{r}
 
 speeches %>%
@@ -208,8 +191,9 @@ speeches %>%
   mutate(relative2=relative/sum(relative)) -> 
   plot3
 ```
+
 At first lets take a look at the absolute difference in the amount of speeches by the two sexes.
-```{r}
+```{r, fig.width=7}
 barplot(plot3$absolute2,
         ylab = "amount of speeches",
         main = "Absolute comparison of speech shares",
@@ -219,8 +203,9 @@ barplot(plot3$absolute2,
         font.main = 4,
         cex.axis = 0.7)
 ```
+
 Since there are more men represented in the German Bundestag, we now consider the relative proportions of speeches, depending on the ratio of men and women.
-```{r}
+```{r, fig.width=7}
 barplot(plot3$relative2,
         ylab = "amount of speeches",
         main = "Relative comparison of speech shares",
@@ -230,7 +215,3 @@ barplot(plot3$relative2,
         font.main = 4,
         cex.axis = 0.7)
 ```
-
-
-
-
diff --git a/vignettes/hitlercomparison.Rmd b/vignettes/hitlercomparison.Rmd
index 3ac5846..f7b2999 100644
--- a/vignettes/hitlercomparison.Rmd
+++ b/vignettes/hitlercomparison.Rmd
@@ -119,8 +119,22 @@ all_words %>% group_by(Worte) %>% summarize(n = sum(n), part= sum(n)/total) -> a
 
 Now we want to extract the words that are more frequently used by a specific fraction.
 ```{r}
-afd_words %>% transmute(freq, fraction_n = n) %>% left_join(all_words) %>% transmute(fraction_freq = freq, total_freq = part, fraction_n, total_n = n, rel_quotient = fraction_freq/total_freq, abs_quotient = fraction_n/total_n) %>% arrange(-abs_quotient, -fraction_n) %>% filter(rel_quotient > 1) -> afd_high_frequent
-select(afd_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)
+# Keep AfD entries whose frequency exceeds the overall one (rel_quotient > 1).
+afd_words %>%
+  transmute(freq, fraction_n = n) %>%
+  left_join(all_words) %>%
+  transmute(
+    fraction_freq = freq, total_freq = part,
+    fraction_n, total_n = n,
+    rel_quotient = fraction_freq / total_freq,
+    abs_quotient = fraction_n / total_n
+  ) %>%
+  arrange(desc(abs_quotient), desc(fraction_n)) %>%
+  filter(rel_quotient > 1) -> afd_high_frequent
+
+afd_high_frequent %>%
+  select(fraction_n, total_n, abs_quotient, rel_quotient) %>%
+  filter(total_n > 80)
 
 afdundfraktionslos_words %>% transmute(freq, fraction_n = n) %>% left_join(all_words) %>% transmute(fraction_freq = freq, total_freq = part, fraction_n, total_n = n, rel_quotient = fraction_freq/total_freq, abs_quotient = fraction_n/total_n) %>% arrange(-abs_quotient, -fraction_n) %>% filter(rel_quotient > 1) -> afdundfraktionslos_high_frequent
 select(afdundfraktionslos_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)