ソースを参照

add plots in genderequality and clean up hitlercomparison

genderequality-alternative
JosuaKugler 4年前
コミット
1db9e3f59a
2個のファイルの変更56行の追加61行の削除
  1. +40
    -59
      vignettes/genderequality.Rmd
  2. +16
    -2
      vignettes/hitlercomparison.Rmd

+ 40
- 59
vignettes/genderequality.Rmd ファイルの表示

@@ -97,7 +97,7 @@ speaker %>%
speaker_with_gender speaker_with_gender
``` ```


#Analyse
## Analyse


First, let's look at the relative distribution of the sexes throughout the whole Bundestag. First, let's look at the relative distribution of the sexes throughout the whole Bundestag.


@@ -120,9 +120,9 @@ pie +
ylab("") ylab("")
``` ```


Next we look at the individual distributions between men and women in the different fractions.
Next, we look at the individual distributions between men and women in the different fractions.


```{r}
```{r, fig.width=7}
speaker_with_gender %>% speaker_with_gender %>%
group_by(fraction) %>% group_by(fraction) %>%
summarize(n = n()) -> summarize(n = n()) ->
@@ -137,58 +137,41 @@ speaker_with_gender %>%
bar_plot_fractions(women_per_fraction, x_variable=fraction, y_variable=q, title="Frauenanteil nach Partei") bar_plot_fractions(women_per_fraction, x_variable=fraction, y_variable=q, title="Frauenanteil nach Partei")
``` ```


```r
speaker_with_gender %>%
select(fraction, gender) %>%
group_by(fraction, gender) %>%
summarise("count" = n()) %>%
filter(gender %in% c("male", "female")) %>%
filter(!is.na(fraction)) %>%
group_by(fraction) %>%
mutate(portion = 100*count/sum(count)) ->
plot2

plot2 %>%
filter(fraction == "AfD") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie1 <- bp + coord_polar("y", start=0) + ggtitle("AfD") + xlab("") + ylab("")
plot2 %>%
filter(fraction == "BÜNDNIS 90 / DIE GRÜNEN") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie2 <- bp + coord_polar("y", start=0) + ggtitle("DIE GRÜNEN") + xlab("") + ylab("")
plot2 %>%
filter(fraction == "CDU/CSU") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie3 <- bp + coord_polar("y", start=0) + ggtitle("CDU/CSU") + xlab("") + ylab("")
plot2 %>%
filter(fraction == "DIE LINKE") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie4 <- bp + coord_polar("y", start=0) + ggtitle("DIE LINKE") + xlab("") + ylab("")
plot2 %>%
filter(fraction == "FDP") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie5 <- bp + coord_polar("y", start=0) + ggtitle("FDP") + xlab("") + ylab("")
plot2 %>%
filter(fraction == "SPD") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie6 <- bp + coord_polar("y", start=0) + ggtitle("SPD") + xlab("") + ylab("")

gridExtra::grid.arrange(pie1,pie2,pie3,pie4,pie5,pie6,nrow=2)
Prepared with this knowledge, we can now analyse the relative amount of speeches by gender and fraction.

```{r, fig.width=7}
speaker_with_gender %>% transmute(speaker_id = id, gender, fraction) -> simple_speaker_with_gender
speeches %>%
transmute(id, speaker_id = speaker) %>%
inner_join(simple_speaker_with_gender) %>%
group_by(fraction) %>%
summarize(speeches=n()) ->
fraction_speeches_size

speeches %>%
transmute(id, speaker_id = speaker) %>%
inner_join(simple_speaker_with_gender) %>%
filter(gender=='female') %>%
group_by(fraction) %>%
summarize(female_speeches=n()) %>%
left_join(fraction_speeches_size) %>%
left_join(women_per_fraction) %>%
mutate(q_speeches = female_speeches/speeches) -> speech_distribution
#bar_plot_fractions(speech_distribution, x_variable=fraction, y_variable=q_speeches, title="Redeanteil Frauen nach Partei")


party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
"DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
"FDP", "AfD", NA_character_))

speech_distribution %>%
mutate("Frauenanteil" = q, "Redenanteil Frauen" = q_speeches) %>%
pivot_longer(c(Frauenanteil, "Redenanteil Frauen"), "type") %>%
ggplot(aes(x=factor(fraction, levels = party_order), y=value, fill=factor(type, levels = factor(c("Frauenanteil", "Redenanteil Frauen"))))) + scale_fill_manual(values= c("Frauenanteil"="gray", "Redenanteil Frauen"="red")) + coord_flip() + geom_bar(stat="identity", position="dodge") + labs(fill="Kategorie")
``` ```


Now let's analyze whether there are any differences in the amount of speeches given.
For comparison, let's analyze the total differences in the amount of speeches given.
```{r} ```{r}


speeches %>% speeches %>%
@@ -208,8 +191,9 @@ speeches %>%
mutate(relative2=relative/sum(relative)) -> mutate(relative2=relative/sum(relative)) ->
plot3 plot3
``` ```

At first lets take a look at the absolute difference in the amount of speeches by the two sexes. At first lets take a look at the absolute difference in the amount of speeches by the two sexes.
```{r}
```{r,fig.width=7}
barplot(plot3$absolute2, barplot(plot3$absolute2,
ylab = "amount of speeches", ylab = "amount of speeches",
main = "Absolute comparison of speech shares", main = "Absolute comparison of speech shares",
@@ -219,8 +203,9 @@ barplot(plot3$absolute2,
font.main = 4, font.main = 4,
cex.axis = 0.7) cex.axis = 0.7)
``` ```

Since there are more men represented in the German Bundestag, we now consider the relative proportions of speeches, depending on the ratio of men and women. Since there are more men represented in the German Bundestag, we now consider the relative proportions of speeches, depending on the ratio of men and women.
```{r}
```{r, fig.width=7}
barplot(plot3$relative2, barplot(plot3$relative2,
ylab = "amount of speeches", ylab = "amount of speeches",
main = "Relative comparison of speech shares", main = "Relative comparison of speech shares",
@@ -230,7 +215,3 @@ barplot(plot3$relative2,
font.main = 4, font.main = 4,
cex.axis = 0.7) cex.axis = 0.7)
``` ```





+ 16
- 2
vignettes/hitlercomparison.Rmd ファイルの表示

@@ -119,8 +119,22 @@ all_words %>% group_by(Worte) %>% summarize(n = sum(n), part= sum(n)/total) -> a


Now we want to extract the words that are more frequently used by a specific fraction. Now we want to extract the words that are more frequently used by a specific fraction.
```{r} ```{r}
afd_words %>% transmute(freq, fraction_n = n) %>% left_join(all_words) %>% transmute(fraction_freq = freq, total_freq = part, fraction_n, total_n = n, rel_quotient = fraction_freq/total_freq, abs_quotient = fraction_n/total_n) %>% arrange(-abs_quotient, -fraction_n) %>% filter(rel_quotient > 1) -> afd_high_frequent
select(afd_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)
afd_words %>%
transmute(freq, fraction_n = n) %>%
left_join(all_words) %>%
transmute(
fraction_freq = freq,
total_freq = part,
fraction_n,
total_n = n,
rel_quotient = fraction_freq/total_freq,
abs_quotient = fraction_n/total_n) %>%
arrange(-abs_quotient, -fraction_n) %>%
filter(rel_quotient > 1) ->
afd_high_frequent

select(afd_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>%
filter(total_n > 80)


afdundfraktionslos_words %>% transmute(freq, fraction_n = n) %>% left_join(all_words) %>% transmute(fraction_freq = freq, total_freq = part, fraction_n, total_n = n, rel_quotient = fraction_freq/total_freq, abs_quotient = fraction_n/total_n) %>% arrange(-abs_quotient, -fraction_n) %>% filter(rel_quotient > 1) -> afdundfraktionslos_high_frequent afdundfraktionslos_words %>% transmute(freq, fraction_n = n) %>% left_join(all_words) %>% transmute(fraction_freq = freq, total_freq = part, fraction_n, total_n = n, rel_quotient = fraction_freq/total_freq, abs_quotient = fraction_n/total_n) %>% arrange(-abs_quotient, -fraction_n) %>% filter(rel_quotient > 1) -> afdundfraktionslos_high_frequent
select(afdundfraktionslos_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80) select(afdundfraktionslos_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)


読み込み中…
キャンセル
保存