Просмотр исходного кода

add plots in genderequality and clean up hitlercomparison

genderequality-alternative
JosuaKugler 4 лет назад
Родитель
Сommit
1db9e3f59a
2 измененных файлов: 56 добавлений и 61 удалений
  1. +40
    -59
      vignettes/genderequality.Rmd
  2. +16
    -2
      vignettes/hitlercomparison.Rmd

+ 40
- 59
vignettes/genderequality.Rmd Просмотреть файл

@@ -97,7 +97,7 @@ speaker %>%
speaker_with_gender speaker_with_gender
``` ```


#Analyse
## Analyse


First, let's look at the relative distribution of the sexes throughout the whole Bundestag. First, let's look at the relative distribution of the sexes throughout the whole Bundestag.


@@ -120,9 +120,9 @@ pie +
ylab("") ylab("")
``` ```


Next we look at the individual distributions between men and women in the different fractions.
Next, we look at the individual distributions between men and women in the different fractions.


```{r}
```{r, fig.width=7}
speaker_with_gender %>% speaker_with_gender %>%
group_by(fraction) %>% group_by(fraction) %>%
summarize(n = n()) -> summarize(n = n()) ->
@@ -137,58 +137,41 @@ speaker_with_gender %>%
bar_plot_fractions(women_per_fraction, x_variable=fraction, y_variable=q, title="Frauenanteil nach Partei") bar_plot_fractions(women_per_fraction, x_variable=fraction, y_variable=q, title="Frauenanteil nach Partei")
``` ```


```r
speaker_with_gender %>%
select(fraction, gender) %>%
group_by(fraction, gender) %>%
summarise("count" = n()) %>%
filter(gender %in% c("male", "female")) %>%
filter(!is.na(fraction)) %>%
group_by(fraction) %>%
mutate(portion = 100*count/sum(count)) ->
plot2

plot2 %>%
filter(fraction == "AfD") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie1 <- bp + coord_polar("y", start=0) + ggtitle("AfD") + xlab("") + ylab("")
plot2 %>%
filter(fraction == "BÜNDNIS 90 / DIE GRÜNEN") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie2 <- bp + coord_polar("y", start=0) + ggtitle("DIE GRÜNEN") + xlab("") + ylab("")
plot2 %>%
filter(fraction == "CDU/CSU") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie3 <- bp + coord_polar("y", start=0) + ggtitle("CDU/CSU") + xlab("") + ylab("")
plot2 %>%
filter(fraction == "DIE LINKE") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie4 <- bp + coord_polar("y", start=0) + ggtitle("DIE LINKE") + xlab("") + ylab("")
plot2 %>%
filter(fraction == "FDP") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie5 <- bp + coord_polar("y", start=0) + ggtitle("FDP") + xlab("") + ylab("")
plot2 %>%
filter(fraction == "SPD") %>%
ggplot(aes(x = "", y = portion, fill = gender))+
geom_bar(width = 1, stat = "identity") ->
bp
pie6 <- bp + coord_polar("y", start=0) + ggtitle("SPD") + xlab("") + ylab("")

gridExtra::grid.arrange(pie1,pie2,pie3,pie4,pie5,pie6,nrow=2)
Prepared with this knowledge, we can now analyse the relative amount of speeches by gender and fraction.

```{r, fig.width=7}
speaker_with_gender %>% transmute(speaker_id = id, gender, fraction) -> simple_speaker_with_gender
speeches %>%
transmute(id, speaker_id = speaker) %>%
inner_join(simple_speaker_with_gender) %>%
group_by(fraction) %>%
summarize(speeches=n()) ->
fraction_speeches_size

speeches %>%
transmute(id, speaker_id = speaker) %>%
inner_join(simple_speaker_with_gender) %>%
filter(gender=='female') %>%
group_by(fraction) %>%
summarize(female_speeches=n()) %>%
left_join(fraction_speeches_size) %>%
left_join(women_per_fraction) %>%
mutate(q_speeches = female_speeches/speeches) -> speech_distribution
#bar_plot_fractions(speech_distribution, x_variable=fraction, y_variable=q_speeches, title="Redeanteil Frauen nach Partei")


party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
"DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
"FDP", "AfD", NA_character_))

speech_distribution %>%
mutate("Frauenanteil" = q, "Redenanteil Frauen" = q_speeches) %>%
pivot_longer(c(Frauenanteil, "Redenanteil Frauen"), "type") %>%
ggplot(aes(x=factor(fraction, levels = party_order), y=value, fill=factor(type, levels = factor(c("Frauenanteil", "Redenanteil Frauen"))))) + scale_fill_manual(values= c("Frauenanteil"="gray", "Redenanteil Frauen"="red")) + coord_flip() + geom_bar(stat="identity", position="dodge") + labs(fill="Kategorie")
``` ```


Now let's analyze whether there are any differences in the amount of speeches given.
For comparison, let's analyze the total differences in the amount of speeches given.
```{r} ```{r}


speeches %>% speeches %>%
@@ -208,8 +191,9 @@ speeches %>%
mutate(relative2=relative/sum(relative)) -> mutate(relative2=relative/sum(relative)) ->
plot3 plot3
``` ```

At first lets take a look at the absolute difference in the amount of speeches by the two sexes. At first lets take a look at the absolute difference in the amount of speeches by the two sexes.
```{r}
```{r,fig.width=7}
barplot(plot3$absolute2, barplot(plot3$absolute2,
ylab = "amount of speeches", ylab = "amount of speeches",
main = "Absolute comparison of speech shares", main = "Absolute comparison of speech shares",
@@ -219,8 +203,9 @@ barplot(plot3$absolute2,
font.main = 4, font.main = 4,
cex.axis = 0.7) cex.axis = 0.7)
``` ```

Since there are more men represented in the German Bundestag, we now consider the relative proportions of speeches, depending on the ratio of men and women. Since there are more men represented in the German Bundestag, we now consider the relative proportions of speeches, depending on the ratio of men and women.
```{r}
```{r, fig.width=7}
barplot(plot3$relative2, barplot(plot3$relative2,
ylab = "amount of speeches", ylab = "amount of speeches",
main = "Relative comparison of speech shares", main = "Relative comparison of speech shares",
@@ -230,7 +215,3 @@ barplot(plot3$relative2,
font.main = 4, font.main = 4,
cex.axis = 0.7) cex.axis = 0.7)
``` ```





+ 16
- 2
vignettes/hitlercomparison.Rmd Просмотреть файл

@@ -119,8 +119,22 @@ all_words %>% group_by(Worte) %>% summarize(n = sum(n), part= sum(n)/total) -> a


Now we want to extract the words that are more frequently used by a specific fraction. Now we want to extract the words that are more frequently used by a specific fraction.
```{r} ```{r}
afd_words %>% transmute(freq, fraction_n = n) %>% left_join(all_words) %>% transmute(fraction_freq = freq, total_freq = part, fraction_n, total_n = n, rel_quotient = fraction_freq/total_freq, abs_quotient = fraction_n/total_n) %>% arrange(-abs_quotient, -fraction_n) %>% filter(rel_quotient > 1) -> afd_high_frequent
select(afd_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)
afd_words %>%
transmute(freq, fraction_n = n) %>%
left_join(all_words) %>%
transmute(
fraction_freq = freq,
total_freq = part,
fraction_n,
total_n = n,
rel_quotient = fraction_freq/total_freq,
abs_quotient = fraction_n/total_n) %>%
arrange(-abs_quotient, -fraction_n) %>%
filter(rel_quotient > 1) ->
afd_high_frequent

select(afd_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>%
filter(total_n > 80)


afdundfraktionslos_words %>% transmute(freq, fraction_n = n) %>% left_join(all_words) %>% transmute(fraction_freq = freq, total_freq = part, fraction_n, total_n = n, rel_quotient = fraction_freq/total_freq, abs_quotient = fraction_n/total_n) %>% arrange(-abs_quotient, -fraction_n) %>% filter(rel_quotient > 1) -> afdundfraktionslos_high_frequent afdundfraktionslos_words %>% transmute(freq, fraction_n = n) %>% left_join(all_words) %>% transmute(fraction_freq = freq, total_freq = part, fraction_n, total_n = n, rel_quotient = fraction_freq/total_freq, abs_quotient = fraction_n/total_n) %>% arrange(-abs_quotient, -fraction_n) %>% filter(rel_quotient > 1) -> afdundfraktionslos_high_frequent
select(afdundfraktionslos_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80) select(afdundfraktionslos_high_frequent, fraction_n, total_n, abs_quotient, rel_quotient) %>% filter(total_n > 80)


Загрузка…
Отмена
Сохранить