| @@ -48,14 +48,14 @@ party_colors <- c( | |||||
| FDP="#FEEB34", | FDP="#FEEB34", | ||||
| "CDU/CSU"="#000000", | "CDU/CSU"="#000000", | ||||
| SPD="#DF0B25", | SPD="#DF0B25", | ||||
| "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B", | |||||
| "BÜNDNIS 90/DIE GRÜNEN"="#4A932B", | |||||
| "DIE LINKE"="#BC3475", | "DIE LINKE"="#BC3475", | ||||
| "AfD&Fraktionslos"="#AAAAFF", | "AfD&Fraktionslos"="#AAAAFF", | ||||
| Fraktionslos="#AAAAAA" | Fraktionslos="#AAAAAA" | ||||
| ) | ) | ||||
| party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos", | party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos", | ||||
| "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU", | |||||
| "DIE LINKE", "BÜNDNIS 90/DIE GRÜNEN", "SPD", "CDU/CSU", | |||||
| "FDP", "AfD", NA_character_)) | "FDP", "AfD", NA_character_)) | ||||
| #' Bar chart visualizing fraction based data | #' Bar chart visualizing fraction based data | ||||
| @@ -173,7 +173,8 @@ parse_speech <- function(speech_xml, date) { | |||||
| } | } | ||||
| fractionpattern <- "BÜNDNIS(SES)?\\W*90/DIE\\W*GRÜNEN|CDU/CSU|AfD|SPD|DIE LINKE|FDP|LINKEN" | fractionpattern <- "BÜNDNIS(SES)?\\W*90/DIE\\W*GRÜNEN|CDU/CSU|AfD|SPD|DIE LINKE|FDP|LINKEN" | ||||
| fractionnames <- c("BÜNDNIS 90/DIE GRÜNEN", "CDU/CSU", "AfD", "SPD", "DIE LINKE", "FDP") | |||||
| fractionnames <- c("BÜNDNIS 90/DIE GRÜNEN", "CDU/CSU", "AfD", "SPD", "DIE LINKE", "FDP", | |||||
| "Fraktionslos") | |||||
| parse_comment <- function(comment, speech_id, on_speaker) { | parse_comment <- function(comment, speech_id, on_speaker) { | ||||
| base <- c(speech_id = speech_id, on_speaker = on_speaker) | base <- c(speech_id = speech_id, on_speaker = on_speaker) | ||||
| @@ -187,7 +188,9 @@ parse_comment <- function(comment, speech_id, on_speaker) { | |||||
| c(base, type = "applause", fraction = by, commenter = NA_character_, content = comment) | c(base, type = "applause", fraction = by, commenter = NA_character_, content = comment) | ||||
| } else { | } else { | ||||
| ps <- str_match(comment, "(.*) \\[(.*?)\\]: (.*)")[1,] | ps <- str_match(comment, "(.*) \\[(.*?)\\]: (.*)")[1,] | ||||
| c(base, type = "comment", fraction = ps[3], commenter = ps[2], content = ps[4]) | |||||
| fraction <- agrep(ps[3], fractionnames, max=0.2, value=T) | |||||
| if (all(is.na(fraction)) || length(fraction) == 0) fraction <- NA_character_ | |||||
| c(base, type = "comment", fraction = fraction, commenter = ps[2], content = ps[4]) | |||||
| } | } | ||||
| } | } | ||||
| @@ -1,7 +1,7 @@ | |||||
| fractions <- c("AFD" = "AfD", | fractions <- c("AFD" = "AfD", | ||||
| "AFD&FRAKTIONSLOS" = "AfD&Fraktionslos", | "AFD&FRAKTIONSLOS" = "AfD&Fraktionslos", | ||||
| "BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN", | |||||
| "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN", | |||||
| "BÜNDNIS90/" = "BÜNDNIS 90/DIE GRÜNEN", | |||||
| "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90/DIE GRÜNEN", | |||||
| "FRAKTIONSLOS" = "Fraktionslos", | "FRAKTIONSLOS" = "Fraktionslos", | ||||
| "DIELINKE" = "DIE LINKE", | "DIELINKE" = "DIE LINKE", | ||||
| "SPD" = "SPD", | "SPD" = "SPD", | ||||
| @@ -81,35 +81,39 @@ lookup_speaker <- function(tb, speaker, name_variable) { | |||||
| mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) | mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) | ||||
| } | } | ||||
| repair_comments <- function(comments, speaker) { | |||||
| cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n", | |||||
| "Use repair(, repair_commments = FALSE) to skip this.\n")) | |||||
| # try to find a speaker id for each actual comment | |||||
| repair_comments <- function(comments, speaker, lookup_speaker=F) { | |||||
| comments %>% | comments %>% | ||||
| filter(!is.na(commenter)) %>% | |||||
| lookup_speaker(speaker, commenter) %>% | |||||
| left_join(comments, ., by="commenter") %>% | |||||
| select(-commenter) | |||||
| filter(!is.na(commenter) | !is.na(content) | !is.na(fraction)) -> | |||||
| tb | |||||
| if (lookup_speaker) { | |||||
| cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n", | |||||
| "Use repair(, lookup_speaker = FALSE) to skip this.\n")) | |||||
| # try to find a speaker id for each actual comment | |||||
| tb %>% | |||||
| filter(!is.na(commenter)) %>% | |||||
| lookup_speaker(speaker, commenter) %>% | |||||
| left_join(tb, ., by="commenter") | |||||
| } else tb | |||||
| } | } | ||||
| #' Repair parsed tables | #' Repair parsed tables | ||||
| #' | #' | ||||
| #' @param parse_output tibble | #' @param parse_output tibble | ||||
| #' @param repair_comments bool | |||||
| #' @param lookup_speaker bool | |||||
| #' | #' | ||||
| #' If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table. | |||||
| #' If lookup_speaker is TRUE, members of the parliament mentioned in comments are looked up in speaker table. | |||||
| #' | #' | ||||
| #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE | #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE | ||||
| #' Since repaired tables should be a fixpoint of repair. | #' Since repaired tables should be a fixpoint of repair. | ||||
| #' @export | #' @export | ||||
| repair <- function(parse_output, repair_comments = FALSE) { | |||||
| repair <- function(parse_output, lookup_speaker = FALSE) { | |||||
| is_valid_res(parse_output) | |||||
| stopifnot("lookup_speaker must be of type logical" = is.logical(lookup_speaker)) | |||||
| list(speaker = repair_speaker(parse_output$speaker), | list(speaker = repair_speaker(parse_output$speaker), | ||||
| speeches = repair_speeches(parse_output$speeches), | speeches = repair_speeches(parse_output$speeches), | ||||
| talks = repair_talks(parse_output$talks), | talks = repair_talks(parse_output$talks), | ||||
| comments = if(repair_comments) repair_comments(parse_output$comments, | |||||
| parse_output$speaker) | |||||
| else parse_output$comments, | |||||
| applause = parse_output$applause | |||||
| ) | |||||
| comments = repair_comments(parse_output$comments, | |||||
| parse_output$speaker, | |||||
| lookup_speaker), | |||||
| applause = parse_output$applause) | |||||
| } | } | ||||
| @@ -52,7 +52,7 @@ res$applause %>% | |||||
| group_by(on_fraction) %>% | group_by(on_fraction) %>% | ||||
| arrange(on_fraction) %>% | arrange(on_fraction) %>% | ||||
| summarize("AfD" = sum(`AfD`), | summarize("AfD" = sum(`AfD`), | ||||
| "BÜNDNIS 90 / DIE GRÜNEN" = sum(`BUENDNIS_90_DIE_GRUENEN`), | |||||
| "BÜNDNIS 90/DIE GRÜNEN" = sum(`BUENDNIS_90_DIE_GRUENEN`), | |||||
| "CDU/CSU" = sum(`CDU_CSU`), | "CDU/CSU" = sum(`CDU_CSU`), | ||||
| "DIE LINKE" = sum(`DIE_LINKE`), | "DIE LINKE" = sum(`DIE_LINKE`), | ||||
| "FDP" = sum(`FDP`), | "FDP" = sum(`FDP`), | ||||
| @@ -84,7 +84,7 @@ res$comments %>% | |||||
| select(by_fraction = fraction.x, on_fraction = fraction.y) %>% | select(by_fraction = fraction.x, on_fraction = fraction.y) %>% | ||||
| group_by(on_fraction) %>% | group_by(on_fraction) %>% | ||||
| summarize(`AfD` = sum(str_detect(by_fraction, "AfD"), na.rm=T), | summarize(`AfD` = sum(str_detect(by_fraction, "AfD"), na.rm=T), | ||||
| `BÜNDNIS 90 / DIE GRÜNEN` = sum(str_detect(by_fraction, "BÜNDNIS 90/DIE GRÜNEN"), na.rm=T), | |||||
| `BÜNDNIS 90/DIE GRÜNEN` = sum(str_detect(by_fraction, "BÜNDNIS 90/DIE GRÜNEN"), na.rm=T), | |||||
| `CDU/CSU` = sum(str_detect(by_fraction, "CDU/CSU"), na.rm = T), | `CDU/CSU` = sum(str_detect(by_fraction, "CDU/CSU"), na.rm = T), | ||||
| `DIE LINKE` = sum(str_detect(by_fraction, "DIE LINKE"), na.rm=T), | `DIE LINKE` = sum(str_detect(by_fraction, "DIE LINKE"), na.rm=T), | ||||
| `FDP` = sum(str_detect(by_fraction, "FDP"), na.rm=T), | `FDP` = sum(str_detect(by_fraction, "FDP"), na.rm=T), | ||||