change spelling of greens fraction, fix parsing issue in comments table

преди 4 години · 77f79f0d86
--- a/R/analyze.R
+++ b/R/analyze.R
@@ -48,14 +48,14 @@ party_colors <- c(
  FDP="#FEEB34",
  "CDU/CSU"="#000000", 
  SPD="#DF0B25",
  "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B",
  "BÜNDNIS 90/DIE GRÜNEN"="#4A932B",
  "DIE LINKE"="#BC3475",
  "AfD&Fraktionslos"="#AAAAFF",
  Fraktionslos="#AAAAAA"
 )
 party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
                        "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
                        "DIE LINKE", "BÜNDNIS 90/DIE GRÜNEN", "SPD", "CDU/CSU",
                        "FDP", "AfD", NA_character_))
 #' Bar chart visualizing fraction based data
--- a/R/parse.R
+++ b/R/parse.R
@@ -173,7 +173,8 @@ parse_speech <- function(speech_xml, date) {
 }
 fractionpattern <- "BÜNDNIS(SES)?\\W*90/DIE\\W*GRÜNEN|CDU/CSU|AfD|SPD|DIE LINKE|FDP|LINKEN"
 fractionnames <- c("BÜNDNIS 90/DIE GRÜNEN", "CDU/CSU", "AfD", "SPD", "DIE LINKE", "FDP")
 fractionnames <- c("BÜNDNIS 90/DIE GRÜNEN", "CDU/CSU", "AfD", "SPD", "DIE LINKE", "FDP",
                   "Fraktionslos")
 parse_comment <- function(comment, speech_id, on_speaker) {
    base <- c(speech_id = speech_id, on_speaker = on_speaker)
@@ -187,7 +188,9 @@ parse_comment <- function(comment, speech_id, on_speaker) {
        c(base, type = "applause", fraction = by, commenter = NA_character_, content = comment)
    } else {
        ps <- str_match(comment, "(.*) \\[(.*?)\\]: (.*)")[1,]
        c(base, type = "comment", fraction = ps[3], commenter = ps[2], content = ps[4])
        fraction <- agrep(ps[3], fractionnames, max=0.2, value=T)
        if (all(is.na(fraction)) || length(fraction) == 0) fraction <- NA_character_
        c(base, type = "comment", fraction = fraction, commenter = ps[2], content = ps[4])
    }
 }
--- a/R/repair.R
+++ b/R/repair.R
@@ -1,7 +1,7 @@
 fractions <- c("AFD" = "AfD",
                "AFD&FRAKTIONSLOS" = "AfD&Fraktionslos",
                "BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN",
                "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
                "BÜNDNIS90/" = "BÜNDNIS 90/DIE GRÜNEN",
                "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90/DIE GRÜNEN",
                "FRAKTIONSLOS" = "Fraktionslos",
                "DIELINKE" = "DIE LINKE",
                "SPD" = "SPD",
@@ -81,35 +81,39 @@ lookup_speaker <- function(tb, speaker, name_variable) {
        mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, "")))
 }
 repair_comments <- function(comments, speaker) {
    cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
               "Use repair(, repair_commments = FALSE) to skip this.\n"))
    # try to find a speaker id for each actual comment
 repair_comments <- function(comments, speaker, lookup_speaker=F) {
    comments %>%
        filter(!is.na(commenter)) %>%
        lookup_speaker(speaker, commenter) %>%
        left_join(comments, ., by="commenter") %>%
        select(-commenter)
        filter(!is.na(commenter) | !is.na(content) | !is.na(fraction)) ->
        tb
    if (lookup_speaker) {
        cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
                   "Use repair(, lookup_speaker = FALSE) to skip this.\n"))
        # try to find a speaker id for each actual comment
        tb %>%
            filter(!is.na(commenter)) %>%
            lookup_speaker(speaker, commenter) %>%
            left_join(tb, ., by="commenter")
    } else tb
 }
 #' Repair parsed tables
 #'
 #' @param parse_output tibble
 #' @param repair_comments bool
 #' @param lookup_speaker bool
 #' 
 #' If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table.
 #' If lookup_speaker is TRUE, members of the parliament mentioned in comments are looked up in speaker table.
 #'
 #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
 #' Since repaired tables should be a fixpoint of repair.
 #' @export
 repair <- function(parse_output, repair_comments = FALSE) {
 repair <- function(parse_output, lookup_speaker = FALSE) {
    is_valid_res(parse_output)
    stopifnot("lookup_speaker must be of type logical" = is.logical(lookup_speaker))
    list(speaker = repair_speaker(parse_output$speaker),
         speeches = repair_speeches(parse_output$speeches),
         talks = repair_talks(parse_output$talks),
         comments = if(repair_comments) repair_comments(parse_output$comments,
                                                        parse_output$speaker)
                    else parse_output$comments,
         applause = parse_output$applause
         )
         comments = repair_comments(parse_output$comments,
                                    parse_output$speaker,
                                    lookup_speaker),
         applause = parse_output$applause)
 }
--- a/vignettes/interaction.Rmd
+++ b/vignettes/interaction.Rmd
@@ -52,7 +52,7 @@ res$applause %>%
    group_by(on_fraction) %>%
    arrange(on_fraction) %>%
    summarize("AfD" = sum(`AfD`),
              "BÜNDNIS 90 / DIE GRÜNEN" = sum(`BUENDNIS_90_DIE_GRUENEN`),
              "BÜNDNIS 90/DIE GRÜNEN" = sum(`BUENDNIS_90_DIE_GRUENEN`),
              "CDU/CSU" = sum(`CDU_CSU`),
              "DIE LINKE" = sum(`DIE_LINKE`),
              "FDP" = sum(`FDP`),
@@ -84,7 +84,7 @@ res$comments %>%
    select(by_fraction = fraction.x, on_fraction = fraction.y) %>%
    group_by(on_fraction) %>%
    summarize(`AfD` = sum(str_detect(by_fraction, "AfD"), na.rm=T),
              `BÜNDNIS 90 / DIE GRÜNEN` = sum(str_detect(by_fraction, "BÜNDNIS 90/DIE GRÜNEN"), na.rm=T),
              `BÜNDNIS 90/DIE GRÜNEN` = sum(str_detect(by_fraction, "BÜNDNIS 90/DIE GRÜNEN"), na.rm=T),
              `CDU/CSU` = sum(str_detect(by_fraction, "CDU/CSU"), na.rm = T),
              `DIE LINKE` = sum(str_detect(by_fraction, "DIE LINKE"), na.rm=T),
              `FDP` = sum(str_detect(by_fraction, "FDP"), na.rm=T),