From 64ede5c757ae12ec4332c303c1517c9bf41e21c3 Mon Sep 17 00:00:00 2001
From: flavis <christian@flavigny.de>
Date: Tue, 3 Aug 2021 12:48:01 +0200
Subject: [PATCH 1/4] =?UTF-8?q?replace=20=C3=9C=20with=20UE=20in=20colname?=
 =?UTF-8?q?s=20in=20applause,=20move=20check=20of=20empty=20directory=20to?=
 =?UTF-8?q?=20beginning=20in=20read=5Fall?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 R/parse.R                 | 8 ++++----
 vignettes/funwithdata.Rmd | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/parse.R b/R/parse.R
index 40b6045..603c358 100644
--- a/R/parse.R
+++ b/R/parse.R
@@ -13,6 +13,9 @@ read_all <- function(path="records/") {
     available_protocols <- list.files(path)
     res <- pblapply(available_protocols, read_one, path=path)
 
+    if (length(available_protocols) == 0)
+        stop("The given directory is empty or does not exist.")
+
     lapply(res, `[[`, "redner") %>%
         bind_rows() %>%
         distinct() ->
@@ -34,9 +37,6 @@ read_all <- function(path="records/") {
         distinct() ->
         commentsandapplause
 
-    if (length(available_protocols) == 0)
-        warning("The given directory is empty or does not exist.")
-
     filter(commentsandapplause, type == "comment") %>%
         select(-type) ->
         comments
@@ -46,7 +46,7 @@ read_all <- function(path="records/") {
                "SPD" = str_detect(fraktion, "SPD"),
                "FDP" = str_detect(fraktion, "FDP"),
                "DIE_LINKE" = str_detect(fraktion, "DIE LINKE"),
-               "BÜNDNIS_90_DIE_GRÜNEN" = str_detect(fraktion, "BÜNDNIS 90/DIE GRÜNEN"),
+               "BUENDNIS_90_DIE_GRUENEN" = str_detect(fraktion, "BÜNDNIS 90/DIE GRÜNEN"),
                "AfD" = str_detect(fraktion, "AfD")) %>%
         select(-fraktion) ->
         applause
diff --git a/vignettes/funwithdata.Rmd b/vignettes/funwithdata.Rmd
index d5dded1..fbfe4af 100644
--- a/vignettes/funwithdata.Rmd
+++ b/vignettes/funwithdata.Rmd
@@ -107,7 +107,7 @@ res$applause %>%
     group_by(on_fraktion) %>%
     arrange(on_fraktion) %>%
     summarize("AfD" = sum(`AfD`),
-              "BÜNDNIS 90 / DIE GRÜNEN" = sum(`BÜNDNIS_90_DIE_GRÜNEN`),
+              "BÜNDNIS 90 / DIE GRÜNEN" = sum(`BUENDNIS_90_DIE_GRUENEN`),
               "CDU/CSU" = sum(`CDU_CSU`),
               "DIE LINKE" = sum(`DIE_LINKE`),
               "FDP" = sum(`FDP`),

From f91456116fa1a06d689ec938e2c9990681d35a07 Mon Sep 17 00:00:00 2001
From: flavis <christian@flavigny.de>
Date: Tue, 3 Aug 2021 12:48:32 +0200
Subject: [PATCH 2/4] remove outdated example code

---
 R/parse.R | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/R/parse.R b/R/parse.R
index 603c358..ca428c3 100644
--- a/R/parse.R
+++ b/R/parse.R
@@ -227,15 +227,3 @@ read_from_csv <- function(path="csv/") {
          comments = tibble %$% read.table(str_c(path, "comments.csv")),
          applause = tibble %$% read.table(str_c(path, "applause.csv")))
 }
-
-# -------------------------------
-# EXAMPLE USE
-
-# make sure data ist downloaded via fetch.R
-# res <- read_one("records/19126-data.xml")
-# 
-# res$redner
-# res$reden
-# res$talks
-
-# -------------------------------

From 7771b2ebd8678e19a3c397392840c3cb43719b87 Mon Sep 17 00:00:00 2001
From: flavis <christian@flavigny.de>
Date: Tue, 3 Aug 2021 13:26:39 +0200
Subject: [PATCH 3/4] add option to lookup redner in repair and fix fraction
 list to ensure repaired tables are fixpoints of repair(, repair_comments =
 FALSE)

---
 R/repair.R | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/R/repair.R b/R/repair.R
index 21d5738..ba62357 100644
--- a/R/repair.R
+++ b/R/repair.R
@@ -1,4 +1,5 @@
 fraktionen <- c("AFD" = "AfD",
+                "AFD&FRAKTIONSLOS" = "AfD&Fraktionslos",
                 "BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN",
                 "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
                 "FRAKTIONSLOS" = "Fraktionslos",
@@ -84,13 +85,20 @@ repair_comments <- function(comments, redner) {
 
 #' Repair parsed tables
 #'
+#' TODO: Explain repair_comments argument
+#' (if TRUE, we try to lookup redner names in redner table)
+#'
+#' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
+#' Since repaired tables should be a fixpoint of repair.
 #' @export
-repair <- function(parse_output) {
+repair <- function(parse_output, repair_comments = FALSE) {
+
     list(redner = repair_redner(parse_output$redner),
          reden = repair_reden(parse_output$reden),
          talks = repair_talks(parse_output$talks),
-         #comments = repair_comments(parse_output$comments)
-         comments = parse_output$comments,
+         comments = if(repair_comments) repair_comments(parse_output$comments,
+                                                        parse_output$redner)
+                    else parse_output$comments,
          applause = parse_output$applause
          )
 }

From 5dc308c16e1fcfeab0c65681345191c8d620d7a3 Mon Sep 17 00:00:00 2001
From: flavis <christian@flavigny.de>
Date: Tue, 3 Aug 2021 13:40:02 +0200
Subject: [PATCH 4/4] generalize lookup_redner helper to allow lookup of names
 in arbitrary tables, add info text

---
 R/repair.R | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/R/repair.R b/R/repair.R
index ba62357..e1bfdb9 100644
--- a/R/repair.R
+++ b/R/repair.R
@@ -52,11 +52,20 @@ repair_talks <- function(talks) {
     filter(talks, str_length(content) > 0)
 }
 
-# tries to find the correct redner id given a name
-# this is sufficient since every prename lastname combination in the bundestag is
-# unique (luckily :D)
-# returns a lookup table
-lookup_redner <- function(comments, redner) {
+#' Lookup name in speakers table
+#'
+#' Tries to find the correct speaker id given a name.
+#' This is sufficient since every prename lastname combination in the bundestag is
+#' unique (luckily :D)
+#' 
+#' @param tb tibble
+#' @param redner tibble
+#' @param name_variable name
+#' 
+#' Tries to match the name_variable column with speaker names
+#' 
+#' returns a lookup table
+lookup_redner <- function(tb, redner, name_variable) {
     tobereplaced <- "[-–—‑­­-­­­ ]"
     redner %>%
         unite(name, vorname, nachname, sep=".*") %>%
@@ -69,16 +78,18 @@ lookup_redner <- function(comments, redner) {
         if (length(matches) == 0) return(NA_character_)
         rs[head(matches, 1), ]$id
     }
-    comments %>%
-        distinct(kommentator) %>%
-        mutate(redner = Vectorize(find_match)(str_replace_all(kommentator, tobereplaced, "")))
+    tb %>%
+        distinct({{name_variable}}) %>%
+        mutate(redner = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, "")))
 }
 
 repair_comments <- function(comments, redner) {
+    cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
+               "Use repair(, repair_commments = FALSE) to skip this.\n"))
     # try to find a redner id for each actual comment
     comments %>%
         filter(!is.na(kommentator)) %>%
-        lookup_redner(redner) %>%
+        lookup_redner(redner, kommentator) %>%
         left_join(comments, ., by="kommentator") %>%
         select(-kommentator)
 }