diff --git a/R/parse.R b/R/parse.R index 520c211..d7fb267 100644 --- a/R/parse.R +++ b/R/parse.R @@ -153,10 +153,10 @@ parse_speech <- function(speech_xml, date) { } } else if (xml_name(node) == "kommentar") { # comments are of the form - # (blabla [Fraktion] – blabla liasdf – bla) + # (blabla [Fraktion] \u2013 blabla liasdf \u2013 bla) xml_text(node) %>% str_sub(2, -2) %>% - str_split("–") %>% + str_split("\u2013") %>% `[[`(1) %>% lapply(parse_comment, speech_id = speech_id, on_speaker = cur_speaker) -> cs diff --git a/R/repair.R b/R/repair.R index c52db56..01be4a0 100644 --- a/R/repair.R +++ b/R/repair.R @@ -64,7 +64,7 @@ repair_talks <- function(talks) { #' #' returns a lookup table lookup_speaker <- function(tb, speaker, name_variable) { - tobereplaced <- "[-–—‑­­-­­­ ]" + tobereplaced <- "[\u002D\u2013\u2014\u2011\u00AD ]" speaker %>% unite(name, prename, lastname, sep=".*") %>% mutate(name = str_replace_all(name, tobereplaced, ".*")) ->