Explorar el Código

Replace non-ASCII characters with Unicode escape sequences

genderequality-alternative
flavis hace 4 años
padre
commit
01ec0de76f
Se han modificado 2 ficheros con 3 adiciones y 3 borrados
  1. +2
    -2
      R/parse.R
  2. +1
    -1
      R/repair.R

+ 2
- 2
R/parse.R Ver fichero

@@ -153,10 +153,10 @@ parse_speech <- function(speech_xml, date) {
}
} else if (xml_name(node) == "kommentar") {
# comments are of the form
# <kommentar>(blabla [Fraktion] – blabla liasdf – bla)</kommentar>
# <kommentar>(blabla [Fraktion] \u2013 blabla liasdf \u2013 bla)</kommentar>
xml_text(node) %>%
str_sub(2, -2) %>%
str_split("–") %>%
str_split("\u2013") %>%
`[[`(1) %>%
lapply(parse_comment, speech_id = speech_id, on_speaker = cur_speaker) ->
cs


+ 1
- 1
R/repair.R Ver fichero

@@ -64,7 +64,7 @@ repair_talks <- function(talks) {
#'
#' returns a lookup table
lookup_speaker <- function(tb, speaker, name_variable) {
tobereplaced <- "[-–—‑­­-­­­ ]"
tobereplaced <- "[\u002D\u2013\u2014\u2011\u00AD ]"
speaker %>%
unite(name, prename, lastname, sep=".*") %>%
mutate(name = str_replace_all(name, tobereplaced, ".*")) ->


Cargando…
Cancelar
Guardar