瀏覽代碼

Replace non-ASCII characters with Unicode code point escapes

genderequality-alternative
flavis 4 年之前
父節點
當前提交
01ec0de76f
共有 2 個文件被更改,包括 3 次插入和 3 次刪除
  1. +2
    -2
      R/parse.R
  2. +1
    -1
      R/repair.R

+ 2
- 2
R/parse.R 查看文件

@@ -153,10 +153,10 @@ parse_speech <- function(speech_xml, date) {
}
} else if (xml_name(node) == "kommentar") {
# comments are of the form
# <kommentar>(blabla [Fraktion] – blabla liasdf – bla)</kommentar>
# <kommentar>(blabla [Fraktion] \u2013 blabla liasdf \u2013 bla)</kommentar>
xml_text(node) %>%
str_sub(2, -2) %>%
str_split("") %>%
str_split("\u2013") %>%
`[[`(1) %>%
lapply(parse_comment, speech_id = speech_id, on_speaker = cur_speaker) ->
cs


+ 1
- 1
R/repair.R 查看文件

@@ -64,7 +64,7 @@ repair_talks <- function(talks) {
#'
#' returns a lookup table
lookup_speaker <- function(tb, speaker, name_variable) {
tobereplaced <- "[-–—‑­­-­­­ ]"
tobereplaced <- "[\u002D\u2013\u2014\u2011\u00AD ]"
speaker %>%
unite(name, prename, lastname, sep=".*") %>%
mutate(name = str_replace_all(name, tobereplaced, ".*")) ->


Loading…
取消
儲存