瀏覽代碼

Merge branch 'master' of https://git.flavigny.de/christian/hateimparlament

genderequality-alternative
JosuaKugler 4 年之前
父節點
當前提交
5f9343bf7f
共有 3 個檔案被更改,包括 40 行新增33 行删除
  1. +4
    -16
      R/parse.R
  2. +35
    -16
      R/repair.R
  3. +1
    -1
      vignettes/funwithdata.Rmd

+ 4
- 16
R/parse.R 查看文件

@@ -13,6 +13,9 @@ read_all <- function(path="records/") {
available_protocols <- list.files(path)
res <- pblapply(available_protocols, read_one, path=path)

if (length(available_protocols) == 0)
stop("The given directory is empty or does not exist.")

lapply(res, `[[`, "speaker") %>%
bind_rows() %>%
distinct() ->
@@ -34,9 +37,6 @@ read_all <- function(path="records/") {
distinct() ->
commentsandapplause

if (length(available_protocols) == 0)
warning("The given directory is empty or does not exist.")

filter(commentsandapplause, type == "comment") %>%
select(-type) ->
comments
@@ -46,7 +46,7 @@ read_all <- function(path="records/") {
"SPD" = str_detect(fraction, "SPD"),
"FDP" = str_detect(fraction, "FDP"),
"DIE_LINKE" = str_detect(fraction, "DIE LINKE"),
"BÜNDNIS_90_DIE_GRÜNEN" = str_detect(fraction, "BÜNDNIS 90/DIE GRÜNEN"),
"BUENDNIS_90_DIE_GRUENEN" = str_detect(fraction, "BÜNDNIS 90/DIE GRÜNEN"),
"AfD" = str_detect(fraction, "AfD")) %>%
select(-fraction) ->
applause
@@ -227,15 +227,3 @@ read_from_csv <- function(path="csv/") {
comments = tibble %$% read.table(str_c(path, "comments.csv")),
applause = tibble %$% read.table(str_c(path, "applause.csv")))
}

# -------------------------------
# EXAMPLE USE

# make sure data ist downloaded via fetch.R
# res <- read_one("records/19126-data.xml")
#
# res$speaker
# res$speeches
# res$talks

# -------------------------------

+ 35
- 16
R/repair.R 查看文件

@@ -1,4 +1,5 @@
fractions <- c("AFD" = "AfD",
"AFD&FRAKTIONSLOS" = "AfD&Fraktionslos",
"BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN",
"BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
"FRAKTIONSLOS" = "Fraktionslos",
@@ -51,11 +52,20 @@ repair_talks <- function(talks) {
filter(talks, str_length(content) > 0)
}

# tries to find the correct speaker id given a name
# this is sufficient since every prename lastname combination in the bundestag is
# unique (luckily :D)
# returns a lookup table
lookup_speaker <- function(comments, speaker) {
#' Lookup name in speakers table
#'
#' Tries to find the correct speaker id given a name.
#' This is sufficient since every prename lastname combination in the bundestag is
#' unique (luckily :D)
#'
#' @param tb tibble
#' @param redner tibble
#' @param name_variable name
#'
#' Tries to match the name_variable column with speaker names
#'
#' returns a lookup table
lookup_speaker <- function(tb, speaker, name_variable) {
tobereplaced <- "[-–—‑­­-­­­ ]"
speaker %>%
unite(name, vorname, nachname, sep=".*") %>%
@@ -68,29 +78,38 @@ lookup_speaker <- function(comments, speaker) {
if (length(matches) == 0) return(NA_character_)
rs[head(matches, 1), ]$id
}
comments %>%
distinct(kommentator) %>%
mutate(speaker = Vectorize(find_match)(str_replace_all(kommentator, tobereplaced, "")))
tb %>%
distinct({{name_variable}}) %>%
mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, "")))
}

repair_comments <- function(comments, speaker) {
# try to find a speaker id for each actual comment
repair_comments <- function(comments, redner) {
cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
"Use repair(, repair_commments = FALSE) to skip this.\n"))
# try to find a redner id for each actual comment
comments %>%
filter(!is.na(kommentator)) %>%
lookup_speaker(speaker) %>%
lookup_redner(redner, kommentator) %>%
left_join(comments, ., by="kommentator") %>%
select(-kommentator)
}

#' Repair parsed tables
#'
#' TODO: Explain repair_comments argument
#' (if TRUE, we try to lookup redner names in redner table)
#'
#' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
#' Since repaired tables should be a fixpoint of repair.
#' @export
repair <- function(parse_output) {
list(speaker = repair_speaker(parse_output$speaker),
speeches = repair_speeches(parse_output$speeches),
repair <- function(parse_output, repair_comments = FALSE) {

list(redner = repair_speaker(parse_output$speaker),
reden = repair_speeches(parse_output$speeches),
talks = repair_talks(parse_output$talks),
#comments = repair_comments(parse_output$comments)
comments = parse_output$comments,
comments = if(repair_comments) repair_comments(parse_output$comments,
parse_output$speaker)
else parse_output$comments,
applause = parse_output$applause
)
}

+ 1
- 1
vignettes/funwithdata.Rmd 查看文件

@@ -107,7 +107,7 @@ res$applause %>%
group_by(on_fraction) %>%
arrange(on_fraction) %>%
summarize("AfD" = sum(`AfD`),
"BÜNDNIS 90 / DIE GRÜNEN" = sum(`BÜNDNIS_90_DIE_GRÜNEN`),
"BÜNDNIS 90 / DIE GRÜNEN" = sum(`BUENDNIS_90_DIE_GRUENEN`),
"CDU/CSU" = sum(`CDU_CSU`),
"DIE LINKE" = sum(`DIE_LINKE`),
"FDP" = sum(`FDP`),


Loading…
取消
儲存