Просмотр исходного кода

change spelling of greens fraction, fix parsing issue in comments table

genderequality-alternative
flavis 4 лет назад
Родитель
Сommit
77f79f0d86
4 измененных файлов: 32 добавлений и 25 удалений
  1. +2
    -2
      R/analyze.R
  2. +5
    -2
      R/parse.R
  3. +23
    -19
      R/repair.R
  4. +2
    -2
      vignettes/interaction.Rmd

+ 2
- 2
R/analyze.R Просмотреть файл

@@ -48,14 +48,14 @@ party_colors <- c(
FDP="#FEEB34",
"CDU/CSU"="#000000",
SPD="#DF0B25",
"BÜNDNIS 90 / DIE GRÜNEN"="#4A932B",
"BÜNDNIS 90/DIE GRÜNEN"="#4A932B",
"DIE LINKE"="#BC3475",
"AfD&Fraktionslos"="#AAAAFF",
Fraktionslos="#AAAAAA"
)

party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
"DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
"DIE LINKE", "BÜNDNIS 90/DIE GRÜNEN", "SPD", "CDU/CSU",
"FDP", "AfD", NA_character_))

#' Bar chart visualizing fraction based data


+ 5
- 2
R/parse.R Просмотреть файл

@@ -173,7 +173,8 @@ parse_speech <- function(speech_xml, date) {
}

fractionpattern <- "BÜNDNIS(SES)?\\W*90/DIE\\W*GRÜNEN|CDU/CSU|AfD|SPD|DIE LINKE|FDP|LINKEN"
fractionnames <- c("BÜNDNIS 90/DIE GRÜNEN", "CDU/CSU", "AfD", "SPD", "DIE LINKE", "FDP")
fractionnames <- c("BÜNDNIS 90/DIE GRÜNEN", "CDU/CSU", "AfD", "SPD", "DIE LINKE", "FDP",
"Fraktionslos")

parse_comment <- function(comment, speech_id, on_speaker) {
base <- c(speech_id = speech_id, on_speaker = on_speaker)
@@ -187,7 +188,9 @@ parse_comment <- function(comment, speech_id, on_speaker) {
c(base, type = "applause", fraction = by, commenter = NA_character_, content = comment)
} else {
ps <- str_match(comment, "(.*) \\[(.*?)\\]: (.*)")[1,]
c(base, type = "comment", fraction = ps[3], commenter = ps[2], content = ps[4])
fraction <- agrep(ps[3], fractionnames, max=0.2, value=T)
if (all(is.na(fraction)) || length(fraction) == 0) fraction <- NA_character_
c(base, type = "comment", fraction = fraction, commenter = ps[2], content = ps[4])
}
}



+ 23
- 19
R/repair.R Просмотреть файл

@@ -1,7 +1,7 @@
fractions <- c("AFD" = "AfD",
"AFD&FRAKTIONSLOS" = "AfD&Fraktionslos",
"BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN",
"BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
"BÜNDNIS90/" = "BÜNDNIS 90/DIE GRÜNEN",
"BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90/DIE GRÜNEN",
"FRAKTIONSLOS" = "Fraktionslos",
"DIELINKE" = "DIE LINKE",
"SPD" = "SPD",
@@ -81,35 +81,39 @@ lookup_speaker <- function(tb, speaker, name_variable) {
mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, "")))
}

repair_comments <- function(comments, speaker) {
cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
"Use repair(, repair_commments = FALSE) to skip this.\n"))
# try to find a speaker id for each actual comment
repair_comments <- function(comments, speaker, lookup_speaker=F) {
comments %>%
filter(!is.na(commenter)) %>%
lookup_speaker(speaker, commenter) %>%
left_join(comments, ., by="commenter") %>%
select(-commenter)
filter(!is.na(commenter) | !is.na(content) | !is.na(fraction)) ->
tb
if (lookup_speaker) {
cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
"Use repair(, lookup_speaker = FALSE) to skip this.\n"))
# try to find a speaker id for each actual comment
tb %>%
filter(!is.na(commenter)) %>%
lookup_speaker(speaker, commenter) %>%
left_join(tb, ., by="commenter")
} else tb
}

#' Repair parsed tables
#'
#' @param parse_output tibble
#' @param repair_comments bool
#' @param lookup_speaker bool
#'
#' If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table.
#' If lookup_speaker is TRUE, members of the parliament mentioned in comments are looked up in speaker table.
#'
#' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
#' Since repaired tables should be a fixpoint of repair.
#' @export
repair <- function(parse_output, repair_comments = FALSE) {

repair <- function(parse_output, lookup_speaker = FALSE) {
is_valid_res(parse_output)
stopifnot("lookup_speaker must be of type logical" = is.logical(lookup_speaker))
list(speaker = repair_speaker(parse_output$speaker),
speeches = repair_speeches(parse_output$speeches),
talks = repair_talks(parse_output$talks),
comments = if(repair_comments) repair_comments(parse_output$comments,
parse_output$speaker)
else parse_output$comments,
applause = parse_output$applause
)
comments = repair_comments(parse_output$comments,
parse_output$speaker,
lookup_speaker),
applause = parse_output$applause)
}

+ 2
- 2
vignettes/interaction.Rmd Просмотреть файл

@@ -52,7 +52,7 @@ res$applause %>%
group_by(on_fraction) %>%
arrange(on_fraction) %>%
summarize("AfD" = sum(`AfD`),
"BÜNDNIS 90 / DIE GRÜNEN" = sum(`BUENDNIS_90_DIE_GRUENEN`),
"BÜNDNIS 90/DIE GRÜNEN" = sum(`BUENDNIS_90_DIE_GRUENEN`),
"CDU/CSU" = sum(`CDU_CSU`),
"DIE LINKE" = sum(`DIE_LINKE`),
"FDP" = sum(`FDP`),
@@ -84,7 +84,7 @@ res$comments %>%
select(by_fraction = fraction.x, on_fraction = fraction.y) %>%
group_by(on_fraction) %>%
summarize(`AfD` = sum(str_detect(by_fraction, "AfD"), na.rm=T),
`BÜNDNIS 90 / DIE GRÜNEN` = sum(str_detect(by_fraction, "BÜNDNIS 90/DIE GRÜNEN"), na.rm=T),
`BÜNDNIS 90/DIE GRÜNEN` = sum(str_detect(by_fraction, "BÜNDNIS 90/DIE GRÜNEN"), na.rm=T),
`CDU/CSU` = sum(str_detect(by_fraction, "CDU/CSU"), na.rm = T),
`DIE LINKE` = sum(str_detect(by_fraction, "DIE LINKE"), na.rm=T),
`FDP` = sum(str_detect(by_fraction, "FDP"), na.rm=T),


Загрузка…
Отмена
Сохранить