Przeglądaj źródła

fix errors from refactoring and add documentation to all remaining functions in analyze.R

genderequality-alternative
JosuaKugler 4 lat temu
rodzic
commit
5e9f70627d
10 zmienionych plików z 237 dodań i 16 usunięć
  1. +57
    -4
      R/analyze.R
  2. +3
    -3
      R/parse.R
  3. +10
    -8
      R/repair.R
  4. +54
    -0
      man/bar_plot_fractions.Rd
  5. +18
    -0
      man/find_word.Rd
  6. +21
    -0
      man/join_speaker.Rd
  7. +24
    -0
      man/lookup_speaker.Rd
  8. +16
    -0
      man/party_colors.Rd
  9. +11
    -1
      man/repair.Rd
  10. +23
    -0
      man/word_usage_by_date.Rd

+ 57
- 4
R/analyze.R Wyświetl plik

@@ -1,10 +1,31 @@
#' Count number of occurences of a given word
#'
#' @param res tibble
#' @param word character
#'
#' Add number of occurences of word to talks
#'
#' @export
find_word <- function(res, word) {
talks <- res$talks
mutate(talks, occurences = sapply(str_match_all(talks$content, regex(word, ignore_case = TRUE)),
nrow))
mutate(
talks,
occurences = sapply(
str_match_all(talks$content, regex(word, ignore_case = TRUE)),
nrow
)
)
}

#' add information from speaker table to a tibble containing speaker id
#'
#' @param tb tibble
#' @param res tibble
#' @param fraction_only bool
#'
#' left join speaker information from res$speaker into tb.
#' if fraction_only is TRUE, only fraction is selected from the resulting joined tibble
#'
#' @export
join_speaker <- function(tb, res, fraction_only = F) {
joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
@@ -12,6 +33,8 @@ join_speaker <- function(tb, res, fraction_only = F) {
else joined
}

#' lookup table for party colors
#'
#' @export
party_colors <- c(
AfD="#1A9FDD",
@@ -28,6 +51,28 @@ party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
"DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
"FDP", "AfD", NA_character_))

#' plot data depending on fractions in a standardized, configurable way
#'
#' @param tb tibble
#' @param x_variable column in tb
#' @param y_variable column in tb
#' @param fill column in tb
#' @param title char
#' @param xlab char
#' @param ylab char
#' @param filllab char
#' @param flipped bool
#' @param position char
#' @param reorder bool
#'
#' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
#' Then color the plot depending on the fill value
#' Give the plot a title, an x-label xlab as well as an y-label ylab
#' Color the legend according to filllab
#' Setting flipped to TRUE makes the bars horizontal
#' Improve positioning details according to position
#' and finally reorder x_variable (default ist to order fractions according to seat order)
#'
#' @export
bar_plot_fractions <- function(tb,
x_variable = NULL, # default is fraction
@@ -71,8 +116,16 @@ bar_plot_fractions <- function(tb,
if (flipped) plt + coord_flip() else plt
}

# Counts how many talks do match a given pattern and summarises by date
#
#' Counts how many talks do match a given pattern and summarises by date
#'
#' @param res tibble
#' @param patterns char list
#' @param name char ? what is name needed for??
#' @param tidy bool, default F
#'
#' shorter summary if tidy=F
#' if tidy is set to T, the resulting tibble is tidy
#'
#' @export
word_usage_by_date <- function(res, patterns, name, tidy=F) {
tb <- res$talks


+ 3
- 3
R/parse.R Wyświetl plik

@@ -90,7 +90,7 @@ parse_speaker <- function(speaker_xml) {
nm <- xml_child(speaker_xml)
vorname <- xml_get(nm, "vorname")
nachname <- xml_get(nm, "nachname")
fraction <- xml_get(nm, "fraction")
fraction <- xml_get(nm, "fraktion")
titel <- xml_get(nm, "titel")
rolle <- xml_find_all(nm, "rolle")
if (length(rolle) > 0) {
@@ -115,7 +115,7 @@ parse_speech <- function(speech_xml, date) {
for (node in cs) {
if (xml_name(node) == "p" || xml_name(node) == "name") {
klasse <- xml_attr(node, "klasse")
if ((!is.na(klasse) && klasse == "speaker") || xml_name(node) == "name") {
if ((!is.na(klasse) && klasse == "redner") || xml_name(node) == "name") {
if (!is.na(cur_speaker)) {
speech <- c(speech_id = speech_id,
speaker = cur_speaker,
@@ -194,7 +194,7 @@ parse_speechlist <- function(speechlist_xml, date) {
}

# create a tibble of speaker from a list of xml nodes representing speaker
parse_speakerliste <- function(speakerliste_xml) {
parse_speakerlist <- function(speakerliste_xml) {
d <- sapply(speakerliste_xml, parse_speaker)
tibble(id = d["id",],
vorname = d["vorname",],


+ 10
- 8
R/repair.R Wyświetl plik

@@ -59,7 +59,7 @@ repair_talks <- function(talks) {
#' unique (luckily :D)
#'
#' @param tb tibble
#' @param redner tibble
#' @param speaker tibble
#' @param name_variable name
#'
#' Tries to match the name_variable column with speaker names
@@ -83,29 +83,31 @@ lookup_speaker <- function(tb, speaker, name_variable) {
mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, "")))
}

repair_comments <- function(comments, redner) {
repair_comments <- function(comments, speaker) {
cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
"Use repair(, repair_commments = FALSE) to skip this.\n"))
# try to find a redner id for each actual comment
# try to find a speaker id for each actual comment
comments %>%
filter(!is.na(kommentator)) %>%
lookup_redner(redner, kommentator) %>%
lookup_speaker(speaker, kommentator) %>%
left_join(comments, ., by="kommentator") %>%
select(-kommentator)
}

#' Repair parsed tables
#'
#' TODO: Explain repair_comments argument
#' (if TRUE, we try to lookup redner names in redner table)
#' @param parse_output tibble
#' @param repair_comments bool
#'
#' If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table.
#'
#' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
#' Since repaired tables should be a fixpoint of repair.
#' @export
repair <- function(parse_output, repair_comments = FALSE) {

list(redner = repair_speaker(parse_output$speaker),
reden = repair_speeches(parse_output$speeches),
list(speaker = repair_speaker(parse_output$speaker),
speeches = repair_speeches(parse_output$speeches),
talks = repair_talks(parse_output$talks),
comments = if(repair_comments) repair_comments(parse_output$comments,
parse_output$speaker)


+ 54
- 0
man/bar_plot_fractions.Rd Wyświetl plik

@@ -0,0 +1,54 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/analyze.R
\name{bar_plot_fractions}
\alias{bar_plot_fractions}
\title{plot data depending on fractions in a standardized, configurable way}
\usage{
bar_plot_fractions(
tb,
x_variable = NULL,
y_variable = NULL,
fill = NULL,
title = NULL,
xlab = "Fraction",
ylab = "n",
filllab = "Fraction",
flipped = TRUE,
position = "dodge",
reorder = FALSE
)
}
\arguments{
\item{tb}{tibble}

\item{x_variable}{column in tb}

\item{y_variable}{column in tb}

\item{fill}{column in tb}

\item{title}{char}

\item{xlab}{char}

\item{ylab}{char}

\item{filllab}{char}

\item{flipped}{bool}

\item{position}{char}

\item{reorder}{bool

plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
Then color the plot depending on the fill value
Give the plot a title, an x-label xlab as well as an y-label ylab
Color the legend according to filllab
Setting flipped to TRUE makes the bars horizontal
Improve positioning details according to position
and finally reorder x_variable (default ist to order fractions according to seat order)}
}
\description{
plot data depending on fractions in a standardized, configurable way
}

+ 18
- 0
man/find_word.Rd Wyświetl plik

@@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/analyze.R
\name{find_word}
\alias{find_word}
\title{Count number of occurences of a given word}
\usage{
find_word(res, word)
}
\arguments{
\item{res}{tibble}

\item{word}{character

Add number of occurences of word to talks}
}
\description{
Count number of occurences of a given word
}

+ 21
- 0
man/join_speaker.Rd Wyświetl plik

@@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/analyze.R
\name{join_speaker}
\alias{join_speaker}
\title{add information from speaker table to a tibble containing speaker id}
\usage{
join_speaker(tb, res, fraction_only = F)
}
\arguments{
\item{tb}{tibble}

\item{res}{tibble}

\item{fraction_only}{bool

left join speaker information from res$speaker into tb.
if fraction_only is TRUE, only fraction is selected from the resulting joined tibble}
}
\description{
add information from speaker table to a tibble containing speaker id
}

+ 24
- 0
man/lookup_speaker.Rd Wyświetl plik

@@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/repair.R
\name{lookup_speaker}
\alias{lookup_speaker}
\title{Lookup name in speakers table}
\usage{
lookup_speaker(tb, speaker, name_variable)
}
\arguments{
\item{tb}{tibble}

\item{speaker}{tibble}

\item{name_variable}{name

Tries to match the name_variable column with speaker names

returns a lookup table}
}
\description{
Tries to find the correct speaker id given a name.
This is sufficient since every prename lastname combination in the bundestag is
unique (luckily :D)
}

+ 16
- 0
man/party_colors.Rd Wyświetl plik

@@ -0,0 +1,16 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/analyze.R
\docType{data}
\name{party_colors}
\alias{party_colors}
\title{lookup table for party colors}
\format{
An object of class \code{character} of length 8.
}
\usage{
party_colors
}
\description{
lookup table for party colors
}
\keyword{datasets}

+ 11
- 1
man/repair.Rd Wyświetl plik

@@ -4,7 +4,17 @@
\alias{repair}
\title{Repair parsed tables}
\usage{
repair(parse_output)
repair(parse_output, repair_comments = FALSE)
}
\arguments{
\item{parse_output}{tibble}

\item{repair_comments}{bool

If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table.

Possible test: check identical(repair(res), repair(repair(res))) == TRUE
Since repaired tables should be a fixpoint of repair.}
}
\description{
Repair parsed tables


+ 23
- 0
man/word_usage_by_date.Rd Wyświetl plik

@@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/analyze.R
\name{word_usage_by_date}
\alias{word_usage_by_date}
\title{Counts how many talks do match a given pattern and summarises by date}
\usage{
word_usage_by_date(res, patterns, name, tidy = F)
}
\arguments{
\item{res}{tibble}

\item{patterns}{char list}

\item{name}{char ? what is name needed for??}

\item{tidy}{bool, default F

shorter summary if tidy=F
if tidy is set to T, the resulting tibble is tidy}
}
\description{
Counts how many talks do match a given pattern and summarises by date
}

Ładowanie…
Anuluj
Zapisz