An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

121 lignes
3.8KB

  1. source("config.R")
  2. source("../utils/helpers.R")
  3. library("xml2")
  4. library(tibble)
  5. library(magrittr)
  6. # for usage see the example at the end
  # Read one protocol XML file and parse everything this file currently knows
  # how to parse.
  #
  # name: file name of the XML document. It is appended directly to
  #       DOWNLOAD_DIR (presumably defined in config.R -- TODO confirm)
  #       without inserting a separator, so DOWNLOAD_DIR has to end with one.
  #
  # Returns a list with three elements:
  #   redner - result of parse_rednerliste() on the children of <rednerliste>
  #   reden  - the `reden` element returned by parse_redenliste()
  #   talks  - the `talks` element returned by parse_redenliste()
  read_one <- function(name) {
    doc <- read_xml(paste0(DOWNLOAD_DIR, name))

    redner <- parse_rednerliste(
      xml_children(xml_find_first(doc, "rednerliste"))
    )

    # <rede> nodes are searched one level below the direct children of
    # <sitzungsverlauf>.
    verlauf_children <- xml_children(xml_find_first(doc, "sitzungsverlauf"))
    parsed <- parse_redenliste(xml_find_all(verlauf_children, "rede"))

    list(redner = redner, reden = parsed$reden, talks = parsed$talks)
  }
  # Text of all nodes that match the XPath expression `name` relative to
  # `node`.
  #
  # node: an xml node or node set to search in.
  # name: XPath expression passed to xml_find_all().
  #
  # Returns the character vector produced by xml_text() for the matches, or a
  # single NA_character_ when nothing matches, so callers always get at least
  # one value back.
  xml_get <- function(node, name) {
    # Nested call on purpose: a custom `%$%` operator would share its name with
    # the `%$%` exported by magrittr, which this file attaches.
    res <- xml_text(xml_find_all(node, name))
    if (length(res) == 0) {
      NA_character_
    } else {
      res
    }
  }
  # Turn a single redner node into a flat, named character vector.
  #
  # redner_xml: one node of the rednerliste. Its "id" attribute and its first
  #             child (which holds the name fields) are read.
  #
  # Returns a named character vector with the elements id, vorname, nachname,
  # fraktion, titel, rolle_kurz and rolle_lang. Name fields that have no
  # matching element come back from xml_get() as NA_character_.
  parse_redner <- function(redner_xml) {
    name_node <- xml_child(redner_xml)

    # The role is optional: both variants stay NA unless a <rolle> exists.
    rolle_lang <- NA_character_
    rolle_kurz <- NA_character_
    rolle_nodes <- xml_find_all(name_node, "rolle")
    if (length(rolle_nodes) > 0) {
      rolle_lang <- xml_get(rolle_nodes, "rolle_lang")
      rolle_kurz <- xml_get(rolle_nodes, "rolle_kurz")
    }

    c(
      id = xml_attr(redner_xml, "id"),
      vorname = xml_get(name_node, "vorname"),
      nachname = xml_get(name_node, "nachname"),
      fraktion = xml_get(name_node, "fraktion"),
      titel = xml_get(name_node, "titel"),
      rolle_kurz = rolle_kurz,
      rolle_lang = rolle_lang
    )
  }
  # Parse one rede node.
  #
  # Only the direct <p> children are looked at. A <p klasse="redner">
  # announces the speaker (the "id" attribute of its first child) for the
  # paragraphs that follow. The text of every other <p> is appended, followed
  # by a newline, to the content of the current segment. All other child
  # elements are skipped.
  #
  # rede_xml: the xml node of one rede.
  #
  # Returns a list with
  #   rede  - c(id = <rede id>, redner = <id of the first announced speaker>)
  #   parts - list of c(rede_id, redner, content) vectors, one per speaker
  #           segment in document order. The segment that is still open when
  #           the children are exhausted is always appended last, so `parts`
  #           is never empty.
  parse_rede <- function(rede_xml) {
    rede_id <- xml_attr(rede_xml, "id")
    principal_redner <- NA_character_
    cur_redner <- NA_character_
    cur_content <- ""
    parts <- list()

    for (node in xml_children(rede_xml)) {
      if (xml_name(node) != "p") {
        next
      }

      # xml_attr() yields NA for a <p> without "klasse"; identical() keeps that
      # NA from reaching `if`, where it would raise an error.
      if (identical(xml_attr(node, "klasse"), "redner")) {
        redner_id <- xml_attr(xml_child(node), "id")
        if (is.na(cur_redner)) {
          # First speaker of the rede; text collected so far stays with them.
          principal_redner <- redner_id
        } else {
          # Speaker change: close the segment of the previous speaker.
          parts[[length(parts) + 1L]] <- c(rede_id = rede_id,
                                           redner = cur_redner,
                                           content = cur_content)
          cur_content <- ""
        }
        cur_redner <- redner_id
      } else {
        # paste0() has no `sep` argument, so the newline is passed as an
        # ordinary trailing piece: every paragraph ends with "\n".
        cur_content <- paste0(cur_content, xml_text(node), "\n")
      }
    }

    parts[[length(parts) + 1L]] <- c(rede_id = rede_id,
                                     redner = cur_redner,
                                     content = cur_content)

    list(rede = c(id = rede_id, redner = principal_redner),
         parts = parts)
  }
  # Build a tibble of reden and a tibble of talks from a collection of xml
  # nodes that each represent one rede.
  #
  # redenliste_xml: the rede nodes; each one is handed to parse_rede().
  #
  # Returns a list with
  #   reden - tibble with the columns id and redner, one row per rede
  #   talks - tibble with the columns rede_id, redner and content, one row per
  #           part returned by parse_rede(), across all reden
  # An empty input gives two tibbles with zero rows.
  parse_redenliste <- function(redenliste_xml) {
    parsed <- lapply(redenliste_xml, parse_rede)
    reden <- lapply(parsed, function(p) p[["rede"]])
    # Flatten the per-rede lists of parts into a single list of parts;
    # as.list() turns the NULL that unlist() gives for no input into list().
    parts <- as.list(
      unlist(lapply(parsed, function(p) p[["parts"]]), recursive = FALSE)
    )

    # Pull one named field out of every record. vapply() guarantees a
    # character vector (also for zero records) and fails if a field is missing.
    field <- function(records, key) {
      unname(vapply(records, function(rec) rec[[key]], character(1)))
    }

    list(
      reden = tibble(id = field(reden, "id"),
                     redner = field(reden, "redner")),
      talks = tibble(rede_id = field(parts, "rede_id"),
                     redner = field(parts, "redner"),
                     content = field(parts, "content"))
    )
  }
  # Build a tibble of redner from a collection of xml nodes that each
  # represent one redner.
  #
  # rednerliste_xml: the redner nodes; each one is handed to parse_redner().
  #
  # Returns a tibble with one row per node and the character columns id,
  # vorname, nachname, fraktion, titel, rolle_kurz and rolle_lang. An empty
  # input gives a tibble with zero rows.
  parse_rednerliste <- function(rednerliste_xml) {
    rows <- lapply(rednerliste_xml, parse_redner)

    # vapply() keeps every column a character vector (also for zero rows) and
    # stops with an error if a record lacks the field, instead of silently
    # changing the result shape the way sapply() does.
    column <- function(key) {
      unname(vapply(rows, function(row) row[[key]], character(1)))
    }

    tibble(id = column("id"),
           vorname = column("vorname"),
           nachname = column("nachname"),
           fraktion = column("fraktion"),
           titel = column("titel"),
           rolle_kurz = column("rolle_kurz"),
           rolle_lang = column("rolle_lang"))
  }
  # -------------------------------
  # EXAMPLE USE
  # Make sure the data has been downloaded via fetch.R before running this.
  res <- read_one("19038-data.xml")
  res[["redner"]]
  res[["reden"]]
  res[["talks"]]
  # -------------------------------