An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

121 rinda
3.8KB

  1. source("config.R")
  2. source("../utils/helpers.R")
  3. library("xml2")
  4. library(tibble)
  5. library(magrittr)
  6. # for usage see the example at the end
  # Read one XML file and parse everything that is currently supported.
  #
  # name: file name of the XML. It is appended to DOWNLOAD_DIR without any
  #       separator, so DOWNLOAD_DIR has to end with one.
  #       (DOWNLOAD_DIR is not defined in this file - presumably it comes
  #       from config.R; confirm.)
  # Returns a list with three elements:
  #   redner - result of parse_rednerliste() on the children of <rednerliste>
  #   reden  - the `reden` element returned by parse_redenliste()
  #   talks  - the `talks` element returned by parse_redenliste()
  read_one <- function(name) {
    doc <- read_xml(paste0(DOWNLOAD_DIR, name))

    # Speaker directory.
    redner_nodes <- xml_children(xml_find_first(doc, "rednerliste"))
    redner <- parse_rednerliste(redner_nodes)

    # <rede> elements are looked up below each child of <sitzungsverlauf>.
    verlauf_children <- xml_children(xml_find_first(doc, "sitzungsverlauf"))
    parsed <- parse_redenliste(xml_find_all(verlauf_children, "rede"))

    list(redner = redner, reden = parsed$reden, talks = parsed$talks)
  }
  # Text of every node matched by the XPath `name`, evaluated relative to
  # `node`.
  #
  # node: xml2 node or nodeset to search in.
  # name: XPath expression (the callers in this file pass element names).
  # Returns a character vector with one entry per match, or NA_character_
  # when nothing matches, so the result never has length zero.
  xml_get <- function(node, name) {
    # Plain nested call: this does not depend on which `%$%` operator is in
    # scope (magrittr's `%$%` is an exposition pipe, not function
    # application).
    texts <- xml_text(xml_find_all(node, name))
    if (length(texts) == 0) {
      NA_character_
    } else {
      texts
    }
  }
  # Extract the fields of a single speaker.
  #
  # redner_xml: xml2 node of one speaker; its first child is searched for the
  #             name fields listed below.
  # Returns a named character vector with the entries id, vorname, nachname,
  # fraktion, titel, rolle_kurz and rolle_lang. Fields without a matching
  # element are NA.
  parse_redner <- function(redner_xml) {
    name_node <- xml_child(redner_xml)

    # The role is optional and nested one level deeper, inside <rolle>.
    role_nodes <- xml_find_all(name_node, "rolle")
    has_role <- length(role_nodes) > 0
    role_field <- function(field) {
      if (has_role) xml_get(role_nodes, field) else NA_character_
    }

    c(
      id = xml_attr(redner_xml, "id"),
      vorname = xml_get(name_node, "vorname"),
      nachname = xml_get(name_node, "nachname"),
      fraktion = xml_get(name_node, "fraktion"),
      titel = xml_get(name_node, "titel"),
      rolle_kurz = role_field("rolle_kurz"),
      rolle_lang = role_field("rolle_lang")
    )
  }
  # Parse one <rede> node.
  #
  # The speech is split into "talks": every <p klasse="redner"> child marks a
  # change of speaker, and the text of every other <p> child is appended
  # (followed by a newline) to the talk of the current speaker. Children that
  # are not <p> elements are ignored.
  #
  # rede_xml: xml2 node of one speech.
  # Returns a list with
  #   rede  - named character vector c(id, redner): the speech id and the id
  #           of the first speaker found in it (NA if there is none)
  #   parts - list of named character vectors c(rede_id, redner, content),
  #           one per talk, in document order; the talk that is still open
  #           at the end of the speech is always appended
  parse_rede <- function(rede_xml) {
    rede_id <- xml_attr(rede_xml, "id")
    children <- xml_children(rede_xml)

    cur_redner <- NA_character_
    principal_redner <- NA_character_
    cur_content <- ""
    parts <- list()

    for (i in seq_along(children)) {
      node <- children[[i]]
      if (xml_name(node) != "p") {
        next
      }

      # xml_attr() yields NA when the attribute is absent; isTRUE() treats
      # such a paragraph as ordinary content instead of failing in `if (NA)`.
      if (isTRUE(xml_attr(node, "klasse") == "redner")) {
        next_redner <- xml_attr(xml_child(node), "id")
        if (is.na(cur_redner)) {
          # First speaker marker seen so far: this is the principal speaker.
          principal_redner <- next_redner
        } else {
          # Close the talk of the previous speaker.
          parts[[length(parts) + 1L]] <- c(rede_id = rede_id,
                                           redner = cur_redner,
                                           content = cur_content)
          cur_content <- ""
        }
        cur_redner <- next_redner
      } else {
        cur_content <- paste0(cur_content, xml_text(node), "\n")
      }
    }

    # Close the talk that is still open after the last child.
    parts[[length(parts) + 1L]] <- c(rede_id = rede_id,
                                     redner = cur_redner,
                                     content = cur_content)

    list(rede = c(id = rede_id, redner = principal_redner),
         parts = parts)
  }
  # Build a tibble of speeches and a tibble of talks from a list of <rede>
  # nodes.
  #
  # redenliste_xml: list or nodeset whose elements are passed one by one to
  #                 parse_rede().
  # Returns a list with
  #   reden - tibble with columns id, redner (one row per speech)
  #   talks - tibble with columns rede_id, redner, content (one row per talk
  #           returned in the `parts` of any speech)
  # Empty input yields two tibbles with zero rows.
  parse_redenliste <- function(redenliste_xml) {
    parsed <- lapply(redenliste_xml, parse_rede)

    rede_rows <- lapply(parsed, function(p) p[["rede"]])
    # Flatten the per-speech lists of talks into one list of talks.
    part_rows <- unlist(lapply(parsed, function(p) p[["parts"]]),
                        recursive = FALSE)

    # Pull one named field out of every row as a plain character vector.
    # vapply() keeps the type stable for empty input as well.
    field <- function(rows, key) {
      vapply(rows, function(row) row[[key]], character(1), USE.NAMES = FALSE)
    }

    list(
      reden = tibble(id = field(rede_rows, "id"),
                     redner = field(rede_rows, "redner")),
      talks = tibble(rede_id = field(part_rows, "rede_id"),
                     redner = field(part_rows, "redner"),
                     content = field(part_rows, "content"))
    )
  }
  # Build a tibble of speakers from a list of speaker nodes.
  #
  # rednerliste_xml: list or nodeset whose elements are passed one by one to
  #                  parse_redner().
  # Returns a tibble with one row per speaker and the character columns id,
  # vorname, nachname, fraktion, titel, rolle_kurz and rolle_lang. Empty
  # input yields a tibble with zero rows.
  parse_rednerliste <- function(rednerliste_xml) {
    rows <- lapply(rednerliste_xml, parse_redner)

    # One field of every speaker as a plain character vector. vapply() keeps
    # the type stable for empty input and stops if a field does not hold
    # exactly one value.
    column <- function(key) {
      vapply(rows, function(row) row[[key]], character(1), USE.NAMES = FALSE)
    }

    tibble(id = column("id"),
           vorname = column("vorname"),
           nachname = column("nachname"),
           fraktion = column("fraktion"),
           titel = column("titel"),
           rolle_kurz = column("rolle_kurz"),
           rolle_lang = column("rolle_lang"))
  }
  # -------------------------------
  # EXAMPLE USE
  # Make sure the data has been downloaded via fetch.R first.
  res <- read_one("19038-data.xml")
  res[["redner"]]
  res[["reden"]]
  res[["talks"]]
  # -------------------------------