Für Vorlesungen, bitte die Webseite verwenden. https://flavigny.de/lecture
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1336 lines
27KB

  1. library(tidyverse)
  2. library(rlang)
  3. library(sloop) # for OOP
  4. library(nycflights13) # for examples
  5. flights2 <- flights %>% select(year:day, hour, tailnum, carrier, origin, dest)
  6. airports2 <- airports %>% select(faa, name)
  7. stocks <- tibble(year = sample(2011:2015, rep=T, 8),
  8. qtr = sample(1:4, rep=T, 8),
  9. return = c(1.88, 0.59, 0.35, NA, 0.92, NA, 2.66, 3.55)) %>% unique
  10. # definitionen
  11. students <- tibble(
  12. name = c("Alice", "Bob", "Carl", "Dave", "Eve"),
  13. grade = c(1, 2, 2, 3, 1)
  14. )
  15. info <- tibble(
  16. grade = 3:1,
  17. desc = c("Poor", "Good", "Excellent"),
  18. fail = c(T, F, F)
  19. )
  20. # Sequenzen
  21. 1:42
  22. 0.5:-3.9
  23. seq(0, 3, by=0.75)
  24. seq(0.5, -3.9, len=5)
  25. rep(c(0,2), times=5)
  26. rep(c(0,2), each=5)
  27. cumsum(1:10)
  28. # Indexing
  29. x <- 21:42
  30. x[c(3, 7, 6, 7.9)] # position
  31. x[-c(1, 3, 5)] # ausschluss von angegebenen positionen
  32. x[FALSE] # integer(0)
  33. seq_along(x[FALSE]) # zuverlässiger
  34. 1:length(x[FALSE])
  35. x[] # nichts: alles wird zurückgegeben
  36. x[x %% 2 == 0] # wahrheitswerte
  37. x[(seq_along(x)-1) %% 5 == 0]
  38. x[c(T, F)] # jedes zweite, da T, F recycled wird
  39. y <- structure(x, names=letters[seq_along(x)])
  40. y[c("a", "j", "e", "e", "z")] # namen
  41. y[setdiff(names(y), c("a", "e", "z"))] # analogon zu negativen zahlen
  42. m <- outer(1:5, 1:4, paste, sep=",")
  43. m[2:7] # indizierung auf zugrundeliegendem vektor
  44. m[c(4,3), c(1,4,3)] # indizierung nach dimensionen
  45. m[1,, drop=FALSE] # dimensionsvereinfachung verhindern
  46. select <- rbind(
  47. c(1, 1),
  48. c(3, 1),
  49. c(2, 4)
  50. )
  51. m[select] # matrix: jede zeile gibt eine koordinatenkombination an
  52. m <- matrix(1:9, nrow=3)
  53. which(m %% 2 == 0, arr.ind=F) # gibt koordinaten der TRUE values
  54. arrayInd(which(m %% 2 == 0), dim(m)) # wandelt vektor koordinaten in koordinatenmatrix um
  55. ## tibbles
  56. tb <- tibble(x = 1:3, y = 3:1, z = letters[1:3])
  57. tb[tb$x == 2, ] # wie bei matrizen
  58. tb[c("x", "z")] # wie bei listen, gibt spalten x und z
  59. tb[tb$x == 2, "z", drop=T] # drop um tibble struktur zu vergessen
  60. ## Einzelne Elemente
  61. x <- list(bla=1:3, blub=list("fritz", "hans"), 4:6)
  62. x[1] # liste mit einem element
  63. x[[1]] # element der liste
  64. x[[c(2, 1)]] # verschachtelte indizierung
  65. x[["blub"]][[1]] # das selbe
  66. x <- 1:10
  67. x[[3]] # geht auch fuer atomare vektoren
  68. m[[2, 3]] # bei arrays mit entsprechenden dimensionen
  69. key <- "bla"
  70. x$bla
  71. x[[key]]
  72. options(warnPartialMatchDollar=T)
  73. x$blu # partial matching von "blub"
  74. ## Fehlende Indizes
  75. x <- 1:3
  76. x[4] # NA
  77. x[[4]] # Error
  78. lst <- as.list(x)
  79. lst[4] # NA
  80. lst[[4]] # Error
  81. names(x) <- letters[1:3]
  82. names(lst) <- letters[1:3]
  83. x[["d"]] # Error, ABER:
  84. lst[["d"]] # NULL
  85. lst$d # NULL
  86. ## assignment
  87. x <- 1:5
  88. x[c(1, 2, NA)] <- c(1,2) # Error
  89. x[c(1, 2, NA)] <- 1 # funktioniert
  90. lst <- list(abc=1)
  91. lst$ab # partial matching
  92. lst$ab <- 2 # kein partial matching, neuer eintrag
  93. x <- 1:9
  94. x[1:6] <- 11:13 # recycling
  95. x <- 7
  96. x[1e5] <- 5 # funktioniert, out of bound assignment vergroessert vektor
  97. tb <- tibble(x=2^(1:4), y=c("1.2", "2.3", "3.4", "4.2"))
  98. tb[] <- lapply(tb, as.integer) # erhaelt tibble struktur, weil subsetting assignment
  99. lst["abc"] <- NULL # element aus liste entfernen
  100. lst["ab"] <- list(NULL) # NULL liste einfügen
  101. ## helpers for indexing
  102. ### match and merge
  103. id <- match(students$grade, info$grade) # find index of entries in students$grade in info$grade
  104. cbind(students, info[id, -1])
  105. merge(students, info, by.x="grade", by.y="grade") # kurzform
  106. ### sorting
  107. x <- runif(100, min=-40, max=52)
  108. order(x) # x[order(x)] = sort(x)
  109. sort(x) # sortiert
  110. rank(x) # ranked alle elemente aufsteigend
  111. students[order(students$grade), ] # sortiere nach noten
  112. # Missing values
  113. ## NULL
  114. c() # NULL
  115. c(typeof(NULL), length(NULL)) # NULL, 0
  116. ## BASE_TYPE(0)
  117. c(typeof(integer(0)), length(integer(0)))
  118. ## NA...
  119. c(typeof(NA_integer_), length(NA)) # laenge 1 und entsprechenden typ
  120. ## indexing
  121. x <- 1:3
  122. x[0]
  123. x[NULL]
  124. x[]
  125. x[NA]
  126. x[NA_integer_]
  127. ## pruefen
  128. x <- list(1, NA, NaN, NA_real_, NA_integer_, NA_character_)
  129. is.na(x)
  130. is.null(x)
  131. is.nan(NaN)
  132. ## vergleich
  133. 1:3 == NA
  134. NA == NA
  135. identical(NA, NA)
  136. NULL == NULL
  137. identical(NULL, NULL)
  138. # Plots
  139. x <- seq(-3, 3, len=100)
  140. plot(x, sin(x),
  141. type="l",
  142. main="Titel",
  143. xlab="x-Achse",
  144. ylab="y-Achse",
  145. xlim=c(-2, 2),
  146. ylim=c(-1, 1),
  147. lwd=3,
  148. lty=2,
  149. col="cyan")
  150. points(x, sin(x), pch=3, col="red")
  151. hist(rnorm(1000))
  152. barplot(1:5)
  153. x <- seq(0, 1, len=100)
  154. y <- seq(0, pi, len=100)
  155. f <- function(x,y) x^2 - sin(y)
  156. outer(x,y,f)
  157. contour(x,y,outer(x,y,f))
  158. # Randomness
  159. ## random deviates
  160. set.seed(42)
  161. rnorm(8, mean=12, sd=3)
  162. rbinom(42, size=13, prob=0.45)
  163. runif(10, min=-13, max=26)
  164. # Printing
  165. print(1:10)
  166. cat("3 + 5 ist", 3+5, "\n", sep=" ")
  167. paste("3 + 5 ist", 3+5, "\n", sep=" ")
  168. as.character(pi)
  169. sprintf("Eine Zufallszahl aus der %s ist %.2f.", "Normalverteilung", rnorm(1))
  170. # Listen
  171. lst <- list(1:3,
  172. "a",
  173. c(T, F, T),
  174. runif(10),
  175. list(1, "b"),
  176. sum)
  177. identical(lst[[c(5,2)]], "b")
  178. str(lst)
  179. c(typeof(lst), class(lst), length(lst))
  180. is.vector(lst)
  181. is.list(lst)
  182. unlist(list(1:10))
  183. as.list(1:10)
  184. lst[2]
  185. lst[[2]]
  186. lst[[2]] <- NULL
  187. lst[1] <- list(NULL)
  188. str(c(list(), letters[1:5], 42))
  189. # Matrizen
  190. n <- 1e3
  191. m1 <- matrix(rnorm(n^2), nrow=n)
  192. m2 <- matrix(rnorm(n^2), nrow=n)
  193. m <- matrix(1:16, nrow=4, byrow=F)
  194. m[2,2] <- 0
  195. m[1,1] <- 0
  196. dim(m)
  197. length(m)
  198. nrow(m)
  199. ncol(m)
  200. rowSums(m)
  201. colSums(m)
  202. det(m)
  203. r <- sample(4)
  204. y <- solve(m, r)
  205. m %*% y
  206. solve(m) # inverse
  207. qr(m) # qr zerlegung
  208. chol(m) # cholesky zerlegung
  209. svd(m) # singulaerwertzerlegung
  210. eigen(m) # eigenwerte und vektoren
  211. m1 + m2
  212. m1 * m2 # elementweise mult
  213. m1 %*% m2 # matrix mult
  214. t(m1) # transpose
  215. aperm(m1) # auf arrays verallgemeinert
  216. cbind(m, 1:4)
  217. rbind(m, 1:4)
  218. ## allgemeiner
  219. x <- 1:6
  220. attr(x, "dim") <- c(2,3)
  221. dim(x) <- c(2,3,1)
  222. array(1:16, dim=c(2,2,2))
  223. matrix(list("a", 1, T, list()),
  224. nrow=2,
  225. dimnames=list(Zeilen=NULL, Spalten=c("hi", "fritz")))
  226. rownames(x) <- letters[1:2]
  227. colnames(x) <- LETTERS[1:3]
  228. NROW(1:12) == nrow(matrix(1:12)) # aber nrow(1:12) ist NULL
  229. # Tibbles
  230. tb <- tibble(x=1:3, y=letters[1:3])
  231. as_tibble %$% list(x=1:3, y=letters[1:3])
  232. as_tibble %$% matrix(1:4, ncol=2, dimnames=list(NULL, c("A", "B")))
  233. add_column(tb, z=-1:1, w = 0)
  234. add_row(tb, x=4:5, y=letters[4:5], .before=2)
  235. head(tb, 2)
  236. tail(tb, 1)
  237. dplyr::bind_rows(tb, tibble(x=42:45, y="E"))
  238. dplyr::bind_cols(tb, tibble(e=42:44, d="E"))
  239. options(tibble.print_min=6, # makes output more compact
  240. tibble.print_max=6,
  241. tibble.max_etra_cols=0)
  242. readr::read_csv("bla.csv")
  243. ## dplyr verbes
  244. filter(flights, (arr_delay - dep_delay) < -60, dep_delay > 30) # zeilen filtern
  245. arrange(flights, desc(dep_delay)) # zeilen sortieren
  246. nms <- c("month", "year")
  247. select(flights, 5, 5, carrier, "origin", arr_time:flight, -flight,
  248. all_of(nms), -any_of(nms)) # position select
  249. select(flights, # condition select
  250. where(is.character) | contains("delay"),
  251. fritz = ends_with("delay"),
  252. -starts_with("dep_"),
  253. tag = contains("day"), # no regex
  254. matches("^y"), # regex
  255. everything()) # alle restlichen spalten (umsortierung)
  256. rename(flights, monat = month, tag = day)
  257. mutate(airlines, name_length = nchar(name), carrier=NULL)
  258. transmute(flights, carrier, gain = dep_delay - arr_delay)
  259. summarize(flights, total_delay = sum(abs(dep_delay), na.rm = T),
  260. delay_range = range(dep_delay, na.rm = T),
  261. count_origin = n_distinct(origin))
  262. count(flights, year, month, day, origin, sort=T) # group by variables and summarize n()
  263. group_by(flights, month) %>% summarize(delay_mean = mean(dep_delay, na.rm=T))
  264. group_by(flights, month, carrier, dep_time) %>% select(group_cols())
  265. group_by(flights, carrier) %>% filter(sum(distance, na.rm = T) > 5e7)
  266. group_by(flights, carrier) %>% mutate(prop = distance / sum(distance, na.rm=T))
  267. summarize(flights,
  268. across(where(is.numeric) & !(year | month | day | hour),
  269. list(min = min,max = max,mean=mean),
  270. na.rm = T,
  271. .names = "{fn}.{col}"))
  272. mutate(flights, across(dep_time:arr_delay, partial(`*`, 2)))
  273. filter(flights, across(where(is.numeric) & !flight & !year, partial(`>`, 700) %.% abs))
  274. rowwise(flights) %>% mutate(m = mean(c_across(dep_time:arr_time)), .keep="used") # vermeiden
  275. ## non standard evaluation austricksen
  276. tobefiltered <- "dep_delay"
  277. filter(flights, .data[[tobefiltered]] > +120)
  278. ## join und co.
  279. left_join(flights2, airports2, by=c("origin" = "faa")) %>%
  280. left_join(airports2, by=c("dest" = "faa"), suffix=c("_origin", "_dest"))
  281. right_join(flights2, airports2, by=c("dest" = "faa")) # priorisiert rechts
  282. inner_join(flights2, airports2, by=c("dest" = "faa")) # keins na
  283. full_join(flights2, airports2, by=c("dest" = "faa")) # egal
  284. top_dest <- count(flights, dest, sort=T) %>% head(10)
  285. semi_join(flights, top_dest) # only keep rows that join with row in top_dest
  286. anti_join(flights, top_dest) # only keep rows that DO NOT join with row in top dest
  287. ## tidy tables
  288. pivot_longer(table4a, cols=where(is.numeric), names_to="year", values_to="cases")
  289. pivot_wider(table2, names_from=type, values_from=count)
  290. separate(table3, rate, into = c("cases", "population"), sep = "/", convert = T)
  291. extract(table3, rate, into = c("cases", "population"), regex = "^(.*?)/(.*?)$", convert = T)
  292. unite(table5, "year", century, year, sep="") %>% mutate(year = as.integer(year))
  293. ## fix missing values
  294. complete(stocks, year, qtr) # make implicit missing values explicit
  295. drop_na(stocks)
  296. replace_na(stocks, list(return = 42))
  297. mutate(stocks, return = na_if(return, "my custom na"))
  298. # Apply und co.
  299. f <- function(x) sprintf("a random number is %.2f which is %.3f probable", x, pnorm(x))
  300. lapply(rnorm(100), f)
  301. sapply(rnorm(100), f)
  302. simplify2array(lapply(rnorm(100), f))
  303. replicate(5, runif(10), simplify = TRUE)
  304. mapply(`+`, 1:100, 1:100)
  305. m <- matrix(1:12, nrow=3)
  306. dimnames(m) <- list(cols=NULL, rows=NULL)
  307. apply(m, 1, mean)
  308. a <- array(1:24, dim=2:4, dimnames=list(x1=NULL, x2=NULL, x3=NULL))
  309. apply(a, c(1,2), range)
  310. outer(1:3, 1:5, `*`)
  311. funs <- list(mean=mean, median=median, var=var)
  312. X <- replicate(5, runif(sample(20, 1)), simplify=F)
  313. outer(X, funs, function(x_list, f_list) mapply(function(f, x) f(x), f_list, x_list))
  314. ## mathematische funktionale
  315. integrate(sin, 0, pi)
  316. optimize(sin, c(0, 2*pi), maximum=T)
  317. x <- seq(0, 2*pi, len=10)
  318. f_lin <- approxfun(x, sin(x))
  319. f_cub <- splinefun(x, sin(x))
  320. ## funktionsoperatoren
  321. fib <- function(n) {
  322. if (n<2) return(1)
  323. fib(n-2) + fib(n-1)
  324. }
  325. fib <- memoise::memoise(fib)
  326. capture.output(print("hi"))
  327. sample2 <- Vectorize(sample, "size")
  328. # types
  329. sapply(list(NULL,
  330. TRUE, # -- basetypes --
  331. 1L,
  332. 1,
  333. "",
  334. raw(3),
  335. 1+3i, # ^^ basetypes ^^
  336. list(),
  337. expr(a),
  338. expr(1+2),
  339. expression(1+2),
  340. env(),
  341. mean,
  342. sin,
  343. `[`),
  344. typeof) %>% unique()
  345. sapply(list(NULL,
  346. TRUE, # -- basetypes --
  347. 1L,
  348. 1,
  349. "",
  350. raw(3),
  351. 1+3i, # ^^ basetypes ^^
  352. list(),
  353. expr(a),
  354. expr(1+2),
  355. expression(1+2),
  356. env(),
  357. mean,
  358. tibble(), # tidyverse
  359. data.frame(),
  360. matrix(),
  361. array(),
  362. factor(),
  363. Sys.Date(),
  364. Sys.time(),
  365. proc.time()),
  366. class) %>% unique()
  367. ## basetypes
  368. misc <- list(T, 1:3, pi, "hi", 1+3i, raw(4))
  369. sapply(misc, typeof)
  370. is.numeric(c(0.3, 3)) # TRUE
  371. is.integer(0.3) # FALSE
  372. is.atomic(list()) # FALSE
  373. str(list(logical(3), integer(3), double(3), character(3), complex(3), raw(3)))
  374. ### integer
  375. c(42L, NA_integer_, 1:3, .Machine$integer.max)
  376. ### double
  377. c(3, 4.2, pi, 1e4, .Machine$xmax, Inf, NaN, NA_real_)
  378. c(1/0, -1/0, 0/0, 1/Inf)
  379. # numeric synonym zu double in class(), as_numeric(), numeric(n), print(), str()
  380. # ABER:
  381. is.numeric(1L) # TRUE
  382. # Not available type fuer double ist:
  383. NA_real_
  384. ### character
  385. "bla 'einca' asdf"
  386. cat('asdf "as" asdf \\ hi\n')
  387. x <- c("ä", "Hallo", "", NA_character_, "\n")
  388. nchar(x)
  389. cat(r"(\n \ )", "\n")
  390. letters
  391. LETTERS
  392. month.name
  393. month.abb
  394. ## coercion
  395. as.double("2.2")
  396. c(TRUE, 15)
  397. c(TRUE, 1L, 1, "eins")
  398. c(c(TRUE, 1L), 1, "eins")
  399. # attributes
  400. x <- seq(-2, 3, len=42)
  401. attr(x, "asdf") <- c(1,2)
  402. structure(x, "asdf" = c(1,2))
  403. attributes(x)$asdf <- c(1,2) # geht auch wenn x <- NULL
  404. attr(x, "asdf")
  405. attributes(x)$asdf
  406. identical(x, seq(-2, 3, len=42))
  407. ## names
  408. x <- 1:4
  409. attr(x, "names") <- letters[1:4]
  410. names(x) <- letters[1:4]
  411. x <- c("a"=1, "b"=2, "c"=3, "d"=4)
  412. unname(x) # does not modify x
  413. names(x) <- NULL # modifies x
  414. # S3 Objekte / Klassen
  415. x <- runif(100)
  416. attr(x, "class") <- "Toller Hecht"
  417. class(x) <- "Toller Hecht"
  418. unclass(x) # does not modify x
  419. class(x) <- NULL # modifies x
  420. ## implizite klassen
  421. # falls class nicht gesetzt -> implizite klasse: matrix, array, function, numeric oder typeof(x)
  422. ## spezielle S3 Objekte
  423. ### factors
  424. # integer vektoren mit class = factor und attribut levels
  425. x <- sample(13, 100, rep=T)
  426. x_factor <- factor(letters[x*2])
  427. typeof(x_factor) # integer
  428. class(x_factor) # factor
  429. str(attributes(x_factor)) # levels
  430. table(x_factor) # summarize factor
  431. x_factor[c(3,7), drop = TRUE] # wegwerfen der anderen level
  432. ### Date
  433. # double vektoren mit class = Date
  434. Sys.Date()
  435. str(attributes(Sys.Date()))
  436. unclass(Sys.Date()) # anzahl tage seit 1.1.1970
  437. ### POSIXct
  438. # double vektoren mit class = c(POSIXct, POSIXt)
  439. Sys.time()
  440. str(attributes(Sys.time()))
  441. unclass(Sys.time()) # anzahl sekunden seit 1.1.1970
  442. ### proc_time
  443. # double vektor der länge 5 mit class = proc_time
  444. proc.time()
  445. str(attributes(proc.time())) # in sekunden, genauer:
  446. # user: beanspruchung der CPU durch R
  447. # system: beanspruchung der CPU durch betriebssystem im auftrag von R
  448. # elapsed: gesamt
  449. str(unclass(proc.time()))
  450. system.time({
  451. data <- runif(1e6)
  452. stats <- c(mean(data), var(data))
  453. }) # gibt auch proc_time objekt
  454. ### tibble
  455. # data.frame mit class = c(tbl_df, tbl). ein data.frame ist wiederum eine liste seiner spalten
  456. tb <- tibble(x = 1:3, y = letters[3:1])
  457. str(attributes(tb))
  458. typeof(tb)
  459. str(unclass(tb))
  460. tb$x
  461. tb$z <- matrix(1:15, nrow=3) # kann matrizen
  462. tb$e <- tibble(a=sample(100, 3), b=rnorm(3)) # und tibbles
  463. as_tibble(as.matrix(tb[,-2]))
  464. # funktionen
  465. ## pipe
  466. pi %>% `*`(2,.) %>% sin %>% abs %>% `<`(., 1e-14) -> res
  467. ## eigene operatoren
  468. `%.%` <- function(f, g) function(...) f(g(...))
  469. `%$%` <- function(f, x) f(x)
  470. flip <- function(f) function(x, y) f(y, x)
  471. str %.% attributes %$% tibble (a = sample(100, 3), b = rnorm(3))
  472. ## ersetzungsfunktionen
  473. `modify<-` <- function(x, position, value) {
  474. x[position] <- value
  475. x
  476. }
  477. x <- 1:10
  478. modify(x, 3) <- 42L
  479. modify(names(x), 3) <- "hi"
  480. ## do.call
  481. do.call(mean, list(x=sample(100), na.rm=T))
  482. ## funktionstypen
  483. # funktionen haben class = function, aber
  484. c(typeof(mean), typeof(sum), typeof(`[`)) # builtin und special sind primitive, alle anderen closure
  485. f <- function(x) {
  486. # kommentar
  487. 10
  488. }
  489. formals(f)
  490. body(f)
  491. environment(f)
  492. formals(args(sum)) # argument liste primitiver funktionen
  493. ## lazy evaluation
  494. f(stop("hi")) # argument wird nicht ausgewertet
  495. ## default arguments
  496. f <- function(x = y*2, y = 1, z = a+b, e=ls()) { # defaults refer to other arguments
  497. a <- 10
  498. b <- 100
  499. print(e)
  500. cat(sprintf("missing x: %s, missing y: %s\n", missing(x), missing(y)))
  501. c(x, y, z)
  502. }
  503. f(x=3, e=ls()) # ls is evaluated in caller env
  504. f() # ls is evaluated in execution env
  505. ## dot-dot-dot
  506. f <- function(x, ...) { # potentially infinite additional parameters
  507. str(list(first = ..1, third = ..3)) # access additional parameters
  508. mean(unlist(list(...)))
  509. }
  510. f(1,2,3,42)
  511. ## return values
  512. invisible(42) # hide return value
  513. f <- function(x) {
  514. print("Hello")
  515. on.exit(print(1))
  516. on.exit(print(2), add = TRUE, after = FALSE)
  517. if (x) return(10) else stop("Error")
  518. print("Well...")
  519. }
  520. f(TRUE)
  521. f(FALSE)
  522. # strings
  523. ## basics
  524. str_length(c("a", "abs", NA, ""))
  525. str_c("prefix", LETTERS[sample(26, 3, rep=T)], "suffix", # vektorisiert in allen args
  526. sep = "-",
  527. collapse = " * ")
  528. x <- "Fritz Walter ist toll. "
  529. str_sub(x, start=3, end=5)
  530. str_sub(x, end=-5) # only drop last 4 chars
  531. str_sub(x, start=7, end=12) <- "Hasl"
  532. str_trim(x) # remove whitespace in beginning and end
  533. str_squish(x) # remove repeated whitespace inside
  534. str_to_upper(x)
  535. str_to_lower(x)
  536. str_to_title(x)
  537. str_dup(c("a", "bB"), 2:5)
  538. ## formatting
  539. str_glue("3 * pi = {format(3*pi, digits=3)} und das ist {'{toll}'}, weil {x}.") # auch vektorisiert
  540. str_glue_data(tibble(x=1:3, y=letters[1:3]), "Es ist x={x} <-> y={y}")
  541. ## regex
  542. str_detect(x, "[aeiou]") # alles vektorisiert
  543. str_subset(letters, "[aeiou]") # recycling
  544. str_which(letters, "[aeiou]")
  545. str_count(letters, letters[1:13])
  546. str_extract(c(x, "Stopp"), ".t")
  547. str_extract_all(c(x, "Stopp"), ".t")
  548. str_locate(c(x, "Stopp"), ".t")
  549. str_locate_all(c(x, "Stopp"), ".t")
  550. str_split(c("Kaffee,Klettern_Sport.Wald", "Bla.Furz"), "[,_\\.]")
  551. str_split("Fritz.kann.kochen", fixed(".K", ignore_case=T))
  552. str_match("Fritz.kann.kochen", "(.)\\.(.)") # gibt gruppen aus
  553. str_match_all("Fritz.kann.kochen", "(.)\\.(.)")
  554. str_replace("Fritz.kann.kochen", "(.)\\.(.)", "\\2$\\1") # use \1.. in replacement to access groups
  555. str_replace_all("Fritz.kann.kochen", "(.)\\.(.)", "\\2$\\1")
  556. # environments
  557. e1 <- env(a = FALSE, b = "a", c = 2.3, d = 1:3)
  558. c(class(e1), typeof(e1), length(e1))
  559. e2 <- e1 # environments werden per referenz uebergeben
  560. e2$a = T
  561. e1$a
  562. identical(e1, e2)
  563. e1$d <- e1
  564. ## env infos
  565. env_print(e1)
  566. env_names(e1)
  567. env_names(empty_env()) # leere umgebung
  568. env_names(global_env()) # globale umgebung (konsole)
  569. env_label(global_env())
  570. env_label(e1)
  571. env_label(current_env()) # aktuelle umgebung
  572. e2 <- env(e1, z = 42)
  573. env_parent(e1)
  574. env_parent(e2)
  575. env_parent(empty_env()) # error, leere umgebung hat kein parent
  576. env_parents(e1)
  577. env_parents(e2)
  578. ## manipulate environments
  579. env_bind(e1, z=69, asdf="fritz")
  580. env_get(e2, "b", inherit=T) # inherit = T search in parent env if not found here
  581. str(env_get_list(e1, c("a", "b", "d")))
  582. env_has(e2, c("b", "z"), inherit=F)
  583. env_unbind(e2, c("b", "z"))
  584. ## function environments
  585. a <- 42
  586. f <- function(x) x + a
  587. env_label(current_env())
  588. env_label(fn_env(f))
  589. fn_env(f) <- e1
  590. g <- function(x) {
  591. print(env_label(current_env()))
  592. function(y) x+y
  593. }
  594. h <- g(3)
  595. ## lazy evaluation
  596. x <- 1
  597. y <- 2
  598. env_bind_lazy(current_env(), v = c(x,y)) # creates promise v pointing to c(x,y)
  599. x <- 5
  600. v
  601. # OOP
  602. # sloop inspectino
  603. otype(tibble(x=1:3, y=3:1))
  604. s3_class(matrix(1:10, nrow=2))
  605. ftype(print)
  606. ftype(apply)
  607. s3_methods_generic("elapse_time")
  608. inherits(tibble(x=1:12), "tbl_df")
  609. s3_dispatch(print(tibble(x=1:12)))
  610. ## S3
  611. tyrion <- structure(list(name = "Tyrion", age = 42), class = c("Noble", "Person"))
  612. print.Person <- function(p) cat(str_glue("{p$name} ({p$age})"), "\n")
  613. elapse_time <- function(obj, time) { # generic function
  614. UseMethod("elapse_time")
  615. }
  616. elapse_time.Person <- function(p, time) { # implementierte methode
  617. p$age <- p$age + time
  618. p
  619. }
  620. elapse_time.default <- function(o, t) {
  621. stop(str_glue("Cannot elapse time for object of class {class(o)}."))
  622. }
  623. elapse_time.Noble <- function(p, time) {
  624. cat("A noble person does not age\n") # actually wrong
  625. NextMethod() # go to next method in dispatch list
  626. }
  627. ### constructor helpers
  628. new_Person <- function(x, subclass = NULL) {
  629. stopifnot("Person must be a list" = is.list(x))
  630. structure(x, class=c(subclass, "Person"))
  631. }
  632. new_Noble <- function(x, subclass = NULL) {
  633. new_Person(x, subclass=c(subclass, "Noble"))
  634. }
  635. validate_Person <- function(p) {
  636. if(!inherits(p, "Person")) stop("Object must inherit Person")
  637. invisible(p)
  638. }
  639. Person <- function(name, age) {
  640. p <- new_Person(list(name = name, age = age))
  641. validate_Person(p)
  642. p
  643. }
  644. ### coercion
  645. as_Person <- function(x, ...) UseMethod("as_Person")
  646. as_Person.Person <- `(`
  647. as_Person.character <- function(name, age) Person(name, age)
  648. ### internal generics
  649. c.numeric <- function(...) return("c: numeric")
  650. c(42) # does not use c.numeric because class is implicit
  651. c(structure(42, class="numeric"))
  652. ### group generics
  653. units <- c(kg = 1, g = 1e-3, mg = 1e-6)
  654. m <- structure(1:12, class="Mass", unit="kg")
  655. m2 <- structure(350, class="Mass", unit="g")
  656. print.Mass <- function(x) print(str_glue("{x} {attr(x, 'unit')}"))
  657. Summary.Mass <- function(x, ...) {
  658. m <- NextMethod()
  659. structure(m, class="Mass", unit=attr(x, "unit"))
  660. }
  661. ### double dispatch
  662. `+.Person` <- function(x, y) print("+ on person")
  663. `+.Mass` <- function(x, y) {
  664. a <- unclass(x)
  665. b <- unclass(y)
  666. structure(a * units[[attr(x, "unit")]] + b * units[[attr(y, "unit")]], class="Mass",
  667. unit = "kg")
  668. }
  669. m + m2
  670. m + tyrion # error because of incompatible methods
  671. m + 1 # error in +.Mass
  672. ## S4
  673. .Person <- setClass("Person", slots = c(name = "character", age = "numeric"))
  674. .Employee <- setClass("Employee", contains = "Person", slots = c(boss = "Person"))
  675. Person <- function(name, age) {
  676. stopifnot(length(name) == 1 && length(age))
  677. .Person(name = name, age = age)
  678. }
  679. Employee <- function(name, age, boss) {
  680. p <- Person(name = name, age = age)
  681. .Employee(p, boss = boss)
  682. }
  683. tyrion <- Person(name = "Tyrion", age = 42)
  684. peter <- Employee(name = "Peter", age = 13, boss = tyrion)
  685. tyrion@age <- tyrion@age + 1
  686. tyrion@name <- 42
  687. slotNames(tyrion)
  688. is(tyrion)
  689. is(peter, "Person")
  690. typeof(tyrion)
  691. class(tyrion)
  692. ### generics
  693. setGeneric("laugh", function(x) standardGeneric("laugh"))
  694. setMethod("laugh", "Person", function(x) {
  695. cat(str_glue("{x@name} laughs out loudly."), "\n")
  696. })
  697. setMethod("laugh", "Employee", function(x) {
  698. cat(str_glue("{x@name} laughs out shyly."), "\n")
  699. })
  700. formals(getGeneric("show"))
  701. setMethod("show", "Person", function(object) {
  702. cat(str_glue("{object@name} ({object@age})"), "\n")
  703. })
  704. setMethod("show", "Employee", function(object) {
  705. cat(str_glue("{object@name} ({object@age}) an Employee"), "\n")
  706. })
  707. ### getters and setters
  708. setGeneric("name", function(x) standardGeneric("name"))
  709. setMethod("name", "Person", function(x) x@name)
  710. setGeneric("name<-", function(x, value) standardGeneric("name<-"))
  711. setMethod("name<-", "Person", function(x, value) {
  712. x@name <- value
  713. x
  714. })
  715. ### coercion
  716. as(peter, "Person") # default
  717. emp <- as(tyrion, "Employee") # default
  718. emp@boss
  719. setAs("Person", "Employee", function(from) {
  720. stop("Can not coerce a Person to an Employee")
  721. })
  722. ### introspection
  723. s4_methods_generic("laugh")
  724. s4_methods_class("Person")
  725. ### inheriting S3 classes
  726. RangedNumeric <- setClass(
  727. "RangedNumeric",
  728. contains = "numeric",
  729. slots = c(min = "numeric", max = "numeric")
  730. )
  731. rn <- RangedNumeric(1:10, min=1, max=10)
  732. rn@.Data # access underlying S3 or implicite class
  733. # meta programming
  734. ## basics
  735. x <- expr(3+5*y)
  736. y <- 12
  737. eval(x)
  738. str(exprs(name = 1+2, f(x), sin(42)))
  739. typeof(expr(1))
  740. c(typeof(expr(variable)), class(expr(variable))) # symbol name
  741. c(typeof(expr(sin(x))), class(expr(sin(x)))) # language call
  742. lobstr::ast(f(g(x, h(), 1), x))
  743. a <- expr(f(g(x, h(), 1), x))
  744. str(as.list(a))
  745. a[[2]][[3]][[1]]
  746. a[[c(2,3,1)]] <- expr(q)
  747. expr_text(a)
  748. identical(parse_expr("f(g(x, q(), 1), x)"), a)
  749. parse_exprs("x <- 4\nprint(x+5)") # parse multiple expressions as expr list
  750. ## arguments
  751. f <- function(x, ...) {
  752. print(enexpr(x)) # does not use promise
  753. force(x)
  754. print(enexpr(x))
  755. enexprs(...)
  756. }
  757. f(3+4, sin(pi), 0/0)
  758. ## evaluation
  759. x <- y <- 3
  760. eval(expr(x+y), envir = env(`+` = `-`))
  761. ## quosures
  762. f <- function() {
  763. x <- 10
  764. quo(1+x)
  765. }
  766. x <- 100
  767. q <- f()
  768. eval(quo_get_expr(q)) # evaluates in current environment
  769. eval_tidy(q) # evaluates in captured environment
  770. expr_text(quo(1+2)) # prints weird ~ in front
  771. quo_text(quo(1+2))
  772. foo <- function(x, y) {
  773. z <- 1
  774. x <- enexpr(x)
  775. y <- enquo(y)
  776. c(eval(x), eval_tidy(y))
  777. }
  778. z <- 100
  779. foo(z*2, z*2) # 2 200 (x is evaluated in execution env of foo, y is evaluated in global env)
  780. eval_tidy(expr(a <- 42)) # does not actually assign
  781. eval(expr(a <- 42)) # does assign
  782. x <- 1
  783. eval_tidy(quo(x+y), list(y = 100))
  784. ## tidy eval framework
  785. ### bang bang
  786. a <- expr(f(x))
  787. expr(1 + !!a) # Bang-Bang Operator
  788. lobstr::ast(expr(f(x)))
  789. lobstr::ast(!!expr(f(x)))
  790. ### big bang
  791. x <- 1:3
  792. str(exprs(!!x))
  793. str(exprs(!!!x)) # big bang
  794. ### walrus
  795. name <- "asdf"
  796. tibble(!!name := 1:5)
  797. ### pronouns
  798. y <- 4
  799. tb <- tibble(x = 1:3, y=3:1)
  800. eval_tidy(quo(.data$y + .env$y*10), tb) # .data datamask, .env environment
  801. ### geschachtelte quosures
  802. x <- 1
  803. lobstr::ast(7 + !!(x+2))
  804. lobstr::ast(7 + !!quo(x+2)) # unwraps quosure and puts it in main quosure
  805. y <- 1
  806. create_q <- function() {
  807. y <- 10
  808. quo(y)
  809. }
  810. qu <- create_q()
  811. eval_tidy(quo(y + !!qu))
  812. ## applications
  813. var <- quo(y)
  814. tb <- tibble(x=1:5, y = 5:1)
  815. eval_tidy(quo(!!var == 4), tb)
  816. var <- "y"
  817. eval_tidy(quo(.data[[var]] == 4), tb)
  818. mysummarise <- function(tb, mean_var, ...) {
  819. qg <- enquos(...)
  820. qm <- enquo(mean_var)
  821. mean_name <- str_glue("mean_{quo_name(qm)}")
  822. tb %>%
  823. group_by(!!!qg) %>%
  824. summarise(!!mean_name := mean(!!qm))
  825. }
  826. filterx <- function(var) {
  827. filter2(tb, {{var}} == 1)
  828. }
  829. source2 <- function(path) {
  830. l <- readLines(path)
  831. exs <- lapply(l, parse_expr)
  832. model <- env(base_env())
  833. for (ex in exs)
  834. res <- eval_tidy(ex, model)
  835. res
  836. }
  837. source2("test.R")
  838. namedlist <- function(...) {
  839. qs <- enquos(...)
  840. l <- list(...)
  841. names(l) <- lapply(qs, quo_text)
  842. l
  843. }
  844. namedlist(letters, 1:10)
  845. filter2 <- function(tb, ex) {
  846. ex <- enquo(ex)
  847. tb[eval_tidy(ex, tb), ]
  848. }
  849. mkInfix <- function(f) {
  850. f <- enexpr(f)
  851. e <- caller_env()
  852. name <- expr_text(f)
  853. e[[str_glue("%{name}%")]] <- eval(f)
  854. }
  855. `%->%` <- function(x, y) {
  856. y <- enexpr(y)
  857. e <- caller_env()
  858. e[[expr_text(y)]] <- x
  859. invisible(x)
  860. }
  861. # conditions
  862. stop("Error!")
  863. stopifnot("Error!" = 3 == 4)
  864. warning("Warning!")
  865. warnings()
  866. last.warning
  867. message("Message!")
  868. {
  869. try(stop("hi"), silent = FALSE)
  870. print("hallo")
  871. }
  872. suppressWarnings({warning("warn"); message("msg")})
  873. suppressMessages({warning("warn"); message("msg")})
  874. tryCatch(
  875. message = msgHandler,
  876. warning = warnHandler,
  877. error = errorHandler,
  878. expr = {
  879. }
  880. )
  881. withCallingHandlers(
  882. message = msgHandler,
  883. warning = warnHandler,
  884. error = errorHandler,
  885. expr = {
  886. }
  887. )
  888. outerVec <- function(a, b, f) {
  889. res <- array(0, c(length(a), length(b), length(f(a[[1]], b[[1]]))))
  890. for (i in seq_along(a)) {
  891. for (j in seq_along(b)) {
  892. res[i,j,] <- f(a[[i]], b[[j]])
  893. }
  894. }
  895. res
  896. }
  897. outerVec(1:5, 1:5, function(x, y) c(x,y))
  898. cons <- function(h, t) function(x) if(x) h else t
  899. `%:%` <- function(h, t) cons(h, t)
  900. itake <- function(x, k) {
  901. res <- c()
  902. for (i in 1:k) {
  903. res <- c(res, ihead(x))
  904. x <- itail(x)
  905. }
  906. res
  907. }
  908. itail <- function(x) if(is.null(x)) NULL else x(F)
  909. ihead <- function(x) if(is.null(x)) NULL else x(T)
  910. nocons <- function(t) function(x) if(x) ihead(t) else itail(t)
  911. izip <- function(f, x, y) {
  912. cons(f(ihead(x), ihead(y)), izip(f, itail(x), itail(y)))
  913. }
  914. iapply <- function(x, f) f(ihead(x)) %:% iapply(itail(x), f)
  915. ifilter <- function(x, pred) {
  916. if(pred(ihead(x))) {
  917. ihead(x) %:% ifilter(itail(x), pred)
  918. } else {
  919. nocons(ifilter(itail(x), pred))
  920. }
  921. }
  922. ifold <- function(xs, f, acc) {
  923. ifold(itail(xs), f, f(ihead(xs), acc))
  924. }
  925. fibs <- 1 %:% (1 %:% izip(`+`, fibs, itail(fibs)))
  926. seq1 <- cons(1, seq1)
  927. ns <- 1 %:% izip(`+`, seq1, ns)
  928. iseq <- function(a, b=NULL, by=1) {
  929. d <- cons(by, d)
  930. res <- a %:% izip(`+`, d, res)
  931. if (is.null(b)) return(res)
  932. ifilter(res, function(x) x <= b)
  933. }
  934. mk_primes <- function(xs) {
  935. ihead(xs) %:% mk_primes(ifilter(itail(xs), function(a) a %% ihead(xs) != 0))
  936. }
  937. primes <- mk_primes(itail(ns))