Skip to content

Commit 279397d

Browse files
jakejh and jimhester authored
adding flatten argument for xml_find_all (#312)
Co-authored-by: Jim Hester <james.f.hester@gmail.com>
1 parent b5d305f commit 279397d

File tree

5 files changed

+61
-21
lines changed

5 files changed

+61
-21
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: xml2
22
Title: Parse XML
3-
Version: 1.3.2.9000
3+
Version: 1.3.2.9001
44
Authors@R:
55
c(person(given = "Hadley",
66
family = "Wickham",
@@ -40,7 +40,7 @@ VignetteBuilder:
4040
knitr
4141
Encoding: UTF-8
4242
Roxygen: list(markdown = TRUE)
43-
RoxygenNote: 7.1.0
43+
RoxygenNote: 7.1.1
4444
SystemRequirements: libxml2: libxml2-dev (deb), libxml2-devel
4545
(rpm)
4646
Collate:

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# xml2 (development version)
22

3+
* `xml_find_all.xml_nodeset()` gains a `flatten` argument to control whether to return a single nodeset or a list of nodesets (#311, @jakejh)
4+
35
* `write_xml()` and `write_html()` now return NULL invisibly, as they did prior to version 1.3.0 (#307)
46

57
# xml2 1.3.2

R/xml_find.R

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@
1212

1313
#' @param xpath A string containing a xpath (1.0) expression.
1414
#' @inheritParams xml_name
15-
#' @return `xml_find_all` always returns a nodeset: if there are no matches
16-
#' the nodeset will be empty. The result will always be unique; repeated
17-
#' nodes are automatically de-duplicated.
15+
#' @param ... Further arguments passed to or from other methods.
16+
#' @return `xml_find_all` returns a nodeset if applied to a node, and a nodeset
17+
#' or a list of nodesets if applied to a nodeset. If there are no matches,
18+
#' the nodeset(s) will be empty. Within each nodeset, the result will always
19+
#' be unique; repeated nodes are automatically de-duplicated.
1820
#'
1921
#' `xml_find_first` returns a node if applied to a node, and a nodeset
2022
#' if applied to a nodeset. The output is *always* the same size as
@@ -46,11 +48,16 @@
4648
#' </body>")
4749
#' para <- xml_find_all(x, ".//p")
4850
#'
49-
#' # If you apply xml_find_all to a nodeset, it finds all matches,
50-
#' # de-duplicates them, and returns as a single list. This means you
51+
#' # By default, if you apply xml_find_all to a nodeset, it finds all matches,
52+
#' # de-duplicates them, and returns as a single nodeset. This means you
5153
#' # never know how many results you'll get
5254
#' xml_find_all(para, ".//b")
5355
#'
56+
#' # If you set flatten to FALSE, though, xml_find_all will return a list of
57+
#' # nodesets, where each nodeset contains the matches for the corresponding
58+
#' # node in the original nodeset.
59+
#' xml_find_all(para, ".//b", flatten = FALSE)
60+
#'
5461
#' # xml_find_first only returns the first match per input node. If there are 0
5562
#' # matches it will return a missing node
5663
#' xml_find_first(para, ".//b")
@@ -67,31 +74,37 @@
6774
#' ')
6875
#' xml_find_all(x, ".//f:doc")
6976
#' xml_find_all(x, ".//f:doc", xml_ns(x))
70-
xml_find_all <- function(x, xpath, ns = xml_ns(x)) {
77+
xml_find_all <- function(x, xpath, ns = xml_ns(x), ...) {
7178
UseMethod("xml_find_all")
7279
}
7380

7481
#' @export
75-
xml_find_all.xml_missing <- function(x, xpath, ns = xml_ns(x)) {
82+
xml_find_all.xml_missing <- function(x, xpath, ns = xml_ns(x), ...) {
7683
xml_nodeset()
7784
}
7885

7986
#' @export
80-
xml_find_all.xml_node <- function(x, xpath, ns = xml_ns(x)) {
87+
xml_find_all.xml_node <- function(x, xpath, ns = xml_ns(x), ...) {
8188
nodes <- .Call(xpath_search, x$node, x$doc, xpath, ns, Inf)
8289
xml_nodeset(nodes)
8390
}
8491

92+
#' @param flatten A logical indicating whether to return a single, flattened
93+
#' nodeset or a list of nodesets.
8594
#' @export
86-
xml_find_all.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) {
95+
#' @rdname xml_find_all
96+
xml_find_all.xml_nodeset <- function(x, xpath, ns = xml_ns(x), flatten = TRUE, ...) {
8797
if (length(x) == 0)
8898
return(xml_nodeset())
8999

90-
nodes <- unlist(recursive = FALSE,
91-
lapply(x, function(x)
92-
.Call(xpath_search, x$node, x$doc, xpath, ns, Inf)))
100+
res <- lapply(x, function(x) .Call(xpath_search, x$node, x$doc, xpath, ns, Inf))
93101

94-
xml_nodeset(nodes)
102+
if (isTRUE(flatten)) {
103+
return(xml_nodeset(unlist(recursive = FALSE, res)))
104+
}
105+
106+
res[] <- lapply(res, xml_nodeset)
107+
res
95108
}
96109

97110
#' @export

man/xml_find_all.Rd

Lines changed: 20 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-xml_find.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,17 @@ test_that("no matches returns empty nodeset", {
4949
expect_equal(length(xml_find_all(x, "//baz")), 0)
5050
})
5151

52+
test_that("xml_find_all returns nodeset or list of nodesets based on flatten", {
53+
x <- read_xml("<body><p>Some <b>text</b>.</p>
54+
<p>Some <b>other</b> <b>text</b>.</p>
55+
<p>No bold here!</p></body>")
56+
y <- xml_find_all(x, './/p')
57+
z <- xml_find_all(y, './/b', flatten = FALSE)
58+
expect_s3_class(xml_find_all(y, './/b'), 'xml_nodeset')
59+
expect_type(z, 'list')
60+
expect_s3_class(z[[1L]], 'xml_nodeset')
61+
})
62+
5263
# Find num ---------------------------------------------------------------------
5364
test_that("xml_find_num errors with non numeric results", {
5465
x <- read_xml("<x><y/><y/></x>")

0 commit comments

Comments
 (0)