Skip to content

Commit 55f4cc8

Browse files
authored
Merge pull request #2238 from r-spatial/paleolimbot-stream-reading
Paleolimbot stream reading
2 parents 9f302ee + 63afb30 commit 55f4cc8

21 files changed

+345
-68
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ Imports:
7373
utils
7474
Suggests:
7575
blob,
76+
nanoarrow,
7677
covr,
7778
dplyr (>= 0.8-3),
7879
ggplot2,

R/RcppExports.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ CPL_read_ogr <- function(datasource, layer, query, options, quiet, toTypeUser, f
113113
.Call(`_sf_CPL_read_ogr`, datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, width)
114114
}
115115

116+
CPL_read_gdal_stream <- function(stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column, width) {
117+
.Call(`_sf_CPL_read_gdal_stream`, stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column, width)
118+
}
119+
116120
CPL_gdalinfo <- function(obj, options, oo, co) {
117121
.Call(`_sf_CPL_gdalinfo`, obj, options, oo, co)
118122
}

R/cast_sfg.R

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ ClosePol <- function(mtrx) {
4343
#' @examples
4444
#' # example(st_read)
4545
#' nc = st_read(system.file("shape/nc.shp", package="sf"))
46-
#' mpl <- nc$geometry[[4]]
46+
#' mpl <- st_geometry(nc)[[4]]
4747
#' #st_cast(x) ## error 'argument "to" is missing, with no default'
4848
#' cast_all <- function(xg) {
4949
#' lapply(c("MULTIPOLYGON", "MULTILINESTRING", "MULTIPOINT", "POLYGON", "LINESTRING", "POINT"),
@@ -81,7 +81,7 @@ st_cast.MULTIPOLYGON <- function(x, to, ...) {
8181
#' @name st_cast
8282
#' @export
8383
#' @examples
84-
#' mls <- st_cast(nc$geometry[[4]], "MULTILINESTRING")
84+
#' mls <- st_cast(st_geometry(nc)[[4]], "MULTILINESTRING")
8585
#' st_sfc(cast_all(mls))
8686
st_cast.MULTILINESTRING <- function(x, to, ...) {
8787
switch(to,
@@ -108,7 +108,7 @@ st_cast.MULTILINESTRING <- function(x, to, ...) {
108108
#' @name st_cast
109109
#' @export
110110
#' @examples
111-
#' mpt <- st_cast(nc$geometry[[4]], "MULTIPOINT")
111+
#' mpt <- st_cast(st_geometry(nc)[[4]], "MULTIPOINT")
112112
#' st_sfc(cast_all(mpt))
113113
st_cast.MULTIPOINT <- function(x, to, ...) {
114114
switch(to,
@@ -135,7 +135,7 @@ st_cast.MULTIPOINT <- function(x, to, ...) {
135135
#' @name st_cast
136136
#' @export
137137
#' @examples
138-
#' pl <- st_cast(nc$geometry[[4]], "POLYGON")
138+
#' pl <- st_cast(st_geometry(nc)[[4]], "POLYGON")
139139
#' st_sfc(cast_all(pl))
140140
st_cast.POLYGON <- function(x, to, ...) {
141141
switch(to,
@@ -156,7 +156,7 @@ st_cast.POLYGON <- function(x, to, ...) {
156156
#' @name st_cast
157157
#' @export
158158
#' @examples
159-
#' ls <- st_cast(nc$geometry[[4]], "LINESTRING")
159+
#' ls <- st_cast(st_geometry(nc)[[4]], "LINESTRING")
160160
#' st_sfc(cast_all(ls))
161161
st_cast.LINESTRING <- function(x, to, ...) {
162162
switch(to,
@@ -173,7 +173,7 @@ st_cast.LINESTRING <- function(x, to, ...) {
173173
#' @name st_cast
174174
#' @export
175175
#' @examples
176-
#' pt <- st_cast(nc$geometry[[4]], "POINT")
176+
#' pt <- st_cast(st_geometry(nc)[[4]], "POINT")
177177
#' ## st_sfc(cast_all(pt)) ## Error: cannot create MULTIPOLYGON from POINT
178178
#' st_sfc(lapply(c("POINT", "MULTIPOINT"), function(x) st_cast(pt, x)))
179179
st_cast.POINT <- function(x, to, ...) {

R/read.R

Lines changed: 82 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ set_utf8 = function(x) {
5151
#' of LineString and MultiLineString, or of Polygon and MultiPolygon, convert
5252
#' all to the Multi variety; defaults to \code{TRUE}
5353
#' @param stringsAsFactors logical; logical: should character vectors be
54-
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
54+
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
5555
#' \code{FALSE}, for \code{st_read} and R version < 4.1.0 equal to
5656
#' \code{default.stringsAsFactors()}
5757
#' @param int64_as_string logical; if TRUE, Int64 attributes are returned as
@@ -146,7 +146,7 @@ st_read.default = function(dsn, layer, ...) {
146146
}
147147

148148
process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
149-
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
149+
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
150150
geometry_column = 1, as_tibble = FALSE, optional = FALSE) {
151151

152152
which.geom = which(vapply(x, function(f) inherits(f, "sfc"), TRUE))
@@ -156,7 +156,7 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
156156

157157
# in case no geometry is present:
158158
if (length(which.geom) == 0) {
159-
if (! quiet)
159+
if (! quiet)
160160
warning("no simple feature geometries present: returning a data.frame or tbl_df", call. = FALSE)
161161
x = if (!as_tibble) {
162162
if (any(sapply(x, is.list)))
@@ -192,8 +192,13 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
192192
for (i in seq_along(lc.other))
193193
x[[ nm.lc[i] ]] = list.cols[[i]]
194194

195-
for (i in seq_along(geom))
196-
x[[ nm[i] ]] = st_sfc(geom[[i]], crs = attr(geom[[i]], "crs")) # computes bbox
195+
for (i in seq_along(geom)) {
196+
if (is.null(attr(geom[[i]], "bbox"))) {
197+
x[[ nm[i] ]] = st_sfc(geom[[i]], crs = attr(geom[[i]], "crs")) # computes bbox
198+
} else {
199+
x[[ nm[i] ]] = geom[[i]]
200+
}
201+
}
197202

198203
x = st_as_sf(x, ...,
199204
sf_column_name = if (is.character(geometry_column)) geometry_column else nm[geometry_column],
@@ -204,20 +209,72 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
204209
x
205210
}
206211

212+
# Allow setting the default to TRUE to make it easier to run existing tests
213+
# of st_read() through the stream interface
214+
default_st_read_use_stream = function() {
215+
getOption(
216+
"sf.st_read_use_stream",
217+
identical(Sys.getenv("R_SF_ST_READ_USE_STREAM"), "true")
218+
)
219+
}
220+
221+
process_cpl_read_ogr_stream = function(x, default_crs, num_features, fid_column_name,
222+
crs = NULL, ...) {
223+
is_geometry_column = vapply(
224+
x$get_schema()$children,
225+
function(s) identical(s$metadata[["ARROW:extension:name"]], "ogc.wkb"),
226+
logical(1)
227+
)
228+
229+
crs = if (is.null(crs)) st_crs(default_crs) else st_crs(crs)
230+
if (num_features == -1) {
231+
num_features = NULL
232+
}
233+
df = suppressWarnings(nanoarrow::convert_array_stream(x, size = num_features))
234+
235+
df[is_geometry_column] = lapply(df[is_geometry_column], function(x) {
236+
class(x) <- "WKB"
237+
x <- st_as_sfc(x)
238+
st_set_crs(x, crs)
239+
})
240+
241+
# # Prefer "geometry" as the geometry column name
242+
# if (any(is_geometry_column) && !("geometry" %in% names(df))) {
243+
# names(df)[which(is_geometry_column)[1]] = "geometry"
244+
# }
245+
246+
# Rename OGC_FID to fid_column_name and move to end
247+
if (length(fid_column_name) == 1 && "OGC_FID" %in% names(df)) {
248+
df <- df[c(setdiff(names(df), "OGC_FID"), "OGC_FID")]
249+
names(df)[names(df) == "OGC_FID"] = fid_column_name
250+
}
251+
252+
# Move geometry to the end
253+
# if ("geometry" %in% names(df)) {
254+
# df <- df[c(setdiff(names(df), "geometry"), "geometry")]
255+
# }
256+
gc1 = which(is_geometry_column)[1]
257+
df = df[c(setdiff(seq_along(df), gc1), gc1)]
258+
259+
process_cpl_read_ogr(df, ...)
260+
}
261+
207262
#' @name st_read
208263
#' @param fid_column_name character; name of column to write feature IDs to; defaults to not doing this
209264
#' @param drivers character; limited set of driver short names to be tried (default: try all)
210265
#' @param wkt_filter character; WKT representation of a spatial filter (may be used as bounding box, selecting overlapping geometries); see examples
211266
#' @param optional logical; passed to \link[base]{as.data.frame}; always \code{TRUE} when \code{as_tibble} is \code{TRUE}
267+
#' @param use_stream Use TRUE to use the experimental columnar interface introduced in GDAL 3.6.
212268
#' @note The use of \code{system.file} in examples make sure that examples run regardless where R is installed:
213269
#' typical users will not use \code{system.file} but give the file name directly, either with full path or relative
214270
#' to the current working directory (see \link{getwd}). "Shapefiles" consist of several files with the same basename
215271
#' that reside in the same directory, only one of them having extension \code{.shp}.
216272
#' @export
217-
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
273+
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
218274
type = 0, promote_to_multi = TRUE, stringsAsFactors = sf_stringsAsFactors(),
219275
int64_as_string = FALSE, check_ring_dir = FALSE, fid_column_name = character(0),
220-
drivers = character(0), wkt_filter = character(0), optional = FALSE) {
276+
drivers = character(0), wkt_filter = character(0), optional = FALSE,
277+
use_stream = default_st_read_use_stream()) {
221278

222279
layer = if (missing(layer))
223280
character(0)
@@ -233,11 +290,22 @@ st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet
233290
if (length(promote_to_multi) > 1)
234291
stop("`promote_to_multi' should have length one, and applies to all geometry columns")
235292

236-
x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
237-
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))
238-
process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
239-
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
240-
optional = optional, ...)
293+
294+
295+
if (use_stream) {
296+
stream = nanoarrow::nanoarrow_allocate_array_stream()
297+
info = CPL_read_gdal_stream(stream, dsn, layer, query, as.character(options), quiet,
298+
drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column_name, getOption("width"))
299+
process_cpl_read_ogr_stream(stream, default_crs = info[[1]], num_features = info[[2]],
300+
fid_column_name = fid_column_name, stringsAsFactors = stringsAsFactors, quiet = quiet, ...)
301+
} else {
302+
x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
303+
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))
304+
305+
process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
306+
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
307+
optional = optional, ...)
308+
}
241309
}
242310

243311
#' @name st_read
@@ -606,7 +674,7 @@ print.sf_layers = function(x, ...) {
606674
#' @param options character; driver dependent dataset open options, multiple options supported.
607675
#' @param do_count logical; if TRUE, count the features by reading them, even if their count is not reported by the driver
608676
#' @name st_layers
609-
#' @return list object of class \code{sf_layers} with elements
677+
#' @return list object of class \code{sf_layers} with elements
610678
#' \describe{
611679
#' \item{name}{name of the layer}
612680
#' \item{geomtype}{list with for each layer the geometry types}
@@ -751,7 +819,7 @@ check_append_delete <- function(append, delete) {
751819

752820
#' @name st_write
753821
#' @export
754-
#' @details st_delete deletes layer(s) in a data source, or a data source if layers are
822+
#' @details st_delete deletes layer(s) in a data source, or a data source if layers are
755823
#' omitted; it returns TRUE on success, FALSE on failure, invisibly.
756824
st_delete = function(dsn, layer = character(0), driver = guess_driver_can_write(dsn), quiet = FALSE) {
757825
invisible(CPL_delete_ogr(dsn, layer, driver, quiet) == 0)

R/tidyverse.R

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ mutate.sf <- function(.data, ..., .dots) {
129129
#' @name tidyverse
130130
#' @examples
131131
#' if (require(dplyr, quietly = TRUE)) {
132-
#' nc %>% transmute(AREA = AREA/10, geometry = geometry) %>% class()
133132
#' nc %>% transmute(AREA = AREA/10) %>% class()
134133
#' }
135134
transmute.sf <- function(.data, ..., .dots) {
@@ -144,9 +143,7 @@ transmute.sf <- function(.data, ..., .dots) {
144143
#' @examples
145144
#' if (require(dplyr, quietly = TRUE)) {
146145
#' nc %>% select(SID74, SID79) %>% names()
147-
#' nc %>% select(SID74, SID79, geometry) %>% names()
148146
#' nc %>% select(SID74, SID79) %>% class()
149-
#' nc %>% select(SID74, SID79, geometry) %>% class()
150147
#' }
151148
#' @details \code{select} keeps the geometry regardless whether it is selected or not; to deselect it, first pipe through \code{as.data.frame} to let dplyr's own \code{select} drop it.
152149
select.sf <- function(.data, ...) {
@@ -391,7 +388,7 @@ distinct.sf <- function(.data, ..., .keep_all = FALSE) {
391388
#' @param na.rm see original function docs
392389
#' @param factor_key see original function docs
393390
#' @examples
394-
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE)) {
391+
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE) && "geometry" %in% names(nc)) {
395392
#' nc %>% select(SID74, SID79) %>% gather("VAR", "SID", -geometry) %>% summary()
396393
#' }
397394
gather.sf <- function(data, key, value, ..., na.rm = FALSE, convert = FALSE, factor_key = FALSE) {
@@ -527,7 +524,7 @@ pivot_wider.sf = function(data,
527524
#' @param fill see original function docs
528525
#' @param drop see original function docs
529526
#' @examples
530-
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE)) {
527+
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE) && "geometry" %in% names(nc)) {
531528
#' nc$row = 1:100 # needed for spread to work
532529
#' nc %>% select(SID74, SID79, geometry, row) %>%
533530
#' gather("VAR", "SID", -geometry, -row) %>%

R/transform.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ chk_mpol = function(x) {
2525
sanity_check = function(x) {
2626
d = st_dimension(x) # flags empty geoms as NA
2727
if (any(d == 2, na.rm = TRUE)) { # the polygon stuff
28+
x = st_cast(x[d == 2]) # convert GEOMETRY to POLYGON or MULTIPOLYGON, if possible
2829
if (inherits(x, "sfc_POLYGON"))
2930
st_sfc(lapply(x, chk_pol), crs = st_crs(x))
3031
else if (inherits(x, "sfc_MULTIPOLYGON"))

man/st_cast.Rd

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/st_layers.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/st_read.Rd

Lines changed: 5 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/st_write.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)