Skip to content

Commit ca3a5ff

Browse files
committed
start to add stream interface
1 parent a88e29c commit ca3a5ff

File tree

6 files changed

+252
-28
lines changed

6 files changed

+252
-28
lines changed

R/RcppExports.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,17 @@ CPL_read_ogr <- function(datasource, layer, query, options, quiet, toTypeUser, f
113113
.Call(`_sf_CPL_read_ogr`, datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, width)
114114
}
115115

116+
<<<<<<< HEAD
116117
CPL_gdalinfo <- function(obj, options, oo, co) {
117118
.Call(`_sf_CPL_gdalinfo`, obj, options, oo, co)
119+
=======
120+
CPL_read_gdal_stream <- function(stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists = TRUE, dsn_isdb = FALSE, width = 80L) {
121+
.Call('_sf_CPL_read_gdal_stream', PACKAGE = 'sf', stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, width)
122+
}
123+
124+
CPL_gdalinfo <- function(obj, options, oo) {
125+
.Call('_sf_CPL_gdalinfo', PACKAGE = 'sf', obj, options, oo)
126+
>>>>>>> 6829782b (start to add stream interface)
118127
}
119128

120129
CPL_ogrinfo <- function(obj, options, oo, co) {

R/read.R

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ set_utf8 = function(x) {
5151
#' of LineString and MultiLineString, or of Polygon and MultiPolygon, convert
5252
#' all to the Multi variety; defaults to \code{TRUE}
5353
#' @param stringsAsFactors logical; logical: should character vectors be
54-
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
54+
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
5555
#' \code{FALSE}, for \code{st_read} and R version < 4.1.0 equal to
5656
#' \code{default.stringsAsFactors()}
5757
#' @param int64_as_string logical; if TRUE, Int64 attributes are returned as
@@ -146,7 +146,7 @@ st_read.default = function(dsn, layer, ...) {
146146
}
147147

148148
process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
149-
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
149+
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
150150
geometry_column = 1, as_tibble = FALSE, optional = FALSE) {
151151

152152
which.geom = which(vapply(x, function(f) inherits(f, "sfc"), TRUE))
@@ -156,7 +156,7 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
156156

157157
# in case no geometry is present:
158158
if (length(which.geom) == 0) {
159-
if (! quiet)
159+
if (! quiet)
160160
warning("no simple feature geometries present: returning a data.frame or tbl_df", call. = FALSE)
161161
x = if (!as_tibble) {
162162
if (any(sapply(x, is.list)))
@@ -204,6 +204,10 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
204204
x
205205
}
206206

207+
process_cpl_read_ogr_stream = function(x, ...) {
208+
x
209+
}
210+
207211
#' @name st_read
208212
#' @param fid_column_name character; name of column to write feature IDs to; defaults to not doing this
209213
#' @param drivers character; limited set of driver short names to be tried (default: try all)
@@ -214,10 +218,11 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
214218
#' to the current working directory (see \link{getwd}). "Shapefiles" consist of several files with the same basename
215219
#' that reside in the same directory, only one of them having extension \code{.shp}.
216220
#' @export
217-
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
221+
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
218222
type = 0, promote_to_multi = TRUE, stringsAsFactors = sf_stringsAsFactors(),
219223
int64_as_string = FALSE, check_ring_dir = FALSE, fid_column_name = character(0),
220-
drivers = character(0), wkt_filter = character(0), optional = FALSE) {
224+
drivers = character(0), wkt_filter = character(0), optional = FALSE,
225+
use_stream = FALSE) {
221226

222227
layer = if (missing(layer))
223228
character(0)
@@ -233,11 +238,24 @@ st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet
233238
if (length(promote_to_multi) > 1)
234239
stop("`promote_to_multi' should have length one, and applies to all geometry columns")
235240

236-
x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
237-
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))
238-
process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
239-
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
240-
optional = optional, ...)
241+
242+
243+
if (use_stream) {
244+
stream = nanoarrow::nanoarrow_allocate_array_stream()
245+
CPL_read_gdal_stream(stream, dsn, layer, query, as.character(options), quiet,
246+
drivers, wkt_filter, dsn_exists, dsn_isdb, getOption("width"))
247+
process_cpl_read_ogr_stream(stream, quiet, check_ring_dir = check_ring_dir,
248+
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
249+
optional = optional, ...)
250+
} else {
251+
x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
252+
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))
253+
254+
process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
255+
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
256+
optional = optional, ...)
257+
}
258+
241259
}
242260

243261
#' @name st_read
@@ -606,7 +624,7 @@ print.sf_layers = function(x, ...) {
606624
#' @param options character; driver dependent dataset open options, multiple options supported.
607625
#' @param do_count logical; if TRUE, count the features by reading them, even if their count is not reported by the driver
608626
#' @name st_layers
609-
#' @return list object of class \code{sf_layers} with elements
627+
#' @return list object of class \code{sf_layers} with elements
610628
#' \describe{
611629
#' \item{name}{name of the layer}
612630
#' \item{geomtype}{list with for each layer the geometry types}
@@ -751,7 +769,7 @@ check_append_delete <- function(append, delete) {
751769

752770
#' @name st_write
753771
#' @export
754-
#' @details st_delete deletes layer(s) in a data source, or a data source if layers are
772+
#' @details st_delete deletes layer(s) in a data source, or a data source if layers are
755773
#' omitted; it returns TRUE on success, FALSE on failure, invisibly.
756774
st_delete = function(dsn, layer = character(0), driver = guess_driver_can_write(dsn), quiet = FALSE) {
757775
invisible(CPL_delete_ogr(dsn, layer, driver, quiet) == 0)

src/RcppExports.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,27 @@ BEGIN_RCPP
345345
return rcpp_result_gen;
346346
END_RCPP
347347
}
348+
// CPL_read_gdal_stream
349+
Rcpp::CharacterVector CPL_read_gdal_stream(Rcpp::RObject stream_xptr, Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer, Rcpp::CharacterVector query, Rcpp::CharacterVector options, bool quiet, Rcpp::CharacterVector drivers, Rcpp::CharacterVector wkt_filter, bool dsn_exists, bool dsn_isdb, int width);
350+
RcppExport SEXP _sf_CPL_read_gdal_stream(SEXP stream_xptrSEXP, SEXP datasourceSEXP, SEXP layerSEXP, SEXP querySEXP, SEXP optionsSEXP, SEXP quietSEXP, SEXP driversSEXP, SEXP wkt_filterSEXP, SEXP dsn_existsSEXP, SEXP dsn_isdbSEXP, SEXP widthSEXP) {
351+
BEGIN_RCPP
352+
Rcpp::RObject rcpp_result_gen;
353+
Rcpp::RNGScope rcpp_rngScope_gen;
354+
Rcpp::traits::input_parameter< Rcpp::RObject >::type stream_xptr(stream_xptrSEXP);
355+
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type datasource(datasourceSEXP);
356+
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type layer(layerSEXP);
357+
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type query(querySEXP);
358+
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type options(optionsSEXP);
359+
Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP);
360+
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type drivers(driversSEXP);
361+
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type wkt_filter(wkt_filterSEXP);
362+
Rcpp::traits::input_parameter< bool >::type dsn_exists(dsn_existsSEXP);
363+
Rcpp::traits::input_parameter< bool >::type dsn_isdb(dsn_isdbSEXP);
364+
Rcpp::traits::input_parameter< int >::type width(widthSEXP);
365+
rcpp_result_gen = Rcpp::wrap(CPL_read_gdal_stream(stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, width));
366+
return rcpp_result_gen;
367+
END_RCPP
368+
}
348369
// CPL_gdalinfo
349370
Rcpp::CharacterVector CPL_gdalinfo(Rcpp::CharacterVector obj, Rcpp::CharacterVector options, Rcpp::CharacterVector oo, Rcpp::CharacterVector co);
350371
RcppExport SEXP _sf_CPL_gdalinfo(SEXP objSEXP, SEXP optionsSEXP, SEXP ooSEXP, SEXP coSEXP) {
@@ -1442,6 +1463,7 @@ static const R_CallMethodDef CallEntries[] = {
14421463
{"_sf_CPL_gdal_linestring_sample", (DL_FUNC) &_sf_CPL_gdal_linestring_sample, 2},
14431464
{"_sf_CPL_get_layers", (DL_FUNC) &_sf_CPL_get_layers, 3},
14441465
{"_sf_CPL_read_ogr", (DL_FUNC) &_sf_CPL_read_ogr, 14},
1466+
<<<<<<< HEAD
14451467
{"_sf_CPL_gdalinfo", (DL_FUNC) &_sf_CPL_gdalinfo, 4},
14461468
{"_sf_CPL_ogrinfo", (DL_FUNC) &_sf_CPL_ogrinfo, 4},
14471469
{"_sf_CPL_gdaladdo", (DL_FUNC) &_sf_CPL_gdaladdo, 8},
@@ -1456,6 +1478,22 @@ static const R_CallMethodDef CallEntries[] = {
14561478
{"_sf_CPL_gdalmdiminfo", (DL_FUNC) &_sf_CPL_gdalmdiminfo, 4},
14571479
{"_sf_CPL_gdalmdimtranslate", (DL_FUNC) &_sf_CPL_gdalmdimtranslate, 6},
14581480
{"_sf_CPL_gdal_warper", (DL_FUNC) &_sf_CPL_gdal_warper, 7},
1481+
=======
1482+
{"_sf_CPL_read_gdal_stream", (DL_FUNC) &_sf_CPL_read_gdal_stream, 11},
1483+
{"_sf_CPL_gdalinfo", (DL_FUNC) &_sf_CPL_gdalinfo, 3},
1484+
{"_sf_CPL_gdaladdo", (DL_FUNC) &_sf_CPL_gdaladdo, 7},
1485+
{"_sf_CPL_gdalwarp", (DL_FUNC) &_sf_CPL_gdalwarp, 7},
1486+
{"_sf_CPL_gdalrasterize", (DL_FUNC) &_sf_CPL_gdalrasterize, 7},
1487+
{"_sf_CPL_gdaltranslate", (DL_FUNC) &_sf_CPL_gdaltranslate, 5},
1488+
{"_sf_CPL_gdalvectortranslate", (DL_FUNC) &_sf_CPL_gdalvectortranslate, 6},
1489+
{"_sf_CPL_gdalbuildvrt", (DL_FUNC) &_sf_CPL_gdalbuildvrt, 5},
1490+
{"_sf_CPL_gdaldemprocessing", (DL_FUNC) &_sf_CPL_gdaldemprocessing, 7},
1491+
{"_sf_CPL_gdalnearblack", (DL_FUNC) &_sf_CPL_gdalnearblack, 6},
1492+
{"_sf_CPL_gdalgrid", (DL_FUNC) &_sf_CPL_gdalgrid, 5},
1493+
{"_sf_CPL_gdalmdiminfo", (DL_FUNC) &_sf_CPL_gdalmdiminfo, 3},
1494+
{"_sf_CPL_gdalmdimtranslate", (DL_FUNC) &_sf_CPL_gdalmdimtranslate, 5},
1495+
{"_sf_CPL_gdal_warper", (DL_FUNC) &_sf_CPL_gdal_warper, 6},
1496+
>>>>>>> 6829782b (start to add stream interface)
14591497
{"_sf_CPL_write_ogr", (DL_FUNC) &_sf_CPL_write_ogr, 16},
14601498
{"_sf_CPL_delete_ogr", (DL_FUNC) &_sf_CPL_delete_ogr, 4},
14611499
{"_sf_CPL_geos_binop", (DL_FUNC) &_sf_CPL_geos_binop, 6},

src/gdal_read.cpp

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -288,14 +288,14 @@ Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string,
288288
// POSIXlt: sec min hour mday mon year wday yday isdst ...
289289
Rcpp::List dtlst =
290290
Rcpp::List::create(
291-
Rcpp::_["sec"] = (double) Second,
291+
Rcpp::_["sec"] = (double) Second,
292292
Rcpp::_["min"] = (int) Minute,
293293
Rcpp::_["hour"] = (int) Hour,
294294
Rcpp::_["mday"] = (int) Day,
295295
Rcpp::_["mon"] = (int) Month - 1,
296296
Rcpp::_["year"] = (int) Year - 1900,
297-
Rcpp::_["wday"] = NA_INTEGER,
298-
Rcpp::_["yday"] = NA_INTEGER,
297+
Rcpp::_["wday"] = NA_INTEGER,
298+
Rcpp::_["yday"] = NA_INTEGER,
299299
Rcpp::_["isdst"] = NA_INTEGER,
300300
Rcpp::_["zone"] = tzone,
301301
Rcpp::_["gmtoff"] = NA_INTEGER);
@@ -502,22 +502,25 @@ Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string,
502502
return out;
503503
}
504504

505-
// [[Rcpp::export]]
506-
Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer,
505+
static void finalize_dataset_xptr(SEXP dataset_xptr) {
506+
GDALDataset *poDS = (GDALDataset*)R_ExternalPtrAddr(dataset_xptr);
507+
if (poDS != nullptr) {
508+
GDALClose(poDS);
509+
}
510+
}
511+
512+
Rcpp::List CPL_ogr_layer_setup(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer,
507513
Rcpp::CharacterVector query,
508-
Rcpp::CharacterVector options, bool quiet, Rcpp::NumericVector toTypeUser,
509-
Rcpp::CharacterVector fid_column_name, Rcpp::CharacterVector drivers,
514+
Rcpp::CharacterVector options, bool quiet, Rcpp::CharacterVector drivers,
510515
Rcpp::CharacterVector wkt_filter,
511-
bool promote_to_multi = true, bool int64_as_string = false,
512-
bool dsn_exists = true,
513-
bool dsn_isdb = false,
514-
int width = 80) {
515-
516-
// adapted from the OGR tutorial @ www.gdal.org
516+
bool dsn_exists,
517+
bool dsn_isdb,
518+
int width) {
519+
// adapted from the OGR tutorial @ www.gdal.org
517520
std::vector <char *> open_options = create_options(options, quiet);
518521
std::vector <char *> drivers_v = create_options(drivers, quiet);
519522
GDALDataset *poDS;
520-
poDS = (GDALDataset *) GDALOpenEx( datasource[0], GDAL_OF_VECTOR | GDAL_OF_READONLY,
523+
poDS = (GDALDataset *) GDALOpenEx( datasource[0], GDAL_OF_VECTOR | GDAL_OF_READONLY,
521524
drivers.size() ? drivers_v.data() : NULL, open_options.data(), NULL );
522525
if( poDS == NULL ) {
523526
// could not open dsn
@@ -533,6 +536,11 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector
533536
Rcpp::stop("Cannot open %s; The file doesn't seem to exist.", datasource);
534537
}
535538

539+
// Will close the dataset if some early return/exception prevents GDALClose() from being
540+
// called/allows the result to be accessed by the caller.
541+
Rcpp::RObject dataset_xptr = R_MakeExternalPtr(poDS, R_NilValue, R_NilValue);
542+
R_RegisterCFinalizer(dataset_xptr, &finalize_dataset_xptr);
543+
536544
if (layer.size() == 0 && Rcpp::CharacterVector::is_na(query[0])) { // no layer specified
537545
switch (poDS->GetLayerCount()) {
538546
case 0: { // error:
@@ -596,7 +604,7 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector
596604
Rcpp::Rcout << "Reading layer `" << layer[0] << "' from data source ";
597605
// if (LENGTH(datasource[0]) > (width - (34 + LENGTH(layer[0]))))
598606
Rcpp::String ds(datasource(0));
599-
if (layer.size()) {
607+
if (layer.size()) {
600608
Rcpp::String la(layer(0));
601609
if (strlen(ds.get_cstring()) > (width - (34 + strlen(la.get_cstring()))))
602610
Rcpp::Rcout << std::endl << " ";
@@ -607,6 +615,29 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector
607615
Rcpp::Rcout << "using driver `" << poDS->GetDriverName() << "'" << std::endl; // #nocov
608616
}
609617

618+
// Keeps the dataset external pointer alive as long as the layer external pointer is alive
619+
Rcpp::RObject layer_xptr = R_MakeExternalPtr(poLayer, R_NilValue, dataset_xptr);
620+
621+
return Rcpp::List::create(dataset_xptr, layer_xptr);
622+
}
623+
624+
// [[Rcpp::export]]
625+
Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer,
626+
Rcpp::CharacterVector query,
627+
Rcpp::CharacterVector options, bool quiet, Rcpp::NumericVector toTypeUser,
628+
Rcpp::CharacterVector fid_column_name, Rcpp::CharacterVector drivers,
629+
Rcpp::CharacterVector wkt_filter,
630+
bool promote_to_multi = true, bool int64_as_string = false,
631+
bool dsn_exists = true,
632+
bool dsn_isdb = false,
633+
int width = 80) {
634+
Rcpp::List prep = CPL_ogr_layer_setup(datasource, layer, query, options,
635+
quiet, drivers,
636+
wkt_filter,
637+
dsn_exists, dsn_isdb, width);
638+
OGRDataSource* poDS = (OGRDataSource*)(R_ExternalPtrAddr(prep[0]));
639+
OGRLayer* poLayer = (OGRLayer*)R_ExternalPtrAddr(prep[1]);
640+
610641
Rcpp::List out = sf_from_ogrlayer(poLayer, quiet, int64_as_string, toTypeUser, fid_column_name,
611642
promote_to_multi);
612643

@@ -615,5 +646,6 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector
615646
poDS->ReleaseResultSet(poLayer);
616647

617648
GDALClose(poDS);
649+
R_SetExternalPtrAddr(prep[0], nullptr);
618650
return out;
619651
}

src/gdal_read.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
1-
Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string,
1+
2+
Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string,
23
Rcpp::NumericVector toTypeUser, Rcpp::CharacterVector fid_column, bool promote_to_multi);
4+
5+
Rcpp::List CPL_ogr_layer_setup(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer,
6+
Rcpp::CharacterVector query,
7+
Rcpp::CharacterVector options, bool quiet, Rcpp::CharacterVector drivers,
8+
Rcpp::CharacterVector wkt_filter,
9+
bool dsn_exists,
10+
bool dsn_isdb,
11+
int width);
12+
313
Rcpp::List CPL_read_gdal(Rcpp::CharacterVector fname, Rcpp::CharacterVector options, Rcpp::CharacterVector driver,
414
bool read_data, Rcpp::NumericVector NA_value, Rcpp::List RasterIO_parameters);

0 commit comments

Comments
 (0)