From 7ba84649914b436c24e60fd87f23bb632dbb8977 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <krlmlr+r@mailbox.org>
Date: Sat, 31 Jul 2021 05:36:31 +0200
Subject: [PATCH 1/9] Add copy from dplyr's generics.R

---
 R/reconstruct.R | 255 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 255 insertions(+)
 create mode 100644 R/reconstruct.R

diff --git a/R/reconstruct.R b/R/reconstruct.R
new file mode 100644
index 000000000..fd2f58820
--- /dev/null
+++ b/R/reconstruct.R
@@ -0,0 +1,255 @@
+#' Extending dplyr with new data frame subclasses
+#'
+#' @description
+#' \Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}
+#'
+#' These three functions, along with `names<-` and 1d numeric `[`
+#' (i.e. `x[loc]`) methods, provide a minimal interface for extending dplyr
+#' to work with new data frame subclasses. This means that for simple cases
+#' you should only need to provide a couple of methods, rather than a method
+#' for every dplyr verb.
+#'
+#' These functions are a stop-gap measure until we figure out how to solve
+#' the problem more generally, but it's likely that any code you write to
+#' implement them will find a home in what comes next.
+#'
+#' # Basic advice
+#'
+#' This section gives you basic advice if you want to extend dplyr to work with
+#' your custom data frame subclass, and you want the dplyr methods to behave
+#' in basically the same way.
+#'
+#' * If you have data frame attributes that don't depend on the rows or columns
+#'   (and should unconditionally be preserved), you don't need to do anything.
+#'
+#' * If you have __scalar__ attributes that depend on __rows__, implement a
+#'   `dplyr_reconstruct()` method. Your method should recompute the attribute
+#'   depending on rows now present.
+#'
+#' * If you have __scalar__ attributes that depend on __columns__, implement a
+#'   `dplyr_reconstruct()` method and a 1d `[` method. For example, if your
+#'   class requires that certain columns be present, your method should return
+#'   a data.frame or tibble when those columns are removed.
+#'
+#' * If your attributes are __vectorised__ over __rows__, implement a
+#'   `dplyr_row_slice()` method. This gives you access to `i` so you can
+#'   modify the row attribute accordingly. You'll also need to think carefully
+#'   about how to recompute the attribute in `dplyr_reconstruct()`, and
+#'   you will need to carefully verify the behaviour of each verb, and provide
+#'   additional methods as needed.
+#'
+#' * If your attributes that are __vectorised__ over __columns__, implement
+#'   `dplyr_col_modify()`, 1d `[`, and `names<-` methods. All of these methods
+#'   know which columns are being modified, so you can update the column
+#'   attribute according. You'll also need to think carefully about how to
+#'   recompute the attribute in `dplyr_reconstruct()`, and you will need to
+#'   carefully verify the behaviour of each verb, and provide additional
+#'   methods as needed.
+#'
+#' # Current usage
+#'
+#' * `arrange()`, `filter()`, `slice()`, `semi_join()`, and `anti_join()`
+#'   work by generating a vector of row indices, and then subsetting
+#'   with `dplyr_row_slice()`.
+#'
+#' * `mutate()` generates a list of new column value (using `NULL` to indicate
+#'   when columns should be deleted), then passes that to `dplyr_col_modify()`.
+#'   `transmute()` does the same then uses 1d `[` to select the columns.
+#'
+#' * `summarise()` works similarly to `mutate()` but the data modified by
+#'   `dplyr_col_modify()` comes from `group_data()`.
+#'
+#' * `select()` uses 1d `[` to select columns, then `names<-` to rename them.
+#'   `rename()` just uses `names<-`. `relocate()` just uses 1d `[`.
+#'
+#' * `inner_join()`, `left_join()`, `right_join()`, and `full_join()`
+#'   coerces `x` to a tibble, modify the rows, then uses `dplyr_reconstruct()`
+#'   to convert back to the same type as `x`.
+#'
+#' * `nest_join()` uses `dplyr_col_modify()` to cast the key variables to
+#'   common type and add the nested-df that `y` becomes.
+#'
+#' * `distinct()` does a `mutate()` if any expressions are present, then
+#'   uses 1d `[` to select variables to keep, then `dplyr_row_slice()` to
+#'   select distinct rows.
+#'
+#' Note that `group_by()` and `ungroup()` don't use any these generics and
+#' you'll need to provide methods directly.
+#'
+#' @keywords internal
+#' @param data A tibble. We use tibbles because they avoid some inconsistent
+#'    subset-assignment use cases
+#' @name dplyr_extending
+NULL
+
+#' @export
+#' @rdname dplyr_extending
+#' @param i A numeric or logical vector that indexes the rows of `.data`.
+dplyr_row_slice <- function(data, i, ...) {
+  if (!is.numeric(i) && !is.logical(i)) {
+    abort("`i` must be an numeric or logical vector.")
+  }
+
+  UseMethod("dplyr_row_slice")
+}
+
+#' @export
+dplyr_row_slice.data.frame <- function(data, i, ...) {
+  dplyr_reconstruct(vec_slice(data, i), data)
+}
+
+#' @export
+dplyr_row_slice.grouped_df <- function(data, i, ..., preserve = FALSE) {
+  out <- vec_slice(as.data.frame(data), i)
+
+  # Index into group_indices, then use that to restore the grouping structure
+  groups <- group_data(data)
+  new_id <- vec_slice(group_indices(data), i)
+  new_grps <- vec_group_loc(new_id)
+
+  rows <- rep(list_of(integer()), length.out = nrow(groups))
+  rows[new_grps$key] <- new_grps$loc
+  groups$.rows <- rows
+  if (!preserve && isTRUE(attr(groups, ".drop"))) {
+    groups <- group_data_trim(groups)
+  }
+
+  new_grouped_df(out, groups)
+}
+
+#' @export
+dplyr_row_slice.rowwise_df <- function(data, i, ..., preserve = FALSE) {
+  out <- vec_slice(data, i)
+  group_data <- vec_slice(group_keys(data), i)
+  new_rowwise_df(out, group_data)
+}
+
+#' @export
+#' @rdname dplyr_extending
+#' @param cols A named list used modify columns. A `NULL` value should remove
+#'   an existing column.
+dplyr_col_modify <- function(data, cols) {
+  UseMethod("dplyr_col_modify")
+}
+
+#' @export
+dplyr_col_modify.data.frame <- function(data, cols) {
+  # Must be implemented from first principles to avoiding edge cases in
+  # [.data.frame and [.tibble (2.1.3 and earlier).
+
+  # Apply tidyverse recycling rules
+  cols <- vec_recycle_common(!!!cols, .size = nrow(data))
+
+  # Transform to list to avoid stripping inner names with `[[<-`
+  out <- as.list(dplyr_vec_data(data))
+
+  nms <- as_utf8_character(names2(cols))
+  names(out) <- as_utf8_character(names2(out))
+
+  for (i in seq_along(cols)) {
+    nm <- nms[[i]]
+    out[[nm]] <- cols[[i]]
+  }
+
+  # Transform back to data frame before reconstruction
+  row_names <- .row_names_info(data, type = 0L)
+  out <- new_data_frame(out, n = nrow(data), row.names = row_names)
+
+  dplyr_reconstruct(out, data)
+}
+
+#' @export
+dplyr_col_modify.grouped_df <- function(data, cols) {
+  out <- dplyr_col_modify(as_tibble(data), cols)
+
+  if (any(names(cols) %in% group_vars(data))) {
+    # regroup
+    grouped_df(out, group_vars(data), drop = group_by_drop_default(data))
+  } else {
+    new_grouped_df(out, group_data(data))
+  }
+}
+
+#' @export
+dplyr_col_modify.rowwise_df <- function(data, cols) {
+  out <- dplyr_col_modify(as_tibble(data), cols)
+  rowwise_df(out, group_vars(data))
+}
+
+#' @param template Template to use for restoring attributes
+#' @export
+#' @rdname dplyr_extending
+dplyr_reconstruct <- function(data, template) {
+  # Strip attributes before dispatch to make it easier to implement
+  # methods and prevent unexpected leaking of irrelevant attributes.
+  data <- dplyr_new_data_frame(data)
+  return(dplyr_reconstruct_dispatch(data, template))
+  UseMethod("dplyr_reconstruct", template)
+}
+dplyr_reconstruct_dispatch <- function(data, template) {
+  UseMethod("dplyr_reconstruct", template)
+}
+
+#' @export
+dplyr_reconstruct.data.frame <- function(data, template) {
+  attrs <- attributes(template)
+  attrs$names <- names(data)
+  attrs$row.names <- .row_names_info(data, type = 0L)
+
+  attributes(data) <- attrs
+  data
+}
+
+#' @export
+dplyr_reconstruct.grouped_df <- function(data, template) {
+  group_vars <- group_intersect(template, data)
+  grouped_df(data, group_vars, drop = group_by_drop_default(template))
+}
+
+#' @export
+dplyr_reconstruct.rowwise_df <- function(data, template) {
+  group_vars <- group_intersect(template, data)
+  rowwise_df(data, group_vars)
+}
+
+dplyr_col_select <- function(.data, loc, names = NULL) {
+  loc <- vec_as_location(loc, n = ncol(.data), names = names(.data))
+
+  out <- .data[loc]
+  if (!inherits(out, "data.frame")) {
+    abort(c(
+      "Can't reconstruct data frame.",
+      x = glue("The `[` method for class <{classes_data}> must return a data frame.",
+        classes_data = glue_collapse(class(.data), sep = "/")
+      ),
+      i = glue("It returned a <{classes_out}>.",
+        classes_out = glue_collapse(class(out), sep = "/")
+      )
+    ))
+  }
+  if (length(out) != length(loc)) {
+    abort(c(
+      "Can't reconstruct data frame.",
+      x = glue("The `[` method for class <{classes_data}> must return a data frame with {length(loc)} column{s}.",
+        classes_data = glue_collapse(class(.data), sep = "/"),
+        s = if(length(loc) == 1) "" else "s"
+      ),
+      i = glue("It returned a <{classes_out}> of {length(out)} column{s}.",
+        classes_out = glue_collapse(class(out), sep = "/"),
+        s = if(length(out) == 1) "" else "s"
+      )
+    ))
+  }
+
+  # Patch base data frames to restore extra attributes that `[.data.frame` drops.
+  # We require `[` methods to keep extra attributes for all data frame subclasses.
+  if (identical(class(.data), "data.frame")) {
+    out <- dplyr_reconstruct(out, .data)
+  }
+
+  if (!is.null(names)) {
+    names(out) <- names
+  }
+
+  out
+}

From 85b27996deae40451de5a023fd921930b3f32912 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <krlmlr+r@mailbox.org>
Date: Sat, 31 Jul 2021 05:37:11 +0200
Subject: [PATCH 2/9] dplyr -> tibble

---
 R/reconstruct.R | 94 ++++++++++++++++++++++++-------------------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/R/reconstruct.R b/R/reconstruct.R
index fd2f58820..b326a122b 100644
--- a/R/reconstruct.R
+++ b/R/reconstruct.R
@@ -1,13 +1,13 @@
-#' Extending dplyr with new data frame subclasses
+#' Extending tibble with new data frame subclasses
 #'
 #' @description
 #' \Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}
 #'
 #' These three functions, along with `names<-` and 1d numeric `[`
-#' (i.e. `x[loc]`) methods, provide a minimal interface for extending dplyr
+#' (i.e. `x[loc]`) methods, provide a minimal interface for extending tibble
 #' to work with new data frame subclasses. This means that for simple cases
 #' you should only need to provide a couple of methods, rather than a method
-#' for every dplyr verb.
+#' for every tibble verb.
 #'
 #' These functions are a stop-gap measure until we figure out how to solve
 #' the problem more generally, but it's likely that any code you write to
@@ -15,34 +15,34 @@
 #'
 #' # Basic advice
 #'
-#' This section gives you basic advice if you want to extend dplyr to work with
-#' your custom data frame subclass, and you want the dplyr methods to behave
+#' This section gives you basic advice if you want to extend tibble to work with
+#' your custom data frame subclass, and you want the tibble methods to behave
 #' in basically the same way.
 #'
 #' * If you have data frame attributes that don't depend on the rows or columns
 #'   (and should unconditionally be preserved), you don't need to do anything.
 #'
 #' * If you have __scalar__ attributes that depend on __rows__, implement a
-#'   `dplyr_reconstruct()` method. Your method should recompute the attribute
+#'   `tibble_reconstruct()` method. Your method should recompute the attribute
 #'   depending on rows now present.
 #'
 #' * If you have __scalar__ attributes that depend on __columns__, implement a
-#'   `dplyr_reconstruct()` method and a 1d `[` method. For example, if your
+#'   `tibble_reconstruct()` method and a 1d `[` method. For example, if your
 #'   class requires that certain columns be present, your method should return
 #'   a data.frame or tibble when those columns are removed.
 #'
 #' * If your attributes are __vectorised__ over __rows__, implement a
-#'   `dplyr_row_slice()` method. This gives you access to `i` so you can
+#'   `tibble_row_slice()` method. This gives you access to `i` so you can
 #'   modify the row attribute accordingly. You'll also need to think carefully
-#'   about how to recompute the attribute in `dplyr_reconstruct()`, and
+#'   about how to recompute the attribute in `tibble_reconstruct()`, and
 #'   you will need to carefully verify the behaviour of each verb, and provide
 #'   additional methods as needed.
 #'
 #' * If your attributes that are __vectorised__ over __columns__, implement
-#'   `dplyr_col_modify()`, 1d `[`, and `names<-` methods. All of these methods
+#'   `tibble_col_modify()`, 1d `[`, and `names<-` methods. All of these methods
 #'   know which columns are being modified, so you can update the column
 #'   attribute according. You'll also need to think carefully about how to
-#'   recompute the attribute in `dplyr_reconstruct()`, and you will need to
+#'   recompute the attribute in `tibble_reconstruct()`, and you will need to
 #'   carefully verify the behaviour of each verb, and provide additional
 #'   methods as needed.
 #'
@@ -50,27 +50,27 @@
 #'
 #' * `arrange()`, `filter()`, `slice()`, `semi_join()`, and `anti_join()`
 #'   work by generating a vector of row indices, and then subsetting
-#'   with `dplyr_row_slice()`.
+#'   with `tibble_row_slice()`.
 #'
 #' * `mutate()` generates a list of new column value (using `NULL` to indicate
-#'   when columns should be deleted), then passes that to `dplyr_col_modify()`.
+#'   when columns should be deleted), then passes that to `tibble_col_modify()`.
 #'   `transmute()` does the same then uses 1d `[` to select the columns.
 #'
 #' * `summarise()` works similarly to `mutate()` but the data modified by
-#'   `dplyr_col_modify()` comes from `group_data()`.
+#'   `tibble_col_modify()` comes from `group_data()`.
 #'
 #' * `select()` uses 1d `[` to select columns, then `names<-` to rename them.
 #'   `rename()` just uses `names<-`. `relocate()` just uses 1d `[`.
 #'
 #' * `inner_join()`, `left_join()`, `right_join()`, and `full_join()`
-#'   coerces `x` to a tibble, modify the rows, then uses `dplyr_reconstruct()`
+#'   coerces `x` to a tibble, modify the rows, then uses `tibble_reconstruct()`
 #'   to convert back to the same type as `x`.
 #'
-#' * `nest_join()` uses `dplyr_col_modify()` to cast the key variables to
+#' * `nest_join()` uses `tibble_col_modify()` to cast the key variables to
 #'   common type and add the nested-df that `y` becomes.
 #'
 #' * `distinct()` does a `mutate()` if any expressions are present, then
-#'   uses 1d `[` to select variables to keep, then `dplyr_row_slice()` to
+#'   uses 1d `[` to select variables to keep, then `tibble_row_slice()` to
 #'   select distinct rows.
 #'
 #' Note that `group_by()` and `ungroup()` don't use any these generics and
@@ -79,27 +79,27 @@
 #' @keywords internal
 #' @param data A tibble. We use tibbles because they avoid some inconsistent
 #'    subset-assignment use cases
-#' @name dplyr_extending
+#' @name tibble_extending
 NULL
 
 #' @export
-#' @rdname dplyr_extending
+#' @rdname tibble_extending
 #' @param i A numeric or logical vector that indexes the rows of `.data`.
-dplyr_row_slice <- function(data, i, ...) {
+tibble_row_slice <- function(data, i, ...) {
   if (!is.numeric(i) && !is.logical(i)) {
     abort("`i` must be an numeric or logical vector.")
   }
 
-  UseMethod("dplyr_row_slice")
+  UseMethod("tibble_row_slice")
 }
 
 #' @export
-dplyr_row_slice.data.frame <- function(data, i, ...) {
-  dplyr_reconstruct(vec_slice(data, i), data)
+tibble_row_slice.data.frame <- function(data, i, ...) {
+  tibble_reconstruct(vec_slice(data, i), data)
 }
 
 #' @export
-dplyr_row_slice.grouped_df <- function(data, i, ..., preserve = FALSE) {
+tibble_row_slice.grouped_df <- function(data, i, ..., preserve = FALSE) {
   out <- vec_slice(as.data.frame(data), i)
 
   # Index into group_indices, then use that to restore the grouping structure
@@ -118,22 +118,22 @@ dplyr_row_slice.grouped_df <- function(data, i, ..., preserve = FALSE) {
 }
 
 #' @export
-dplyr_row_slice.rowwise_df <- function(data, i, ..., preserve = FALSE) {
+tibble_row_slice.rowwise_df <- function(data, i, ..., preserve = FALSE) {
   out <- vec_slice(data, i)
   group_data <- vec_slice(group_keys(data), i)
   new_rowwise_df(out, group_data)
 }
 
 #' @export
-#' @rdname dplyr_extending
+#' @rdname tibble_extending
 #' @param cols A named list used modify columns. A `NULL` value should remove
 #'   an existing column.
-dplyr_col_modify <- function(data, cols) {
-  UseMethod("dplyr_col_modify")
+tibble_col_modify <- function(data, cols) {
+  UseMethod("tibble_col_modify")
 }
 
 #' @export
-dplyr_col_modify.data.frame <- function(data, cols) {
+tibble_col_modify.data.frame <- function(data, cols) {
   # Must be implemented from first principles to avoiding edge cases in
   # [.data.frame and [.tibble (2.1.3 and earlier).
 
@@ -141,7 +141,7 @@ dplyr_col_modify.data.frame <- function(data, cols) {
   cols <- vec_recycle_common(!!!cols, .size = nrow(data))
 
   # Transform to list to avoid stripping inner names with `[[<-`
-  out <- as.list(dplyr_vec_data(data))
+  out <- as.list(tibble_vec_data(data))
 
   nms <- as_utf8_character(names2(cols))
   names(out) <- as_utf8_character(names2(out))
@@ -155,12 +155,12 @@ dplyr_col_modify.data.frame <- function(data, cols) {
   row_names <- .row_names_info(data, type = 0L)
   out <- new_data_frame(out, n = nrow(data), row.names = row_names)
 
-  dplyr_reconstruct(out, data)
+  tibble_reconstruct(out, data)
 }
 
 #' @export
-dplyr_col_modify.grouped_df <- function(data, cols) {
-  out <- dplyr_col_modify(as_tibble(data), cols)
+tibble_col_modify.grouped_df <- function(data, cols) {
+  out <- tibble_col_modify(as_tibble(data), cols)
 
   if (any(names(cols) %in% group_vars(data))) {
     # regroup
@@ -171,27 +171,27 @@ dplyr_col_modify.grouped_df <- function(data, cols) {
 }
 
 #' @export
-dplyr_col_modify.rowwise_df <- function(data, cols) {
-  out <- dplyr_col_modify(as_tibble(data), cols)
+tibble_col_modify.rowwise_df <- function(data, cols) {
+  out <- tibble_col_modify(as_tibble(data), cols)
   rowwise_df(out, group_vars(data))
 }
 
 #' @param template Template to use for restoring attributes
 #' @export
-#' @rdname dplyr_extending
-dplyr_reconstruct <- function(data, template) {
+#' @rdname tibble_extending
+tibble_reconstruct <- function(data, template) {
   # Strip attributes before dispatch to make it easier to implement
   # methods and prevent unexpected leaking of irrelevant attributes.
-  data <- dplyr_new_data_frame(data)
-  return(dplyr_reconstruct_dispatch(data, template))
-  UseMethod("dplyr_reconstruct", template)
+  data <- tibble_new_data_frame(data)
+  return(tibble_reconstruct_dispatch(data, template))
+  UseMethod("tibble_reconstruct", template)
 }
-dplyr_reconstruct_dispatch <- function(data, template) {
-  UseMethod("dplyr_reconstruct", template)
+tibble_reconstruct_dispatch <- function(data, template) {
+  UseMethod("tibble_reconstruct", template)
 }
 
 #' @export
-dplyr_reconstruct.data.frame <- function(data, template) {
+tibble_reconstruct.data.frame <- function(data, template) {
   attrs <- attributes(template)
   attrs$names <- names(data)
   attrs$row.names <- .row_names_info(data, type = 0L)
@@ -201,18 +201,18 @@ dplyr_reconstruct.data.frame <- function(data, template) {
 }
 
 #' @export
-dplyr_reconstruct.grouped_df <- function(data, template) {
+tibble_reconstruct.grouped_df <- function(data, template) {
   group_vars <- group_intersect(template, data)
   grouped_df(data, group_vars, drop = group_by_drop_default(template))
 }
 
 #' @export
-dplyr_reconstruct.rowwise_df <- function(data, template) {
+tibble_reconstruct.rowwise_df <- function(data, template) {
   group_vars <- group_intersect(template, data)
   rowwise_df(data, group_vars)
 }
 
-dplyr_col_select <- function(.data, loc, names = NULL) {
+tibble_col_select <- function(.data, loc, names = NULL) {
   loc <- vec_as_location(loc, n = ncol(.data), names = names(.data))
 
   out <- .data[loc]
@@ -244,7 +244,7 @@ dplyr_col_select <- function(.data, loc, names = NULL) {
   # Patch base data frames to restore extra attributes that `[.data.frame` drops.
   # We require `[` methods to keep extra attributes for all data frame subclasses.
   if (identical(class(.data), "data.frame")) {
-    out <- dplyr_reconstruct(out, .data)
+    out <- tibble_reconstruct(out, .data)
   }
 
   if (!is.null(names)) {

From 1857d3523a989a61c6ae75e5c55ed6dae3591613 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <krlmlr+r@mailbox.org>
Date: Sat, 31 Jul 2021 05:45:23 +0200
Subject: [PATCH 3/9] Strip

---
 NAMESPACE          |   2 +
 R/reconstruct.R    | 236 ++++++---------------------------------------
 R/tibble-package.R |   1 +
 3 files changed, 32 insertions(+), 207 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 51aec1283..07144a8da 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -97,12 +97,14 @@ importFrom(vctrs,vec_as_names_legacy)
 importFrom(vctrs,vec_as_subscript2)
 importFrom(vctrs,vec_assign)
 importFrom(vctrs,vec_c)
+importFrom(vctrs,vec_data)
 importFrom(vctrs,vec_is)
 importFrom(vctrs,vec_names)
 importFrom(vctrs,vec_names2)
 importFrom(vctrs,vec_ptype_abbr)
 importFrom(vctrs,vec_rbind)
 importFrom(vctrs,vec_recycle)
+importFrom(vctrs,vec_recycle_common)
 importFrom(vctrs,vec_set_names)
 importFrom(vctrs,vec_size)
 importFrom(vctrs,vec_slice)
diff --git a/R/reconstruct.R b/R/reconstruct.R
index b326a122b..ca1116f7c 100644
--- a/R/reconstruct.R
+++ b/R/reconstruct.R
@@ -1,139 +1,15 @@
-#' Extending tibble with new data frame subclasses
-#'
-#' @description
-#' \Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}
-#'
-#' These three functions, along with `names<-` and 1d numeric `[`
-#' (i.e. `x[loc]`) methods, provide a minimal interface for extending tibble
-#' to work with new data frame subclasses. This means that for simple cases
-#' you should only need to provide a couple of methods, rather than a method
-#' for every tibble verb.
-#'
-#' These functions are a stop-gap measure until we figure out how to solve
-#' the problem more generally, but it's likely that any code you write to
-#' implement them will find a home in what comes next.
-#'
-#' # Basic advice
-#'
-#' This section gives you basic advice if you want to extend tibble to work with
-#' your custom data frame subclass, and you want the tibble methods to behave
-#' in basically the same way.
-#'
-#' * If you have data frame attributes that don't depend on the rows or columns
-#'   (and should unconditionally be preserved), you don't need to do anything.
-#'
-#' * If you have __scalar__ attributes that depend on __rows__, implement a
-#'   `tibble_reconstruct()` method. Your method should recompute the attribute
-#'   depending on rows now present.
-#'
-#' * If you have __scalar__ attributes that depend on __columns__, implement a
-#'   `tibble_reconstruct()` method and a 1d `[` method. For example, if your
-#'   class requires that certain columns be present, your method should return
-#'   a data.frame or tibble when those columns are removed.
-#'
-#' * If your attributes are __vectorised__ over __rows__, implement a
-#'   `tibble_row_slice()` method. This gives you access to `i` so you can
-#'   modify the row attribute accordingly. You'll also need to think carefully
-#'   about how to recompute the attribute in `tibble_reconstruct()`, and
-#'   you will need to carefully verify the behaviour of each verb, and provide
-#'   additional methods as needed.
-#'
-#' * If your attributes that are __vectorised__ over __columns__, implement
-#'   `tibble_col_modify()`, 1d `[`, and `names<-` methods. All of these methods
-#'   know which columns are being modified, so you can update the column
-#'   attribute according. You'll also need to think carefully about how to
-#'   recompute the attribute in `tibble_reconstruct()`, and you will need to
-#'   carefully verify the behaviour of each verb, and provide additional
-#'   methods as needed.
-#'
-#' # Current usage
-#'
-#' * `arrange()`, `filter()`, `slice()`, `semi_join()`, and `anti_join()`
-#'   work by generating a vector of row indices, and then subsetting
-#'   with `tibble_row_slice()`.
-#'
-#' * `mutate()` generates a list of new column value (using `NULL` to indicate
-#'   when columns should be deleted), then passes that to `tibble_col_modify()`.
-#'   `transmute()` does the same then uses 1d `[` to select the columns.
-#'
-#' * `summarise()` works similarly to `mutate()` but the data modified by
-#'   `tibble_col_modify()` comes from `group_data()`.
-#'
-#' * `select()` uses 1d `[` to select columns, then `names<-` to rename them.
-#'   `rename()` just uses `names<-`. `relocate()` just uses 1d `[`.
-#'
-#' * `inner_join()`, `left_join()`, `right_join()`, and `full_join()`
-#'   coerces `x` to a tibble, modify the rows, then uses `tibble_reconstruct()`
-#'   to convert back to the same type as `x`.
-#'
-#' * `nest_join()` uses `tibble_col_modify()` to cast the key variables to
-#'   common type and add the nested-df that `y` becomes.
-#'
-#' * `distinct()` does a `mutate()` if any expressions are present, then
-#'   uses 1d `[` to select variables to keep, then `tibble_row_slice()` to
-#'   select distinct rows.
-#'
-#' Note that `group_by()` and `ungroup()` don't use any these generics and
-#' you'll need to provide methods directly.
-#'
-#' @keywords internal
-#' @param data A tibble. We use tibbles because they avoid some inconsistent
-#'    subset-assignment use cases
-#' @name tibble_extending
-NULL
-
-#' @export
-#' @rdname tibble_extending
-#' @param i A numeric or logical vector that indexes the rows of `.data`.
+# Keep in sync with generics.R in dplyr
+# Imported from 3de24a738243a3d07c87b3f4e4afa5f6b02ff561
+
 tibble_row_slice <- function(data, i, ...) {
   if (!is.numeric(i) && !is.logical(i)) {
     abort("`i` must be an numeric or logical vector.")
   }
 
-  UseMethod("tibble_row_slice")
-}
-
-#' @export
-tibble_row_slice.data.frame <- function(data, i, ...) {
   tibble_reconstruct(vec_slice(data, i), data)
 }
 
-#' @export
-tibble_row_slice.grouped_df <- function(data, i, ..., preserve = FALSE) {
-  out <- vec_slice(as.data.frame(data), i)
-
-  # Index into group_indices, then use that to restore the grouping structure
-  groups <- group_data(data)
-  new_id <- vec_slice(group_indices(data), i)
-  new_grps <- vec_group_loc(new_id)
-
-  rows <- rep(list_of(integer()), length.out = nrow(groups))
-  rows[new_grps$key] <- new_grps$loc
-  groups$.rows <- rows
-  if (!preserve && isTRUE(attr(groups, ".drop"))) {
-    groups <- group_data_trim(groups)
-  }
-
-  new_grouped_df(out, groups)
-}
-
-#' @export
-tibble_row_slice.rowwise_df <- function(data, i, ..., preserve = FALSE) {
-  out <- vec_slice(data, i)
-  group_data <- vec_slice(group_keys(data), i)
-  new_rowwise_df(out, group_data)
-}
-
-#' @export
-#' @rdname tibble_extending
-#' @param cols A named list used modify columns. A `NULL` value should remove
-#'   an existing column.
 tibble_col_modify <- function(data, cols) {
-  UseMethod("tibble_col_modify")
-}
-
-#' @export
-tibble_col_modify.data.frame <- function(data, cols) {
   # Must be implemented from first principles to avoiding edge cases in
   # [.data.frame and [.tibble (2.1.3 and earlier).
 
@@ -141,7 +17,7 @@ tibble_col_modify.data.frame <- function(data, cols) {
   cols <- vec_recycle_common(!!!cols, .size = nrow(data))
 
   # Transform to list to avoid stripping inner names with `[[<-`
-  out <- as.list(tibble_vec_data(data))
+  out <- as.list(dplyr_vec_data(data))
 
   nms <- as_utf8_character(names2(cols))
   names(out) <- as_utf8_character(names2(out))
@@ -158,40 +34,11 @@ tibble_col_modify.data.frame <- function(data, cols) {
   tibble_reconstruct(out, data)
 }
 
-#' @export
-tibble_col_modify.grouped_df <- function(data, cols) {
-  out <- tibble_col_modify(as_tibble(data), cols)
-
-  if (any(names(cols) %in% group_vars(data))) {
-    # regroup
-    grouped_df(out, group_vars(data), drop = group_by_drop_default(data))
-  } else {
-    new_grouped_df(out, group_data(data))
-  }
-}
-
-#' @export
-tibble_col_modify.rowwise_df <- function(data, cols) {
-  out <- tibble_col_modify(as_tibble(data), cols)
-  rowwise_df(out, group_vars(data))
-}
-
-#' @param template Template to use for restoring attributes
-#' @export
-#' @rdname tibble_extending
 tibble_reconstruct <- function(data, template) {
   # Strip attributes before dispatch to make it easier to implement
   # methods and prevent unexpected leaking of irrelevant attributes.
-  data <- tibble_new_data_frame(data)
-  return(tibble_reconstruct_dispatch(data, template))
-  UseMethod("tibble_reconstruct", template)
-}
-tibble_reconstruct_dispatch <- function(data, template) {
-  UseMethod("tibble_reconstruct", template)
-}
+  data <- dplyr_new_data_frame(data)
 
-#' @export
-tibble_reconstruct.data.frame <- function(data, template) {
   attrs <- attributes(template)
   attrs$names <- names(data)
   attrs$row.names <- .row_names_info(data, type = 0L)
@@ -200,56 +47,31 @@ tibble_reconstruct.data.frame <- function(data, template) {
   data
 }
 
-#' @export
-tibble_reconstruct.grouped_df <- function(data, template) {
-  group_vars <- group_intersect(template, data)
-  grouped_df(data, group_vars, drop = group_by_drop_default(template))
-}
-
-#' @export
-tibble_reconstruct.rowwise_df <- function(data, template) {
-  group_vars <- group_intersect(template, data)
-  rowwise_df(data, group_vars)
-}
-
-tibble_col_select <- function(.data, loc, names = NULL) {
-  loc <- vec_as_location(loc, n = ncol(.data), names = names(.data))
-
-  out <- .data[loc]
-  if (!inherits(out, "data.frame")) {
-    abort(c(
-      "Can't reconstruct data frame.",
-      x = glue("The `[` method for class <{classes_data}> must return a data frame.",
-        classes_data = glue_collapse(class(.data), sep = "/")
-      ),
-      i = glue("It returned a <{classes_out}>.",
-        classes_out = glue_collapse(class(out), sep = "/")
-      )
-    ))
-  }
-  if (length(out) != length(loc)) {
-    abort(c(
-      "Can't reconstruct data frame.",
-      x = glue("The `[` method for class <{classes_data}> must return a data frame with {length(loc)} column{s}.",
-        classes_data = glue_collapse(class(.data), sep = "/"),
-        s = if(length(loc) == 1) "" else "s"
-      ),
-      i = glue("It returned a <{classes_out}> of {length(out)} column{s}.",
-        classes_out = glue_collapse(class(out), sep = "/"),
-        s = if(length(out) == 1) "" else "s"
-      )
-    ))
-  }
+# Until fixed upstream. `vec_data()` should not return lists from data
+# frames.
+dplyr_vec_data <- function(x) {
+  out <- vec_data(x)
 
-  # Patch base data frames to restore extra attributes that `[.data.frame` drops.
-  # We require `[` methods to keep extra attributes for all data frame subclasses.
-  if (identical(class(.data), "data.frame")) {
-    out <- tibble_reconstruct(out, .data)
-  }
-
-  if (!is.null(names)) {
-    names(out) <- names
+  if (is.data.frame(x)) {
+    new_data_frame(out, n = nrow(x))
+  } else {
+    out
   }
+}
 
-  out
+# Until vctrs::new_data_frame() forwards row names automatically
+dplyr_new_data_frame <- function(x = data.frame(),
+                                 n = NULL,
+                                 ...,
+                                 row.names = NULL,
+                                 class = NULL) {
+  row.names <- row.names %||% .row_names_info(x, type = 0L)
+
+  new_data_frame(
+    x,
+    n = n,
+    ...,
+    row.names = row.names,
+    class = class
+  )
 }
diff --git a/R/tibble-package.R b/R/tibble-package.R
index c076a8854..0ac249bf8 100644
--- a/R/tibble-package.R
+++ b/R/tibble-package.R
@@ -10,6 +10,7 @@
 #' @importFrom vctrs vec_names vec_names2 vec_set_names
 #' @importFrom vctrs new_rcrd
 #' @importFrom vctrs new_data_frame
+#' @importFrom vctrs vec_recycle_common vec_data
 #' @aliases NULL tibble-package
 #' @details
 #' `r lifecycle::badge("stable")`

From 298063bca8cdc35b48fbed5bd346c2125fc72006 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <krlmlr+r@mailbox.org>
Date: Sat, 31 Jul 2021 17:31:49 +0200
Subject: [PATCH 4/9] Fix tibble_row_slice()

---
 R/reconstruct.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/reconstruct.R b/R/reconstruct.R
index ca1116f7c..d427610be 100644
--- a/R/reconstruct.R
+++ b/R/reconstruct.R
@@ -3,10 +3,10 @@
 
 tibble_row_slice <- function(data, i, ...) {
   if (!is.numeric(i) && !is.logical(i)) {
-    abort("`i` must be an numeric or logical vector.")
+    abort("`i` must be a numeric or logical vector.")
   }
 
-  tibble_reconstruct(vec_slice(data, i), data)
+  tibble_reconstruct(vec_slice(remove_rownames(data), i), data)
 }
 
 tibble_col_modify <- function(data, cols) {

From 8c146f44c5e47022870492d275924baf5a6c19fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <krlmlr+r@mailbox.org>
Date: Sat, 31 Jul 2021 06:54:51 +0200
Subject: [PATCH 5/9] Use tibble_reconstruct()

---
 R/subsetting.R   |  2 +-
 src/attributes.c | 18 ------------------
 src/init.c       |  1 -
 src/tibble.h     |  2 --
 4 files changed, 1 insertion(+), 22 deletions(-)
 delete mode 100644 src/attributes.c

diff --git a/R/subsetting.R b/R/subsetting.R
index f6a4b2d56..5712da08c 100644
--- a/R/subsetting.R
+++ b/R/subsetting.R
@@ -757,7 +757,7 @@ set_tibble_class <- function(x, nrow) {
 # External ----------------------------------------------------------------
 
 vectbl_restore <- function(xo, x) {
-  .Call(`tibble_restore_impl`, xo, x)
+  tibble_reconstruct(xo, x)
 }
 
 # Errors ------------------------------------------------------------------
diff --git a/src/attributes.c b/src/attributes.c
deleted file mode 100644
index 7770b41d9..000000000
--- a/src/attributes.c
+++ /dev/null
@@ -1,18 +0,0 @@
-#include "tibble.h"
-
-SEXP tibble_restore_impl(SEXP xo, SEXP x) {
-  xo = PROTECT(Rf_shallow_duplicate(xo));
-
-  // copy over all attributes except `names` and `row.names`
-  SEXP attr_x = ATTRIB(x);
-  while(attr_x != R_NilValue) {
-    SEXP tag = TAG(attr_x);
-    if (tag != R_NamesSymbol && tag != R_RowNamesSymbol) {
-      Rf_setAttrib(xo, tag, CAR(attr_x));
-    }
-    attr_x = CDR(attr_x);
-  }
-
-  UNPROTECT(1);
-  return xo;
-}
diff --git a/src/init.c b/src/init.c
index 4588296a2..34a41b670 100644
--- a/src/init.c
+++ b/src/init.c
@@ -7,7 +7,6 @@
 static const R_CallMethodDef CallEntries[] = {
   {"tibble_matrixToDataFrame", (DL_FUNC) &tibble_matrixToDataFrame, 1},
   {"tibble_string_to_indices", (DL_FUNC) &tibble_string_to_indices, 1},
-  {"tibble_restore_impl", (DL_FUNC) &tibble_restore_impl, 2},
   {"tibble_need_coerce", (DL_FUNC) &tibble_need_coerce, 1},
 
   {NULL, NULL, 0}
diff --git a/src/tibble.h b/src/tibble.h
index b1c762fb2..6902a0b9f 100644
--- a/src/tibble.h
+++ b/src/tibble.h
@@ -7,7 +7,5 @@
 SEXP tibble_matrixToDataFrame(SEXP xSEXP);
 SEXP tibble_string_to_indices(SEXP x);
 SEXP tibble_need_coerce(SEXP x);
-SEXP tibble_update_attrs(SEXP x, SEXP dots);
-SEXP tibble_restore_impl(SEXP xo, SEXP x);
 
 #endif /* TIBBLE_H */

From 5e390dee7a700b9d9a4f360fb91024a21912aabe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <krlmlr+r@mailbox.org>
Date: Sat, 31 Jul 2021 06:55:36 +0200
Subject: [PATCH 6/9] Inline

---
 R/add.R        |  4 ++--
 R/subsetting.R | 10 ++--------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/R/add.R b/R/add.R
index 369842471..ae1a69476 100644
--- a/R/add.R
+++ b/R/add.R
@@ -63,7 +63,7 @@ add_row <- function(.data, ..., .before = NULL, .after = NULL) {
   pos <- pos_from_before_after(.before, .after, nrow(.data))
   out <- rbind_at(.data, df, pos)
 
-  vectbl_restore(out, .data)
+  tibble_reconstruct(out, .data)
 }
 
 #' @export
@@ -166,7 +166,7 @@ add_column <- function(.data, ..., .before = NULL, .after = NULL,
   out <- new_data[indexes]
 
   out <- set_repaired_names(out, repair_hint = TRUE, .name_repair)
-  vectbl_restore(out, .data)
+  tibble_reconstruct(out, .data)
 }
 
 
diff --git a/R/subsetting.R b/R/subsetting.R
index 5712da08c..4a22407f3 100644
--- a/R/subsetting.R
+++ b/R/subsetting.R
@@ -256,7 +256,7 @@ NULL
   if (drop && length(xo) == 1L) {
     tbl_subset2(xo, 1L, j_arg)
   } else {
-    vectbl_restore(xo, x)
+    tibble_reconstruct(xo, x)
   }
 }
 
@@ -463,7 +463,7 @@ tbl_subassign <- function(x, i, j, value, i_arg, j_arg, value_arg) {
     }
   }
 
-  vectbl_restore(xo, x)
+  tibble_reconstruct(xo, x)
 }
 
 vectbl_as_new_row_index <- function(i, x, i_arg) {
@@ -754,12 +754,6 @@ set_tibble_class <- function(x, nrow) {
   x
 }
 
-# External ----------------------------------------------------------------
-
-vectbl_restore <- function(xo, x) {
-  tibble_reconstruct(xo, x)
-}
-
 # Errors ------------------------------------------------------------------
 
 error_need_rhs_vector <- function(value_arg) {

From a31caf0bdb6c8393b4d5a6224b2f92c09976c7fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <krlmlr+r@mailbox.org>
Date: Sat, 31 Jul 2021 07:14:55 +0200
Subject: [PATCH 7/9] Mark entry points

---
 R/add.R        | 2 ++
 R/subsetting.R | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/R/add.R b/R/add.R
index ae1a69476..d34bebfb0 100644
--- a/R/add.R
+++ b/R/add.R
@@ -95,6 +95,8 @@ rbind_at <- function(old, new, pos) {
     seq2(pos + 1L, nrow(old))
   )
   vec_slice(out, idx)
+
+  # tibble_reconstruct
 }
 
 #' Add columns to a data frame
diff --git a/R/subsetting.R b/R/subsetting.R
index 4a22407f3..9c46bdcfb 100644
--- a/R/subsetting.R
+++ b/R/subsetting.R
@@ -410,6 +410,7 @@ tbl_subset2 <- function(x, j, j_arg) {
 tbl_subset_row <- function(x, i, i_arg) {
   if (is.null(i)) return(x)
   i <- vectbl_as_row_index(i, x, i_arg)
+  # tibble_row_slice
   xo <- lapply(unclass(x), vec_slice, i = i)
   set_tibble_class(xo, nrow = length(i))
 }
@@ -602,6 +603,8 @@ is_tight_sequence_at_end <- function(i_new, n) {
 }
 
 tbl_subassign_col <- function(x, j, value) {
+  # tibble_col_modify
+
   is_data <- !vapply(value, is.null, NA)
   nrow <- fast_nrow(x)
 
@@ -640,6 +643,7 @@ tbl_expand_to_nrow <- function(x, i) {
   if (new_nrow != nrow) {
     # FIXME: vec_expand()?
     i_expand <- c(seq_len(nrow), rep(NA_integer_, new_nrow - nrow))
+    # tibble_row_slice
     x <- vec_slice(x, i_expand)
   }
 

From 4108aeb30bd15d926895130fe0f7b86fe8dfa450 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <krlmlr+r@mailbox.org>
Date: Sat, 31 Jul 2021 08:56:00 +0200
Subject: [PATCH 8/9] Use tibble_row_slice()

---
 R/subsetting.R                   | 7 ++-----
 tests/testthat/test-subsetting.R | 9 +++++++++
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/R/subsetting.R b/R/subsetting.R
index 9c46bdcfb..d11756ebf 100644
--- a/R/subsetting.R
+++ b/R/subsetting.R
@@ -410,9 +410,7 @@ tbl_subset2 <- function(x, j, j_arg) {
 tbl_subset_row <- function(x, i, i_arg) {
   if (is.null(i)) return(x)
   i <- vectbl_as_row_index(i, x, i_arg)
-  # tibble_row_slice
-  xo <- lapply(unclass(x), vec_slice, i = i)
-  set_tibble_class(xo, nrow = length(i))
+  tibble_row_slice(x, i)
 }
 
 tbl_subassign <- function(x, i, j, value, i_arg, j_arg, value_arg) {
@@ -643,8 +641,7 @@ tbl_expand_to_nrow <- function(x, i) {
   if (new_nrow != nrow) {
     # FIXME: vec_expand()?
     i_expand <- c(seq_len(nrow), rep(NA_integer_, new_nrow - nrow))
-    # tibble_row_slice
-    x <- vec_slice(x, i_expand)
+    x <- tibble_row_slice(x, i_expand)
   }
 
   x
diff --git a/tests/testthat/test-subsetting.R b/tests/testthat/test-subsetting.R
index ffe5fef33..5d5c87062 100644
--- a/tests/testthat/test-subsetting.R
+++ b/tests/testthat/test-subsetting.R
@@ -16,6 +16,15 @@ test_that("[ retains class", {
   expect_identical(class(mtcars2), class(mtcars2[1:5, 1:5]))
 })
 
+test_that("[ removes row names", {
+  tbl <- tibble(a = 1:3)
+  expect_warning(rownames(tbl) <- letters[1:3], "deprecated")
+
+  expect_equal(rownames(tbl), letters[1:3])
+  expect_equal(rownames(tbl[1, ]), "1")
+  expect_equal(rownames(tbl["a"]), as.character(1:3))
+})
+
 test_that("[ and as_tibble commute", {
   mtcars2 <- as_tibble(mtcars)
   expect_identical(mtcars2, as_tibble(mtcars))

From 5836a30e27ca3d7395026d80fb9298544e896183 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <krlmlr+r@mailbox.org>
Date: Sat, 31 Jul 2021 11:41:52 +0200
Subject: [PATCH 9/9] Use tibble_col_modify()

---
 R/subsetting.R | 46 ++++++++++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/R/subsetting.R b/R/subsetting.R
index d11756ebf..5266868a6 100644
--- a/R/subsetting.R
+++ b/R/subsetting.R
@@ -601,32 +601,42 @@ is_tight_sequence_at_end <- function(i_new, n) {
 }
 
 tbl_subassign_col <- function(x, j, value) {
+  # Fix order
+  order_j <- order(j)
+  value <- value[order_j]
+  j <- j[order_j]
+
   # tibble_col_modify
 
-  is_data <- !vapply(value, is.null, NA)
-  nrow <- fast_nrow(x)
+  # Adapt to interface
+  names(value) <- names(j)
 
-  x <- unclass(x)
+  # New names
+  tweak_names <- (j > length(x))
+  need_tweak_names <- any(tweak_names)
 
-  # Grow, assign new names
-  new <- which(j > length(x))
-  if (has_length(new)) {
-    length(x) <- max(j[new])
-    names(x)[ j[new] ] <- names2(j)[new]
-  }
+  if (need_tweak_names) {
+    new_names <- names(x)
+    new_names[ j[tweak_names] ] <- names(j)[tweak_names]
+
+    # New names ("" means appending at end)
+    names(value)[tweak_names] <- ""
 
-  # Update
-  for (jj in which(is_data)) {
-    ji <- j[[jj]]
-    x[[ji]] <- value[[jj]]
+    # Removed names, use vapply() for speed
+    col_is_null <- vapply(value, is.null, NA)
+    if (any(col_is_null)) {
+      new_names <- new_names[ -j[col_is_null] ]
+    }
   }
 
-  # Remove
-  j_remove <- j[!is_data & !is.na(j)]
-  if (has_length(j_remove)) x <- x[-j_remove]
+  out <- tibble_col_modify(x, value)
 
-  # Restore
-  set_tibble_class(x, nrow)
+  # This calls `names<-()` for the tibble class
+  if (need_tweak_names) {
+    names(out) <- new_names
+  }
+
+  return(out)
 }
 
 tbl_expand_to_nrow <- function(x, i) {