From 169af59845009f00e621c9673d3fa1944a33ca2d Mon Sep 17 00:00:00 2001 From: jmgirard Date: Mon, 19 Apr 2021 21:05:01 -0500 Subject: [PATCH 1/4] Add sample code for clustered and nonclustered bootstrapping --- R/cor_boot.R | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 R/cor_boot.R diff --git a/R/cor_boot.R b/R/cor_boot.R new file mode 100644 index 00000000..db44e0d0 --- /dev/null +++ b/R/cor_boot.R @@ -0,0 +1,82 @@ +cor_boot <- function(data, + x, + y, + method = "pearson", + ci = 0.95, + R = 2000, + cluster = NULL, + ...) { + + if (!is.null(cluster)) { + if (!requireNamespace("tidyr", quietly = TRUE)) { + stop("Package `tidyr` required for clustered bootstrapping. Please install it by running `install.packages('tidyr').", call. = FALSE) + } + bs_data <- data[, c(x, y, cluster)] + bs_data <- tidyr::nest(bs_data, data = -c(cluster)) + # clustered bootstrap + bs_results <- clusterboot( + bs_data = bs_data, + method = method, + R = R, + ... + ) + } else { + # nonclustered bootstrap + bs_results <- singleboot( + bs_data = data[, c(x, y)], + method = method, + R = R, + ... + ) + } + + bs_ci <- boot::boot.ci(bs_results, conf = ci, type = "bca", index = 2) + + out <- data.frame( + "Parameter1" = x, + "Parameter2" = y, + r = bs_results$t0[2], + se = sd(bs_results$t[, 2]), + CI_low = bs_ci$bca[[4]], + CI_high = bs_ci$bca[[5]], + Method = method, + stringsAsFactors = FALSE + ) + + out +} + +## Non-clustered Bootstrap +singleboot <- function(bs_data, method, R, ...) { + boot::boot( + data = bs_data, + statistic = singleboot_stat, + R = R, + method = method, + ... + ) +} + +singleboot_stat <- function(data, index, method) { + stats::cor(data[index, ], method = method) +} + +## Cluster Bootstrap +clusterboot <- function(bs_data, method, R, ...) { + boot::boot( + data = bs_data, + statistic = clusterboot_stat, + R = R, + method = method, + ... + ) +} + +clusterboot_stat <- function(data, index, method) { + resample <- data[index, ] + resample <- tidyr::unnest(resample, cols = data) + resample <- resample[-1] + stats::cor(resample, method = method) +} + + From 2115d937a5530afa94c08064381d20947da27ce2 Mon Sep 17 00:00:00 2001 From: jmgirard Date: Tue, 20 Apr 2021 11:51:57 -0500 Subject: [PATCH 2/4] Replace nest with split and BCA CIs with Percentile CIs --- R/cor_boot.R | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/R/cor_boot.R b/R/cor_boot.R index db44e0d0..b5709672 100644 --- a/R/cor_boot.R +++ b/R/cor_boot.R @@ -8,11 +8,8 @@ cor_boot <- function(data, ...) { if (!is.null(cluster)) { - if (!requireNamespace("tidyr", quietly = TRUE)) { - stop("Package `tidyr` required for clustered bootstrapping. Please install it by running `install.packages('tidyr').", call. = FALSE) - } bs_data <- data[, c(x, y, cluster)] - bs_data <- tidyr::nest(bs_data, data = -c(cluster)) + bs_data <- split(bs_data, bs_data[[cluster]]) # clustered bootstrap bs_results <- clusterboot( bs_data = bs_data, @@ -30,15 +27,16 @@ cor_boot <- function(data, ) } - bs_ci <- boot::boot.ci(bs_results, conf = ci, type = "bca", index = 2) + # TODO: Replace this with the r-to-z transformation + bs_ci <- boot::boot.ci(bs_results, conf = ci, type = "perc", index = 2) out <- data.frame( "Parameter1" = x, "Parameter2" = y, r = bs_results$t0[2], - se = sd(bs_results$t[, 2]), - CI_low = bs_ci$bca[[4]], - CI_high = bs_ci$bca[[5]], + SE = sd(bs_results$t[, 2]), + CI_low = bs_ci$percent[[4]], + CI_high = bs_ci$percent[[5]], Method = method, stringsAsFactors = FALSE ) @@ -73,9 +71,7 @@ clusterboot <- function(bs_data, method, R, ...) { } clusterboot_stat <- function(data, index, method) { - resample <- data[index, ] - resample <- tidyr::unnest(resample, cols = data) - resample <- resample[-1] + resample <- do.call(rbind, data[index]) stats::cor(resample, method = method) } From ca33c8cbb963244b1b188dc8a222f38e5b229ab7 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 20 Apr 2021 18:59:21 +0200 Subject: [PATCH 3/4] make sure only first two variables are selected (and cluster variable is ignored) in correlation --- R/cor_boot.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cor_boot.R b/R/cor_boot.R index b5709672..19ab451a 100644 --- a/R/cor_boot.R +++ b/R/cor_boot.R @@ -72,7 +72,7 @@ clusterboot <- function(bs_data, method, R, ...) { clusterboot_stat <- function(data, index, method) { resample <- do.call(rbind, data[index]) - stats::cor(resample, method = method) + stats::cor(resample[, 1:2], method = method) } From d12660bf31fd30b6dad24f3358e2eccaa51539d4 Mon Sep 17 00:00:00 2001 From: jmgirard Date: Tue, 20 Apr 2021 12:05:16 -0500 Subject: [PATCH 4/4] Add fisher transformation to bootstrap and calculate CIs using z-scores --- R/cor_boot.R | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/R/cor_boot.R b/R/cor_boot.R index b5709672..78fa95f5 100644 --- a/R/cor_boot.R +++ b/R/cor_boot.R @@ -27,16 +27,16 @@ cor_boot <- function(data, ) } - # TODO: Replace this with the r-to-z transformation bs_ci <- boot::boot.ci(bs_results, conf = ci, type = "perc", index = 2) + bs_ci <- bs_ci$percent[4:5] out <- data.frame( "Parameter1" = x, "Parameter2" = y, - r = bs_results$t0[2], - SE = sd(bs_results$t[, 2]), - CI_low = bs_ci$percent[[4]], - CI_high = bs_ci$percent[[5]], + r = z_to_r(bs_results$t0[2]), + SE = sd(z_to_r(bs_results$t[, 2])), + CI_low = z_to_r(bs_ci[[1]]), + CI_high = z_to_r(bs_ci[[2]]), Method = method, stringsAsFactors = FALSE ) @@ -55,8 +55,18 @@ singleboot <- function(bs_data, method, R, ...) { ) } +r_to_z <- function(r) { + 0.5 * log((1 + r) / (1 - r)) +} + +z_to_r <- function(z) { + (exp(2 * z) - 1) / (exp(2 * z) + 1) +} + singleboot_stat <- function(data, index, method) { - stats::cor(data[index, ], method = method) + r <- stats::cor(data[index, ], method = method) + z <- r_to_z(r) + z } ## Cluster Bootstrap @@ -72,7 +82,9 @@ clusterboot <- function(bs_data, method, R, ...) { clusterboot_stat <- function(data, index, method) { resample <- do.call(rbind, data[index]) - stats::cor(resample, method = method) + r <- stats::cor(resample[, 1:2], method = method) + z <- r_to_z(r) + z }