From 70ec9a01ff3d0fa9bd76bc192d2aa905e27ecb28 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Fri, 28 Nov 2025 13:47:19 +0800 Subject: [PATCH 01/25] init commit --- .lintr | 6 + DESCRIPTION | 1 + NAMESPACE | 4 + R/assert.R | 88 +++++++ R/from_formatters.R | 31 +++ R/gkm.R | 560 +++++++++++++++++++++++++++++++++++++++++++ man/g_km.Rd | 41 ++++ man/obj_label-set.Rd | 14 ++ 8 files changed, 745 insertions(+) create mode 100644 .lintr create mode 100644 R/assert.R create mode 100644 R/from_formatters.R create mode 100644 R/gkm.R create mode 100644 man/g_km.Rd create mode 100644 man/obj_label-set.Rd diff --git a/.lintr b/.lintr new file mode 100644 index 00000000..c514f393 --- /dev/null +++ b/.lintr @@ -0,0 +1,6 @@ +linters: linters_with_defaults( + line_length_linter = line_length_linter(120), + cyclocomp_linter = NULL, + object_usage_linter = NULL, + object_length_linter = NULL + ) diff --git a/DESCRIPTION b/DESCRIPTION index e2702d27..7f9ba55e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Imports: broom (>= 1.0.8), cards (>= 0.7.0), cardx (>= 0.3.0), + checkmate (>= 2.3.2), cli (>= 3.6.4), dplyr (>= 1.1.4), flextable (>= 0.9.7), diff --git a/NAMESPACE b/NAMESPACE index 124285cd..2484284d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,10 +7,12 @@ S3method(add_overall,tbl_shift) S3method(add_overall,tbl_survfit_quantiles) S3method(add_overall,tbl_survfit_times) export("%>%") +export("obj_label<-") export(add_blank_rows) export(add_hierarchical_count_row) export(add_overall) export(filter_hierarchical) +export(g_km) export(label_roche_number) export(label_roche_percent) export(label_roche_pvalue) @@ -34,6 +36,8 @@ export(tbl_shift) export(tbl_survfit_quantiles) export(tbl_survfit_times) export(theme_gtsummary_roche) +exportMethods("obj_label<-") +exportMethods(obj_label) import(glue) import(rlang) importFrom(dplyr,"%>%") diff --git a/R/assert.R b/R/assert.R new file mode 100644 index 00000000..9f2c08e6 --- /dev/null +++ b/R/assert.R @@ -0,0 +1,88 @@ +assert_proportion_value <- function (x, include_boundaries = FALSE) +{ + checkmate::assert_number(x, lower = 0, upper = 1) + checkmate::assert_flag(include_boundaries) + if (isFALSE(include_boundaries)) { + checkmate::assert_true(x > 0) + checkmate::assert_true(x < 1) + } +} + +check_list_of_variables <- function (x) +{ + x <- Filter(Negate(is.null), x) + res <- checkmate::check_list(x, names = "named", min.len = 1, + any.missing = FALSE, types = "character") + if (isTRUE(res)) { + res <- checkmate::check_character(unlist(x), min.chars = 1) + } + res +} + +assert_list_of_variables <- function (x, .var.name = checkmate::vname(x), add = NULL) +{ + if (missing(x)) + stop(sprintf("argument \"%s\" is missing, with no default", + .var.name)) + res = check_list_of_variables(x) + checkmate::makeAssertion(x, res, .var.name, add) +} + +check_df_with_variables <- function (df, variables, na_level = NULL) +{ + checkmate::assert_data_frame(df) + assert_list_of_variables(variables) + err_flag <- all(unlist(variables) %in% colnames(df)) + checkmate::assert_flag(err_flag) + if (isFALSE(err_flag)) { + vars <- setdiff(unlist(variables), colnames(df)) + return(paste(deparse(substitute(df)), "does not contain all specified variables as column names. Missing from data frame:", + paste(vars, collapse = ", "))) + } + if (!is.null(na_level)) { + checkmate::assert_string(na_level) + res <- unlist(lapply(as.list(df)[unlist(variables)], + function(x) any(x == na_level))) + if (any(res)) { + return(paste0(deparse(substitute(df)), " contains explicit na_level (", + na_level, ") in the following columns: ", paste0(unlist(variables)[res], + collapse = ", "))) + } + } + return(TRUE) +} + +assert_df_with_variables <- function (df, variables, na_level = NULL, .var.name = checkmate::vname(df), + add = NULL) +{ + if (missing(df)) + stop(sprintf("argument \"%s\" is missing, with no default", + .var.name)) + res = check_df_with_variables(df, variables, na_level) + checkmate::makeAssertion(df, res, .var.name, add) +} + +check_valid_factor <- function (x, min.levels = 1, max.levels = NULL, null.ok = TRUE, + any.missing = TRUE, n.levels = NULL, len = NULL) +{ + checkmate::assert_int(min.levels, lower = 1) + res <- checkmate::check_factor(x, min.levels = min.levels, + null.ok = null.ok, max.levels = max.levels, any.missing = any.missing, + n.levels = n.levels) + if (isTRUE(res)) { + res <- checkmate::check_character(levels(x), min.chars = 1) + } + return(res) +} + +assert_valid_factor <- function (x, min.levels = 1, max.levels = NULL, null.ok = TRUE, + any.missing = TRUE, n.levels = NULL, len = NULL, .var.name = checkmate::vname(x), + add = NULL) +{ + if (missing(x)) + stop(sprintf("argument \"%s\" is missing, with no default", + .var.name)) + res = check_valid_factor(x, min.levels, max.levels, null.ok, + any.missing, n.levels, len) + checkmate::makeAssertion(x, res, .var.name, add) +} diff --git a/R/from_formatters.R b/R/from_formatters.R new file mode 100644 index 00000000..218adba7 --- /dev/null +++ b/R/from_formatters.R @@ -0,0 +1,31 @@ +# ## Changelog +# nocov start +# styler: off + +setGeneric("obj_label", function(obj) standardGeneric("obj_label")) + +#' The new label +#' @param value character(1). The new label +#' @export +setGeneric("obj_label<-", function(obj, value) standardGeneric("obj_label<-")) + +#' @exportMethod obj_label +setMethod("obj_label", "ANY", function(obj) attr(obj, "label")) + +#' @exportMethod obj_label<- +setMethod( + "obj_label<-", "ANY", + function(obj, value) { + attr(obj, "label") <- value + obj + } +) + +with_label <- function (x, label) +{ + obj_label(x) <- label + x +} + +# nocov end +# styler: on diff --git a/R/gkm.R b/R/gkm.R new file mode 100644 index 00000000..97d85993 --- /dev/null +++ b/R/gkm.R @@ -0,0 +1,560 @@ +control_surv_timepoint <- function (conf_level = 0.95, conf_type = c("plain", "log", "log-log")) +{ + conf_type <- match.arg(conf_type) + assert_proportion_value(conf_level) + list(conf_level = conf_level, conf_type = conf_type) +} + +control_coxph <- function (pval_method = c("log-rank", "wald", "likelihood"), + ties = c("efron", "breslow", "exact"), conf_level = 0.95) +{ + pval_method <- match.arg(pval_method) + ties <- match.arg(ties) + assert_proportion_value(conf_level) + list(pval_method = pval_method, ties = ties, conf_level = conf_level) +} + +control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { + assert_proportion_value(x) + assert_proportion_value(y) + assert_proportion_value(w) + assert_proportion_value(h) + + list(x = x, y = y, w = w, h = h, fill = fill) +} + +control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { + checkmate::assert_logical(ref_lbls, any.missing = FALSE) + + res <- c(control_surv_med_annot(x = x, y = y, w = w, h = h), list(ref_lbls = ref_lbls)) + res +} + + +## ---------------------------------------------------------------------------- +## 3. Helper Functions (Formatting, Data Preparation, Plotting Utilities) +## ---------------------------------------------------------------------------- + +f_conf_level <- function (conf_level) +{ + assert_proportion_value(conf_level) + paste0(conf_level * 100, "% CI") +} + +df2gg <- function (df, colwidths = NULL, font_size = 10, col_labels = TRUE, + col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) +{ + df <- as.data.frame(apply(df, 1:2, function(x) if (is.na(x)) + "NA" + else as.character(x))) + if (col_labels) { + df <- as.matrix(df) + df <- rbind(colnames(df), df) + } + if (is.null(colwidths)) { + colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) + } + tot_width <- sum(colwidths) + res <- ggplot2::ggplot(data = df) + theme_void() + scale_x_continuous(limits = c(0, + tot_width)) + scale_y_continuous(limits = c(1, nrow(df))) + if (!is.null(bg_fill)) + res <- res + theme(plot.background = element_rect(fill = bg_fill)) + if (hline) { + res <- res + annotate("segment", x = 0 + 0.2 * colwidths[2], + xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - + 0.5, yend = nrow(df) - 0.5) + } + for (i in seq_len(ncol(df))) { + line_pos <- c(if (i == 1) 0 else sum(colwidths[1:(i - + 1)]), sum(colwidths[1:i])) + res <- res + annotate("text", x = mean(line_pos), y = rev(seq_len(nrow(df))), + label = df[, i], size = font_size/.pt, fontface = if (col_labels) { + c(col_lab_fontface, rep("plain", nrow(df) - 1)) + } + else { + rep("plain", nrow(df)) + }) + } + res +} + +h_xticks <- function(data, xticks = NULL, max_time = NULL) { + if (is.null(xticks)) { + if (is.null(max_time)) { + labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) + } else { + labeling::extended(range(data$time)[1], max(range(data$time)[2], max_time), m = 5) + } + } else if (checkmate::test_number(xticks)) { + if (is.null(max_time)) { + seq(0, max(data$time), xticks) + } else { + seq(0, max(data$time, max_time), xticks) + } + } else if (is.numeric(xticks)) { + xticks + } else { + stop( + paste( + "xticks should be either `NULL`", + "or a single number (interval between x ticks)", + "or a numeric vector (position of ticks on the x axis)" + ) + ) + } +} + +h_tbl_median_surv <- function(fit_km, armval = "All") { + y <- if (is.null(fit_km$strata)) { + as.data.frame(t(summary(fit_km)$table), row.names = armval) + } else { + tbl <- summary(fit_km)$table + rownames_lst <- strsplit(sub("=", "equals", rownames(tbl)), "equals") + rownames(tbl) <- matrix(unlist(rownames_lst), ncol = 2, byrow = TRUE)[, 2] + as.data.frame(tbl) + } + conf.int <- summary(fit_km)$conf.int # nolint + y$records <- round(y$records) + y$median <- signif(y$median, 4) + y$`CI` <- paste0( + "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")" + ) + stats::setNames( + y[c("records", "median", "CI")], + c("N", "Median", f_conf_level(conf.int)) + ) +} + +h_tbl_coxph_pairwise <- function(df, + variables, + ref_group_coxph = NULL, + control_coxph_pw = control_coxph(), + annot_coxph_ref_lbls = FALSE) { + + assert_df_with_variables(df, variables) + checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) + checkmate::assert_flag(annot_coxph_ref_lbls) + + arm <- variables$arm + df[[arm]] <- factor(df[[arm]]) + + ref_group <- if (!is.null(ref_group_coxph)) ref_group_coxph else levels(df[[variables$arm]])[1] + comp_group <- setdiff(levels(df[[arm]]), ref_group) + + results <- Map(function(comp) { + res <- s_coxph_pairwise( + df = df[df[[arm]] == comp, , drop = FALSE], + .ref_group = df[df[[arm]] == ref_group, , drop = FALSE], + .in_ref_col = FALSE, + .var = variables$tte, + is_event = variables$is_event, + strata = variables$strata, + control = control_coxph_pw + ) + res_df <- data.frame( + hr = format(round(res$hr, 2), nsmall = 2), + hr_ci = paste0( + "(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ", + format(round(res$hr_ci[2], 2), nsmall = 2), ")" + ), + pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4), + stringsAsFactors = FALSE + ) + colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character")) # nolint + row.names(res_df) <- comp + res_df + }, comp_group) + if (annot_coxph_ref_lbls) names(results) <- paste(comp_group, "vs.", ref_group) + + do.call(rbind, results) +} + +h_data_plot <- function(fit_km, + armval = "All", + max_time = NULL) { + y <- broom::tidy(fit_km) + + if (!is.null(fit_km$strata)) { + fit_km_var_level <- strsplit(sub("=", "equals", names(fit_km$strata)), "equals") + strata_levels <- vapply(fit_km_var_level, FUN = "[", FUN.VALUE = "a", i = 2) + strata_var_level <- strsplit(sub("=", "equals", y$strata), "equals") + y$strata <- factor( + vapply(strata_var_level, FUN = "[", FUN.VALUE = "a", i = 2), + levels = strata_levels + ) + } else { + y$strata <- armval + } + + y_by_strata <- split(y, y$strata) + y_by_strata_extended <- lapply( + y_by_strata, + FUN = function(tbl) { + first_row <- tbl[1L, ] + first_row$time <- 0 + first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")]) + first_row$n.event <- first_row$n.censor <- 0 + first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1 + first_row$std.error <- 0 + rbind( + first_row, + tbl + ) + } + ) + y <- do.call(rbind, y_by_strata_extended) + + y$censor <- ifelse(y$n.censor > 0, y$estimate, NA) + if (!is.null(max_time)) { + y <- y[y$time <= max(max_time), ] + } + y +} + + +## ---------------------------------------------------------------------------- +## 4. Core Statistical Function +## ---------------------------------------------------------------------------- + +s_coxph_pairwise <- + function (df, .ref_group, .in_ref_col, .var, is_event, strata = NULL, + control = control_coxph(), ...) + { + checkmate::assert_string(.var) + checkmate::assert_numeric(df[[.var]]) + checkmate::assert_logical(df[[is_event]]) + assert_df_with_variables(df, list(tte = .var, is_event = is_event)) + pval_method <- control$pval_method + ties <- control$ties + conf_level <- control$conf_level + if (.in_ref_col) { + return(list(pvalue = with_label(numeric(), + paste0("p-value (", pval_method, ")")), hr = with_label(numeric(), + "Hazard Ratio"), hr_ci = with_label(numeric(), + f_conf_level(conf_level)), hr_ci_3d = with_label(numeric(), + paste0("Hazard Ratio (", f_conf_level(conf_level), + ")")), n_tot = with_label(numeric(), + "Total n"), n_tot_events = with_label(numeric(), + "Total events"))) + } + data <- rbind(.ref_group, df) + group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), + levels = c("ref", "x")) + df_cox <- data.frame(tte = data[[.var]], is_event = data[[is_event]], + arm = group) + if (is.null(strata)) { + formula_cox <- survival::Surv(tte, is_event) ~ arm + } + else { + formula_cox <- stats::as.formula(paste0("survival::Surv(tte, is_event) ~ arm + strata(", + paste(strata, collapse = ","), ")")) + df_cox <- cbind(df_cox, data[strata]) + } + cox_fit <- survival::coxph(formula = formula_cox, data = df_cox, + ties = ties) + sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) + orginal_survdiff <- survival::survdiff(formula_cox, data = df_cox) + log_rank_pvalue <- 1 - pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - + 1) + pval <- switch(pval_method, wald = sum_cox$waldtest["pvalue"], + `log-rank` = log_rank_pvalue, likelihood = sum_cox$logtest["pvalue"]) + list(pvalue = with_label(unname(pval), paste0("p-value (", + pval_method, ")")), hr = with_label(sum_cox$conf.int[1, + 1], "Hazard Ratio"), hr_ci = with_label(unname(sum_cox$conf.int[1, + 3:4]), f_conf_level(conf_level)), hr_ci_3d = with_label(c(sum_cox$conf.int[1, + 1], unname(sum_cox$conf.int[1, 3:4])), paste0("Hazard Ratio (", + f_conf_level(conf_level), ")")), n_tot = with_label(sum_cox$n, + "Total n"), n_tot_events = with_label(sum_cox$nevent, + "Total events")) + } + + +h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { + tte <- variables$tte + is_event <- variables$is_event + arm <- variables$arm + + assert_valid_factor(df[[arm]]) + assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) + + formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm)) + fit_km <- survival::survfit( + formula = formula, + data = df, + conf.int = control_surv$conf_level, + conf.type = control_surv$conf_type + ) + return(fit_km) +} + +#' g_km plot +#' @export +g_km <- function(fit_km, + variables, + coxph_tbl = NULL, # New argument for pre-calculated Cox-PH table + control_surv = control_surv_timepoint(), + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + annot_at_risk = TRUE, + annot_at_risk_title = TRUE, + annot_surv_med = TRUE, + control_annot_surv_med = control_surv_med_annot(), + control_annot_coxph = control_coxph_annot(), + legend_pos = NULL, + rel_height_plot = 0.75, + ggtheme = NULL, + as_list = FALSE) { + + # --- Data Extraction and Assertions --- + checkmate::assert_class(fit_km, "survfit") + checkmate::assert_list(variables) + checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables)) + + # 1. Extract arm values (strata names) from the fitted object + # h_data_plot is used here only to consistently get the unique strata levels + armval <- if (is.null(fit_km$strata)) "All" else levels(h_data_plot(fit_km, max_time = 1)$strata) + checkmate::assert_vector(col, len = length(armval), null.ok = TRUE) + + # Check if a Cox-PH table was provided (replaces annot_coxph flag) + if (!is.null(coxph_tbl)) { + checkmate::assert_data_frame(coxph_tbl) + } + + # --- Data Processing --- + yval <- match.arg(yval) + data <- h_data_plot(fit_km, armval = armval, max_time = max_time) + xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time) + + # change estimates of survival to estimates of failure (1 - survival) + if (yval == "Failure") { + data[c("estimate", "conf.low", "conf.high", "censor")] <- list( + 1 - data$estimate, 1 - data$conf.low, 1 - data$conf.high, 1 - data$censor + ) + } + + # derive y-axis limits + if (is.null(ylim)) { + if (!is.null(max_time)) { + y_lwr <- min(data[data$time < max_time, ][["estimate"]]) + y_upr <- max(data[data$time < max_time, ][["estimate"]]) + } else { + y_lwr <- min(data[["estimate"]]) + y_upr <- max(data[["estimate"]]) + } + ylim <- c(y_lwr, y_upr) + } + + # --- ggplot Initialization and Aesthetics (Unchanged) --- + gg_plt <- ggplot2::ggplot( + data = data, + mapping = aes( + x = .data[["time"]], y = .data[["estimate"]], ymin = .data[["conf.low"]], + ymax = .data[["conf.high"]], color = .data[["strata"]], fill = .data[["strata"]] + ) + ) + + theme_bw(base_size = font_size) + + scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + + labs(title = title, x = xlab, y = ylab, caption = footnotes) + + theme( + axis.text = element_text(size = font_size), axis.title = element_text(size = font_size), + legend.title = element_blank(), legend.text = element_text(size = font_size), + legend.box.background = element_rect(fill = "white", linewidth = 0.5), + legend.background = element_blank(), legend.position = "inside", + legend.spacing.y = unit(-0.02, "npc"), panel.grid.major = element_blank(), + panel.grid.minor = element_blank() + ) + + # derive x-axis limits + if (!is.null(max_time) && !is.null(xticks)) { + gg_plt <- gg_plt + scale_x_continuous( + breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))), expand = c(0.025, 0) + ) + } else if (!is.null(xticks)) { + if (max(data$time) <= max(xticks)) { + gg_plt <- gg_plt + scale_x_continuous( + breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)), expand = c(0.025, 0) + ) + } else { + gg_plt <- gg_plt + scale_x_continuous(breaks = xticks, expand = c(0.025, 0)) + } + } else if (!is.null(max_time)) { + gg_plt <- gg_plt + scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0)) + } + + # set legend position (unchanged logic) + if (!is.null(legend_pos)) { + gg_plt <- gg_plt + theme(legend.position.inside = legend_pos) + } else { + max_time2 <- sort( + data$time, + partial = nrow(data) - length(armval) - 1 + )[nrow(data) - length(armval) - 1] + + y_rng <- ylim[2] - ylim[1] + + if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) && + all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint + gg_plt <- gg_plt + + theme( + legend.position.inside = c(1, 0.5), + legend.justification = c(1.1, 0.6) + ) + } else { + gg_plt <- gg_plt + + theme( + legend.position.inside = c(1, 0), + legend.justification = c(1.1, -0.4) + ) + } + } + + # add lines, censor marks, ci ribbon, and colors (unchanged) + gg_plt <- if (is.null(lty)) { + gg_plt + geom_step(linewidth = lwd, na.rm = TRUE) + } else if (length(lty) == 1) { + gg_plt + geom_step(linewidth = lwd, lty = lty, na.rm = TRUE) + } else { + gg_plt + + geom_step(aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) + + scale_linetype_manual(values = lty) + } + + if (censor_show) { + gg_plt <- gg_plt + geom_point( + data = data[data$n.censor != 0, ], + aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), + size = size, + na.rm = TRUE + ) + + scale_shape_manual(name = NULL, values = pch) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + } + + if (ci_ribbon) gg_plt <- gg_plt + geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) + + if (!is.null(col)) { + gg_plt <- gg_plt + + scale_color_manual(values = col) + + scale_fill_manual(values = col) + } + if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme + + # --- Annotation Tables --- + + # 2. Median survival time annotation table + if (annot_surv_med) { + surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval) + bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]] + + gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) + + theme( + axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), + plot.margin = margin(0, 2, 0, 5) + ) + + coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) + gg_surv_med <- suppressMessages( + gg_surv_med + + scale_x_continuous(expand = c(0.025, 0)) + + scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_surv_med, control_annot_surv_med[["x"]], control_annot_surv_med[["y"]], + width = control_annot_surv_med[["w"]], height = control_annot_surv_med[["h"]], + vjust = 0.5, hjust = 0.5 + ) + } + + # 3. Cox-PH annotation table + if (!is.null(coxph_tbl)) { + # coxph_tbl is pre-computed outside g_km, just plot it + bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] + + gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + + theme( + axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), + plot.margin = margin(0, 2, 0, 5) + ) + + coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) + gg_coxph <- suppressMessages( + gg_coxph + + scale_x_continuous(expand = c(0.025, 0)) + + scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_coxph, control_annot_coxph[["x"]], control_annot_coxph[["y"]], + width = control_annot_coxph[["w"]], height = control_annot_coxph[["h"]], + vjust = 0.5, hjust = 0.5 + ) + } + + # add at risk annotation table (unchanged logic) + if (annot_at_risk) { + annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) + annot_tbl <- if (is.null(fit_km$strata)) { + data.frame( + n.risk = annot_tbl$n.risk, time = annot_tbl$time, strata = armval + ) + } else { + strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") + levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] + data.frame( + n.risk = annot_tbl$n.risk, time = annot_tbl$time, strata = annot_tbl$strata + ) + } + + at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1]) + at_risk_tbl[is.na(at_risk_tbl)] <- 0 + rownames(at_risk_tbl) <- levels(annot_tbl$strata) + + gg_at_risk <- df2gg( + at_risk_tbl, font_size = font_size, col_labels = FALSE, hline = FALSE, + colwidths = rep(1, ncol(at_risk_tbl)) + ) + + labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + + theme_bw(base_size = font_size) + + theme( + plot.title = element_text(size = font_size, vjust = 3, face = "bold"), + panel.border = element_blank(), panel.grid = element_blank(), + axis.title.y = element_blank(), axis.ticks.y = element_blank(), + axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), + axis.text.x = element_text(size = font_size), axis.line.x = element_line() + ) + + coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) + gg_at_risk <- suppressMessages( + gg_at_risk + + scale_x_continuous(expand = c(0.025, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + + scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) + ) + + if (!as_list) { + gg_plt <- cowplot::plot_grid( + gg_plt, gg_at_risk, align = "v", axis = "tblr", ncol = 1, + rel_heights = c(rel_height_plot, 1 - rel_height_plot) + ) + } + } + + if (as_list) { + list(plot = gg_plt, table = gg_at_risk) + } else { + gg_plt + } +} diff --git a/man/g_km.Rd b/man/g_km.Rd new file mode 100644 index 00000000..c1dab50c --- /dev/null +++ b/man/g_km.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{g_km} +\alias{g_km} +\title{g_km plot} +\usage{ +g_km( + fit_km, + variables, + coxph_tbl = NULL, + control_surv = control_surv_timepoint(), + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + annot_at_risk = TRUE, + annot_at_risk_title = TRUE, + annot_surv_med = TRUE, + control_annot_surv_med = control_surv_med_annot(), + control_annot_coxph = control_coxph_annot(), + legend_pos = NULL, + rel_height_plot = 0.75, + ggtheme = NULL, + as_list = FALSE +) +} +\description{ +g_km plot +} diff --git a/man/obj_label-set.Rd b/man/obj_label-set.Rd new file mode 100644 index 00000000..f566f8c6 --- /dev/null +++ b/man/obj_label-set.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/from_formatters.R +\name{obj_label<-} +\alias{obj_label<-} +\title{The new label} +\usage{ +obj_label(obj) <- value +} +\arguments{ +\item{value}{character(1). The new label} +} +\description{ +The new label +} From 6d8d156fa3fea18f32842066e10b4b57b55b3684 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Fri, 28 Nov 2025 23:39:09 +0800 Subject: [PATCH 02/25] update --- NAMESPACE | 29 ++++ R/assert.R | 101 ++++++++------ R/gkm.R | 262 +++++++++++++++++++++++++++--------- man/g_km.Rd | 73 +++++++++- man/h_km_fit.Rd | 29 ++++ man/h_tbl_coxph_pairwise.Rd | 37 +++++ 6 files changed, 421 insertions(+), 110 deletions(-) create mode 100644 man/h_km_fit.Rd create mode 100644 man/h_tbl_coxph_pairwise.Rd diff --git a/NAMESPACE b/NAMESPACE index 2484284d..ad4a231e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,8 @@ export(add_hierarchical_count_row) export(add_overall) export(filter_hierarchical) export(g_km) +export(h_km_fit) +export(h_tbl_coxph_pairwise) export(label_roche_number) export(label_roche_percent) export(label_roche_pvalue) @@ -40,6 +42,10 @@ exportMethods("obj_label<-") exportMethods(obj_label) import(glue) import(rlang) +importFrom(broom,tidy) +importFrom(cowplot,draw_plot) +importFrom(cowplot,ggdraw) +importFrom(cowplot,plot_grid) importFrom(dplyr,"%>%") importFrom(dplyr,across) importFrom(dplyr,all_of) @@ -52,6 +58,29 @@ importFrom(dplyr,matches) importFrom(dplyr,num_range) importFrom(dplyr,starts_with) importFrom(dplyr,where) +importFrom(ggplot2,aes) +importFrom(ggplot2,coord_cartesian) +importFrom(ggplot2,element_blank) +importFrom(ggplot2,element_line) +importFrom(ggplot2,element_rect) +importFrom(ggplot2,element_text) +importFrom(ggplot2,geom_point) +importFrom(ggplot2,geom_ribbon) +importFrom(ggplot2,geom_step) +importFrom(ggplot2,ggplot) +importFrom(ggplot2,guide_legend) +importFrom(ggplot2,guides) +importFrom(ggplot2,labs) +importFrom(ggplot2,scale_color_manual) +importFrom(ggplot2,scale_fill_manual) +importFrom(ggplot2,scale_linetype_manual) +importFrom(ggplot2,scale_shape_manual) +importFrom(ggplot2,scale_y_continuous) +importFrom(ggplot2,theme) +importFrom(ggplot2,theme_bw) importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) +importFrom(stats,as.formula) +importFrom(survival,survfit) +importFrom(tidyr,pivot_wider) diff --git a/R/assert.R b/R/assert.R index 9f2c08e6..47c8b433 100644 --- a/R/assert.R +++ b/R/assert.R @@ -1,5 +1,4 @@ -assert_proportion_value <- function (x, include_boundaries = FALSE) -{ +assert_proportion_value <- function(x, include_boundaries = FALSE) { checkmate::assert_number(x, lower = 0, upper = 1) checkmate::assert_flag(include_boundaries) if (isFALSE(include_boundaries)) { @@ -8,81 +7,97 @@ assert_proportion_value <- function (x, include_boundaries = FALSE) } } -check_list_of_variables <- function (x) -{ +check_list_of_variables <- function(x) { x <- Filter(Negate(is.null), x) - res <- checkmate::check_list(x, names = "named", min.len = 1, - any.missing = FALSE, types = "character") + res <- checkmate::check_list(x, + names = "named", min.len = 1, + any.missing = FALSE, types = "character" + ) if (isTRUE(res)) { res <- checkmate::check_character(unlist(x), min.chars = 1) } res } -assert_list_of_variables <- function (x, .var.name = checkmate::vname(x), add = NULL) -{ - if (missing(x)) - stop(sprintf("argument \"%s\" is missing, with no default", - .var.name)) - res = check_list_of_variables(x) +assert_list_of_variables <- function(x, .var.name = checkmate::vname(x), add = NULL) { + if (missing(x)) { + stop(sprintf( + "argument \"%s\" is missing, with no default", + .var.name + )) + } + res <- check_list_of_variables(x) checkmate::makeAssertion(x, res, .var.name, add) } -check_df_with_variables <- function (df, variables, na_level = NULL) -{ +check_df_with_variables <- function(df, variables, na_level = NULL) { checkmate::assert_data_frame(df) assert_list_of_variables(variables) err_flag <- all(unlist(variables) %in% colnames(df)) checkmate::assert_flag(err_flag) if (isFALSE(err_flag)) { vars <- setdiff(unlist(variables), colnames(df)) - return(paste(deparse(substitute(df)), "does not contain all specified variables as column names. Missing from data frame:", - paste(vars, collapse = ", "))) + return(paste( + deparse(substitute(df)), "does not contain all specified variables as column names. Missing from data frame:", + paste(vars, collapse = ", ") + )) } if (!is.null(na_level)) { checkmate::assert_string(na_level) - res <- unlist(lapply(as.list(df)[unlist(variables)], - function(x) any(x == na_level))) + res <- unlist(lapply( + as.list(df)[unlist(variables)], + function(x) any(x == na_level) + )) if (any(res)) { - return(paste0(deparse(substitute(df)), " contains explicit na_level (", - na_level, ") in the following columns: ", paste0(unlist(variables)[res], - collapse = ", "))) + return(paste0( + deparse(substitute(df)), " contains explicit na_level (", + na_level, ") in the following columns: ", paste0(unlist(variables)[res], + collapse = ", " + ) + )) } } return(TRUE) } -assert_df_with_variables <- function (df, variables, na_level = NULL, .var.name = checkmate::vname(df), - add = NULL) -{ - if (missing(df)) - stop(sprintf("argument \"%s\" is missing, with no default", - .var.name)) - res = check_df_with_variables(df, variables, na_level) +assert_df_with_variables <- function(df, variables, na_level = NULL, .var.name = checkmate::vname(df), + add = NULL) { + if (missing(df)) { + stop(sprintf( + "argument \"%s\" is missing, with no default", + .var.name + )) + } + res <- check_df_with_variables(df, variables, na_level) checkmate::makeAssertion(df, res, .var.name, add) } -check_valid_factor <- function (x, min.levels = 1, max.levels = NULL, null.ok = TRUE, - any.missing = TRUE, n.levels = NULL, len = NULL) -{ +check_valid_factor <- function(x, min.levels = 1, max.levels = NULL, null.ok = TRUE, + any.missing = TRUE, n.levels = NULL, len = NULL) { checkmate::assert_int(min.levels, lower = 1) - res <- checkmate::check_factor(x, min.levels = min.levels, - null.ok = null.ok, max.levels = max.levels, any.missing = any.missing, - n.levels = n.levels) + res <- checkmate::check_factor(x, + min.levels = min.levels, + null.ok = null.ok, max.levels = max.levels, any.missing = any.missing, + n.levels = n.levels + ) if (isTRUE(res)) { res <- checkmate::check_character(levels(x), min.chars = 1) } return(res) } -assert_valid_factor <- function (x, min.levels = 1, max.levels = NULL, null.ok = TRUE, - any.missing = TRUE, n.levels = NULL, len = NULL, .var.name = checkmate::vname(x), - add = NULL) -{ - if (missing(x)) - stop(sprintf("argument \"%s\" is missing, with no default", - .var.name)) - res = check_valid_factor(x, min.levels, max.levels, null.ok, - any.missing, n.levels, len) +assert_valid_factor <- function(x, min.levels = 1, max.levels = NULL, null.ok = TRUE, + any.missing = TRUE, n.levels = NULL, len = NULL, .var.name = checkmate::vname(x), + add = NULL) { + if (missing(x)) { + stop(sprintf( + "argument \"%s\" is missing, with no default", + .var.name + )) + } + res <- check_valid_factor( + x, min.levels, max.levels, null.ok, + any.missing, n.levels, len + ) checkmate::makeAssertion(x, res, .var.name, add) } diff --git a/R/gkm.R b/R/gkm.R index 97d85993..e79f3a9d 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,13 +1,11 @@ -control_surv_timepoint <- function (conf_level = 0.95, conf_type = c("plain", "log", "log-log")) -{ +control_surv_timepoint <- function(conf_level = 0.95, conf_type = c("plain", "log", "log-log")) { conf_type <- match.arg(conf_type) assert_proportion_value(conf_level) list(conf_level = conf_level, conf_type = conf_type) } -control_coxph <- function (pval_method = c("log-rank", "wald", "likelihood"), - ties = c("efron", "breslow", "exact"), conf_level = 0.95) -{ +control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), + ties = c("efron", "breslow", "exact"), conf_level = 0.95) { pval_method <- match.arg(pval_method) ties <- match.arg(ties) assert_proportion_value(conf_level) @@ -35,18 +33,20 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T ## 3. Helper Functions (Formatting, Data Preparation, Plotting Utilities) ## ---------------------------------------------------------------------------- -f_conf_level <- function (conf_level) -{ +f_conf_level <- function(conf_level) { assert_proportion_value(conf_level) paste0(conf_level * 100, "% CI") } -df2gg <- function (df, colwidths = NULL, font_size = 10, col_labels = TRUE, - col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) -{ - df <- as.data.frame(apply(df, 1:2, function(x) if (is.na(x)) - "NA" - else as.character(x))) +df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, + col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) { + df <- as.data.frame(apply(df, 1:2, function(x) { + if (is.na(x)) { + "NA" + } else { + as.character(x) + } + })) if (col_labels) { df <- as.matrix(df) df <- rbind(colnames(df), df) @@ -55,25 +55,38 @@ df2gg <- function (df, colwidths = NULL, font_size = 10, col_labels = TRUE, colwidths <- apply(df, 2, function(x) max(nchar(x), na.rm = TRUE)) } tot_width <- sum(colwidths) - res <- ggplot2::ggplot(data = df) + theme_void() + scale_x_continuous(limits = c(0, - tot_width)) + scale_y_continuous(limits = c(1, nrow(df))) - if (!is.null(bg_fill)) + res <- ggplot2::ggplot(data = df) + + theme_void() + + scale_x_continuous(limits = c( + 0, + tot_width + )) + + scale_y_continuous(limits = c(1, nrow(df))) + if (!is.null(bg_fill)) { res <- res + theme(plot.background = element_rect(fill = bg_fill)) + } if (hline) { - res <- res + annotate("segment", x = 0 + 0.2 * colwidths[2], - xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - - 0.5, yend = nrow(df) - 0.5) + res <- res + annotate("segment", + x = 0 + 0.2 * colwidths[2], + xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - + 0.5, yend = nrow(df) - 0.5 + ) } for (i in seq_len(ncol(df))) { - line_pos <- c(if (i == 1) 0 else sum(colwidths[1:(i - - 1)]), sum(colwidths[1:i])) - res <- res + annotate("text", x = mean(line_pos), y = rev(seq_len(nrow(df))), - label = df[, i], size = font_size/.pt, fontface = if (col_labels) { - c(col_lab_fontface, rep("plain", nrow(df) - 1)) - } - else { - rep("plain", nrow(df)) - }) + line_pos <- c(if (i == 1) { + 0 + } else { + sum(colwidths[1:(i - + 1)]) + }, sum(colwidths[1:i])) + res <- res + annotate("text", + x = mean(line_pos), y = rev(seq_len(nrow(df))), + label = df[, i], size = font_size / .pt, fontface = if (col_labels) { + c(col_lab_fontface, rep("plain", nrow(df) - 1)) + } else { + rep("plain", nrow(df)) + } + ) } res } @@ -125,12 +138,29 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { ) } +#' @title Pairwise Cox Proportional Hazards Model Summary Table +#' +#' @description This function computes and formats the results of a pairwise Cox Proportional +#' Hazards (Cox-PH) regression analysis between different treatment arms. +#' +#' @param df A data frame containing the survival data. +#' @param variables A named list specifying the column names for time-to-event (\code{tte}), +#' treatment arm (\code{arm}), event status (\code{is_event}), and optional strata (\code{strata}) in \code{df}. +#' @param ref_group_coxph An optional string specifying the reference group for the Cox-PH model. +#' If \code{NULL}, the first factor level of the arm variable is used as the reference group. +#' @param control_coxph_pw A list of control parameters for the Cox-PH model, typically +#' generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level. +#' @param annot_coxph_ref_lbls A logical flag indicating whether to append "vs. ref group" +#' to the row names in the resulting table. +#' +#' @return A data frame summarizing the pairwise Cox-PH results, including Hazard Ratio (HR), +#' its confidence interval, and the p-value. +#' @export h_tbl_coxph_pairwise <- function(df, variables, ref_group_coxph = NULL, control_coxph_pw = control_coxph(), annot_coxph_ref_lbls = FALSE) { - assert_df_with_variables(df, variables) checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) checkmate::assert_flag(annot_coxph_ref_lbls) @@ -217,9 +247,8 @@ h_data_plot <- function(fit_km, ## ---------------------------------------------------------------------------- s_coxph_pairwise <- - function (df, .ref_group, .in_ref_col, .var, is_event, strata = NULL, - control = control_coxph(), ...) - { + function(df, .ref_group, .in_ref_col, .var, is_event, strata = NULL, + control = control_coxph(), ...) { checkmate::assert_string(.var) checkmate::assert_numeric(df[[.var]]) checkmate::assert_logical(df[[is_event]]) @@ -228,47 +257,102 @@ s_coxph_pairwise <- ties <- control$ties conf_level <- control$conf_level if (.in_ref_col) { - return(list(pvalue = with_label(numeric(), - paste0("p-value (", pval_method, ")")), hr = with_label(numeric(), - "Hazard Ratio"), hr_ci = with_label(numeric(), - f_conf_level(conf_level)), hr_ci_3d = with_label(numeric(), - paste0("Hazard Ratio (", f_conf_level(conf_level), - ")")), n_tot = with_label(numeric(), - "Total n"), n_tot_events = with_label(numeric(), - "Total events"))) + return(list(pvalue = with_label( + numeric(), + paste0("p-value (", pval_method, ")") + ), hr = with_label( + numeric(), + "Hazard Ratio" + ), hr_ci = with_label( + numeric(), + f_conf_level(conf_level) + ), hr_ci_3d = with_label( + numeric(), + paste0( + "Hazard Ratio (", f_conf_level(conf_level), + ")" + ) + ), n_tot = with_label( + numeric(), + "Total n" + ), n_tot_events = with_label( + numeric(), + "Total events" + ))) } data <- rbind(.ref_group, df) group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), - levels = c("ref", "x")) - df_cox <- data.frame(tte = data[[.var]], is_event = data[[is_event]], - arm = group) + levels = c("ref", "x") + ) + df_cox <- data.frame( + tte = data[[.var]], is_event = data[[is_event]], + arm = group + ) if (is.null(strata)) { formula_cox <- survival::Surv(tte, is_event) ~ arm - } - else { - formula_cox <- stats::as.formula(paste0("survival::Surv(tte, is_event) ~ arm + strata(", - paste(strata, collapse = ","), ")")) + } else { + formula_cox <- stats::as.formula(paste0( + "survival::Surv(tte, is_event) ~ arm + strata(", + paste(strata, collapse = ","), ")" + )) df_cox <- cbind(df_cox, data[strata]) } - cox_fit <- survival::coxph(formula = formula_cox, data = df_cox, - ties = ties) + cox_fit <- survival::coxph( + formula = formula_cox, data = df_cox, + ties = ties + ) sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) orginal_survdiff <- survival::survdiff(formula_cox, data = df_cox) log_rank_pvalue <- 1 - pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - - 1) - pval <- switch(pval_method, wald = sum_cox$waldtest["pvalue"], - `log-rank` = log_rank_pvalue, likelihood = sum_cox$logtest["pvalue"]) - list(pvalue = with_label(unname(pval), paste0("p-value (", - pval_method, ")")), hr = with_label(sum_cox$conf.int[1, - 1], "Hazard Ratio"), hr_ci = with_label(unname(sum_cox$conf.int[1, - 3:4]), f_conf_level(conf_level)), hr_ci_3d = with_label(c(sum_cox$conf.int[1, - 1], unname(sum_cox$conf.int[1, 3:4])), paste0("Hazard Ratio (", - f_conf_level(conf_level), ")")), n_tot = with_label(sum_cox$n, - "Total n"), n_tot_events = with_label(sum_cox$nevent, - "Total events")) + 1) + pval <- switch(pval_method, + wald = sum_cox$waldtest["pvalue"], + `log-rank` = log_rank_pvalue, + likelihood = sum_cox$logtest["pvalue"] + ) + list(pvalue = with_label(unname(pval), paste0( + "p-value (", + pval_method, ")" + )), hr = with_label(sum_cox$conf.int[ + 1, + 1 + ], "Hazard Ratio"), hr_ci = with_label(unname(sum_cox$conf.int[ + 1, + 3:4 + ]), f_conf_level(conf_level)), hr_ci_3d = with_label(c(sum_cox$conf.int[ + 1, + 1 + ], unname(sum_cox$conf.int[1, 3:4])), paste0( + "Hazard Ratio (", + f_conf_level(conf_level), ")" + )), n_tot = with_label( + sum_cox$n, + "Total n" + ), n_tot_events = with_label( + sum_cox$nevent, + "Total events" + )) } +#' @title Kaplan-Meier Survival Curve Fitting +#' +#' @description This helper function fits a Kaplan-Meier survival curve model +#' using the formula \code{survival::Surv(tte, is_event) ~ arm}. +#' It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. +#' +#' @param df A data frame containing time-to-event (tte), event status (\code{is_event}), +#' and treatment arm (\code{arm}) variables. +#' @param variables A named list specifying the column names for time-to-event (\code{tte}), +#' event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. +#' For example: \code{list(tte = "time_column", is_event = "status_column", arm = "group_column")}. +#' @param control_surv A list of control parameters for the \code{survival::survfit} function, +#' typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level +#' and confidence interval type. +#' +#' @return An object of class \code{survfit} from the \code{survival} package, containing +#' the fitted Kaplan-Meier curves. +#' @export h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { tte <- variables$tte is_event <- variables$is_event @@ -287,7 +371,54 @@ h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { return(fit_km) } -#' g_km plot +#' @title Generate a Kaplan-Meier Plot with Annotations +#' +#' @description This function creates a comprehensive ggplot2 object for a Kaplan-Meier +#' survival curve, optionally including annotations for median survival and Cox-PH results, +#' and a 'Numbers at Risk' table below the main plot. +#' +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, typically +#' generated by \code{\link{h_km_fit}}. +#' @param variables A named list specifying the survival and grouping variables (needed +#' for accessing the column names, even if the fit is provided). +#' @param coxph_tbl An optional data frame containing pre-calculated Cox-PH results, +#' typically generated by \code{\link{h_tbl_coxph_pairwise}}. If provided, a Cox-PH annotation table is added. +#' @param control_surv A list of control parameters for the survival model, from \code{\link{control_surv_timepoint}}. +#' @param col A character vector of colors for the survival curves. Length should match number of arms. +#' @param lty A vector of line types for the survival curves, or \code{NULL} for default. +#' @param lwd Numeric value specifying line width for the survival curves. +#' @param censor_show Logical, whether to display censoring marks on the plot. +#' @param pch Plotting character for censoring marks. +#' @param size Size of the censoring marks. +#' @param max_time Numeric, the maximum time point to display on the x-axis. +#' @param xticks Numeric vector of x-axis tick positions, or a single number for the interval, or \code{NULL} for auto. +#' @param xlab Character string for the x-axis label. +#' @param yval Character string, either \code{"Survival"} or \code{"Failure"} to plot Survival or Failure probability. +#' @param ylab Character string for the y-axis label. +#' @param ylim Numeric vector of length 2 for y-axis limits. +#' @param title Character string for the plot title. +#' @param footnotes Character string for plot footnotes/caption. +#' @param font_size Numeric, base font size for the plot theme. +#' @param ci_ribbon Logical, whether to display confidence intervals as a ribbon (area). +#' @param annot_at_risk Logical, whether to include the 'Numbers at Risk' table below the plot. +#' @param annot_at_risk_title Logical, whether to include the title "Patients at Risk:" in the table. +#' @param annot_surv_med Logical, whether to include the median survival time annotation table. +#' @param control_annot_surv_med A list of control parameters for the median survival annotation box, +#' typically generated by \code{\link{control_surv_med_annot}}. +#' @param control_annot_coxph A list of control parameters for the Cox-PH annotation box, +#' typically generated by \code{\link{control_coxph_annot}}. +#' @param legend_pos Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement. +#' @param rel_height_plot Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1). +#' @param ggtheme An optional \code{ggplot2} theme to apply. +#' @param as_list Logical, if \code{TRUE}, returns a list containing the plot and the 'at-risk' table as separate \code{cowplot} objects; otherwise returns the combined \code{cowplot} object. +#' +#' @return A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table, or a list if \code{as_list = TRUE}. +#' @importFrom ggplot2 ggplot aes theme_bw scale_y_continuous labs theme element_text element_blank element_rect element_line geom_step geom_point scale_shape_manual guides guide_legend geom_ribbon scale_color_manual scale_fill_manual scale_linetype_manual coord_cartesian +#' @importFrom cowplot ggdraw draw_plot plot_grid +#' @importFrom tidyr pivot_wider +#' @importFrom survival survfit +#' @importFrom broom tidy +#' @importFrom stats as.formula #' @export g_km <- function(fit_km, variables, @@ -318,7 +449,6 @@ g_km <- function(fit_km, rel_height_plot = 0.75, ggtheme = NULL, as_list = FALSE) { - # --- Data Extraction and Assertions --- checkmate::assert_class(fit_km, "survfit") checkmate::assert_list(variables) @@ -407,7 +537,7 @@ g_km <- function(fit_km, y_rng <- ylim[2] - ylim[1] if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) && - all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint + all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint gg_plt <- gg_plt + theme( legend.position.inside = c(1, 0.5), @@ -525,7 +655,8 @@ g_km <- function(fit_km, rownames(at_risk_tbl) <- levels(annot_tbl$strata) gg_at_risk <- df2gg( - at_risk_tbl, font_size = font_size, col_labels = FALSE, hline = FALSE, + at_risk_tbl, + font_size = font_size, col_labels = FALSE, hline = FALSE, colwidths = rep(1, ncol(at_risk_tbl)) ) + labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + @@ -546,7 +677,8 @@ g_km <- function(fit_km, if (!as_list) { gg_plt <- cowplot::plot_grid( - gg_plt, gg_at_risk, align = "v", axis = "tblr", ncol = 1, + gg_plt, gg_at_risk, + align = "v", axis = "tblr", ncol = 1, rel_heights = c(rel_height_plot, 1 - rel_height_plot) ) } diff --git a/man/g_km.Rd b/man/g_km.Rd index c1dab50c..7fd3be45 100644 --- a/man/g_km.Rd +++ b/man/g_km.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/gkm.R \name{g_km} \alias{g_km} -\title{g_km plot} +\title{Generate a Kaplan-Meier Plot with Annotations} \usage{ g_km( fit_km, @@ -36,6 +36,75 @@ g_km( as_list = FALSE ) } +\arguments{ +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, typically +generated by \code{\link{h_km_fit}}.} + +\item{variables}{A named list specifying the survival and grouping variables (needed +for accessing the column names, even if the fit is provided).} + +\item{coxph_tbl}{An optional data frame containing pre-calculated Cox-PH results, +typically generated by \code{\link{h_tbl_coxph_pairwise}}. If provided, a Cox-PH annotation table is added.} + +\item{control_surv}{A list of control parameters for the survival model, from \code{\link{control_surv_timepoint}}.} + +\item{col}{A character vector of colors for the survival curves. Length should match number of arms.} + +\item{lty}{A vector of line types for the survival curves, or \code{NULL} for default.} + +\item{lwd}{Numeric value specifying line width for the survival curves.} + +\item{censor_show}{Logical, whether to display censoring marks on the plot.} + +\item{pch}{Plotting character for censoring marks.} + +\item{size}{Size of the censoring marks.} + +\item{max_time}{Numeric, the maximum time point to display on the x-axis.} + +\item{xticks}{Numeric vector of x-axis tick positions, or a single number for the interval, or \code{NULL} for auto.} + +\item{xlab}{Character string for the x-axis label.} + +\item{yval}{Character string, either \code{"Survival"} or \code{"Failure"} to plot Survival or Failure probability.} + +\item{ylab}{Character string for the y-axis label.} + +\item{ylim}{Numeric vector of length 2 for y-axis limits.} + +\item{title}{Character string for the plot title.} + +\item{footnotes}{Character string for plot footnotes/caption.} + +\item{font_size}{Numeric, base font size for the plot theme.} + +\item{ci_ribbon}{Logical, whether to display confidence intervals as a ribbon (area).} + +\item{annot_at_risk}{Logical, whether to include the 'Numbers at Risk' table below the plot.} + +\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:" in the table.} + +\item{annot_surv_med}{Logical, whether to include the median survival time annotation table.} + +\item{control_annot_surv_med}{A list of control parameters for the median survival annotation box, +typically generated by \code{\link{control_surv_med_annot}}.} + +\item{control_annot_coxph}{A list of control parameters for the Cox-PH annotation box, +typically generated by \code{\link{control_coxph_annot}}.} + +\item{legend_pos}{Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement.} + +\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} + +\item{ggtheme}{An optional \code{ggplot2} theme to apply.} + +\item{as_list}{Logical, if \code{TRUE}, returns a list containing the plot and the 'at-risk' table as separate \code{cowplot} objects; otherwise returns the combined \code{cowplot} object.} +} +\value{ +A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table, or a list if \code{as_list = TRUE}. +} \description{ -g_km plot +This function creates a comprehensive ggplot2 object for a Kaplan-Meier +survival curve, optionally including annotations for median survival and Cox-PH results, +and a 'Numbers at Risk' table below the main plot. } diff --git a/man/h_km_fit.Rd b/man/h_km_fit.Rd new file mode 100644 index 00000000..bbb66746 --- /dev/null +++ b/man/h_km_fit.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_km_fit} +\alias{h_km_fit} +\title{Kaplan-Meier Survival Curve Fitting} +\usage{ +h_km_fit(df, variables, control_surv = control_surv_timepoint()) +} +\arguments{ +\item{df}{A data frame containing time-to-event (tte), event status (\code{is_event}), +and treatment arm (\code{arm}) variables.} + +\item{variables}{A named list specifying the column names for time-to-event (\code{tte}), +event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. +For example: \code{list(tte = "time_column", is_event = "status_column", arm = "group_column")}.} + +\item{control_surv}{A list of control parameters for the \code{survival::survfit} function, +typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level +and confidence interval type.} +} +\value{ +An object of class \code{survfit} from the \code{survival} package, containing +the fitted Kaplan-Meier curves. +} +\description{ +This helper function fits a Kaplan-Meier survival curve model +using the formula \code{survival::Surv(tte, is_event) ~ arm}. +It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. +} diff --git a/man/h_tbl_coxph_pairwise.Rd b/man/h_tbl_coxph_pairwise.Rd new file mode 100644 index 00000000..ee4add72 --- /dev/null +++ b/man/h_tbl_coxph_pairwise.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_tbl_coxph_pairwise} +\alias{h_tbl_coxph_pairwise} +\title{Pairwise Cox Proportional Hazards Model Summary Table} +\usage{ +h_tbl_coxph_pairwise( + df, + variables, + ref_group_coxph = NULL, + control_coxph_pw = control_coxph(), + annot_coxph_ref_lbls = FALSE +) +} +\arguments{ +\item{df}{A data frame containing the survival data.} + +\item{variables}{A named list specifying the column names for time-to-event (\code{tte}), +treatment arm (\code{arm}), event status (\code{is_event}), and optional strata (\code{strata}) in \code{df}.} + +\item{ref_group_coxph}{An optional string specifying the reference group for the Cox-PH model. +If \code{NULL}, the first factor level of the arm variable is used as the reference group.} + +\item{control_coxph_pw}{A list of control parameters for the Cox-PH model, typically +generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level.} + +\item{annot_coxph_ref_lbls}{A logical flag indicating whether to append "vs. ref group" +to the row names in the resulting table.} +} +\value{ +A data frame summarizing the pairwise Cox-PH results, including Hazard Ratio (HR), +its confidence interval, and the p-value. +} +\description{ +This function computes and formats the results of a pairwise Cox Proportional +Hazards (Cox-PH) regression analysis between different treatment arms. +} From 41b74977f10b298f4ccaa3dd1707d65e515c3b79 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sat, 29 Nov 2025 08:40:40 +0800 Subject: [PATCH 03/25] wip --- DESCRIPTION | 2 ++ R/assert.R | 7 +++++++ R/from_formatters.R | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2b0e7dd9..c976c2f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,10 +26,12 @@ Imports: broom (>= 1.0.8), cards (>= 0.7.0), cardx (>= 0.3.0), + cowplot (>= 1.2.0), checkmate (>= 2.3.2), cli (>= 3.6.4), dplyr (>= 1.1.4), flextable (>= 0.9.7), + ggplot2 (>= 4.0.1), glue (>= 1.8.0), gt (>= 0.11.1), lifecycle, diff --git a/R/assert.R b/R/assert.R index 47c8b433..8165d3ba 100644 --- a/R/assert.R +++ b/R/assert.R @@ -1,3 +1,7 @@ +# styler: off +# nocov start + + assert_proportion_value <- function(x, include_boundaries = FALSE) { checkmate::assert_number(x, lower = 0, upper = 1) checkmate::assert_flag(include_boundaries) @@ -101,3 +105,6 @@ assert_valid_factor <- function(x, min.levels = 1, max.levels = NULL, null.ok = ) checkmate::makeAssertion(x, res, .var.name, add) } + +# nocov end +# styler: on diff --git a/R/from_formatters.R b/R/from_formatters.R index 218adba7..0bab0ab7 100644 --- a/R/from_formatters.R +++ b/R/from_formatters.R @@ -1,6 +1,6 @@ # ## Changelog -# nocov start # styler: off +# nocov start setGeneric("obj_label", function(obj) standardGeneric("obj_label")) From 9b8807a4e4a996976347bf859248c4fa0074499b Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sat, 29 Nov 2025 08:42:00 +0800 Subject: [PATCH 04/25] wordlist --- inst/WORDLIST | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/WORDLIST b/inst/WORDLIST index b95a577a..7f7292b8 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -15,11 +15,13 @@ cardx de flextable funder +ggplot gtsummary pharma pre rlang's survfit tbl +tte tidyselect unstratified From 6b51033152372648b6f1c16ee01b80bff4abe5aa Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 30 Nov 2025 08:27:38 +0800 Subject: [PATCH 05/25] adding code --- NAMESPACE | 14 +- R/gkm.R | 671 +++++++++++++++++++++------------- man/annot_at_risk.Rd | 34 ++ man/annot_cox_ph.Rd | 31 ++ man/annot_surv_med.Rd | 30 ++ man/control_coxph.Rd | 28 ++ man/control_coxph_annot.Rd | 35 ++ man/control_surv_med_annot.Rd | 26 ++ man/control_surv_timepoint.Rd | 24 ++ man/df2gg.Rd | 37 ++ man/f_conf_level.Rd | 17 + man/g_km.Rd | 51 +-- man/h_data_plot.Rd | 23 ++ man/h_km_fit.Rd | 3 +- man/h_tbl_median_surv.Rd | 20 + man/h_xticks.Rd | 21 ++ man/s_coxph_pairwise.Rd | 43 +++ 17 files changed, 801 insertions(+), 307 deletions(-) create mode 100644 man/annot_at_risk.Rd create mode 100644 man/annot_cox_ph.Rd create mode 100644 man/annot_surv_med.Rd create mode 100644 man/control_coxph.Rd create mode 100644 man/control_coxph_annot.Rd create mode 100644 man/control_surv_med_annot.Rd create mode 100644 man/control_surv_timepoint.Rd create mode 100644 man/df2gg.Rd create mode 100644 man/f_conf_level.Rd create mode 100644 man/h_data_plot.Rd create mode 100644 man/h_tbl_median_surv.Rd create mode 100644 man/h_xticks.Rd create mode 100644 man/s_coxph_pairwise.Rd diff --git a/NAMESPACE b/NAMESPACE index ad4a231e..024efc1a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,9 +11,12 @@ export("obj_label<-") export(add_blank_rows) export(add_hierarchical_count_row) export(add_overall) +export(annot_at_risk) +export(annot_cox_ph) +export(annot_surv_med) export(filter_hierarchical) export(g_km) -export(h_km_fit) +export(h_data_plot) export(h_tbl_coxph_pairwise) export(label_roche_number) export(label_roche_percent) @@ -58,7 +61,9 @@ importFrom(dplyr,matches) importFrom(dplyr,num_range) importFrom(dplyr,starts_with) importFrom(dplyr,where) +importFrom(ggplot2,.pt) importFrom(ggplot2,aes) +importFrom(ggplot2,annotate) importFrom(ggplot2,coord_cartesian) importFrom(ggplot2,element_blank) importFrom(ggplot2,element_line) @@ -71,16 +76,23 @@ importFrom(ggplot2,ggplot) importFrom(ggplot2,guide_legend) importFrom(ggplot2,guides) importFrom(ggplot2,labs) +importFrom(ggplot2,margin) importFrom(ggplot2,scale_color_manual) importFrom(ggplot2,scale_fill_manual) importFrom(ggplot2,scale_linetype_manual) importFrom(ggplot2,scale_shape_manual) +importFrom(ggplot2,scale_x_continuous) importFrom(ggplot2,scale_y_continuous) importFrom(ggplot2,theme) importFrom(ggplot2,theme_bw) +importFrom(ggplot2,theme_void) importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) +importFrom(rlang,.data) importFrom(stats,as.formula) +importFrom(survival,Surv) +importFrom(survival,coxph) +importFrom(survival,survdiff) importFrom(survival,survfit) importFrom(tidyr,pivot_wider) diff --git a/R/gkm.R b/R/gkm.R index e79f3a9d..aab7d8c1 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,17 +1,35 @@ -control_surv_timepoint <- function(conf_level = 0.95, conf_type = c("plain", "log", "log-log")) { - conf_type <- match.arg(conf_type) - assert_proportion_value(conf_level) - list(conf_level = conf_level, conf_type = conf_type) -} - +#' @title Control parameters for Cox Proportional Hazards model +#' +#' @description Creates a list of control parameters for Cox Proportional Hazards (Cox-PH) +#' analysis, used by functions like \code{\link{h_tbl_coxph_pairwise}}. +#' +#' @param pval_method A character string specifying the method for calculating the p-value. +#' Must be one of \code{"log-rank"}, \code{"wald"}, or \code{"likelihood"}. +#' @param ties A character string specifying the method for handling tied failure times. +#' Must be one of \code{"efron"}, \code{"breslow"}, or \code{"exact"}. +#' @param conf_level A numeric value between 0 and 1, specifying the confidence level. +#' +#' @return A list with elements \code{pval_method}, \code{ties}, and \code{conf_level}. control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), ties = c("efron", "breslow", "exact"), conf_level = 0.95) { pval_method <- match.arg(pval_method) ties <- match.arg(ties) - assert_proportion_value(conf_level) + assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere list(pval_method = pval_method, ties = ties, conf_level = conf_level) } +#' @title Control parameters for Median Survival Annotation Box +#' +#' @description Creates a list of control parameters for positioning and styling the +#' median survival annotation box on a plot. +#' +#' @param x A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area). +#' @param y A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area). +#' @param w A numeric value (0 to 1) for the width of the box (relative to plot area). +#' @param h A numeric value (0 to 1) for the height of the box (relative to plot area). +#' @param fill A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background. +#' +#' @return A list with elements \code{x}, \code{y}, \code{w}, \code{h}, and \code{fill}. control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { assert_proportion_value(x) assert_proportion_value(y) @@ -21,6 +39,19 @@ control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = list(x = x, y = y, w = w, h = h, fill = fill) } +#' @title Control parameters for Cox-PH Annotation Box +#' +#' @description Creates a list of control parameters for positioning and styling the +#' Cox Proportional Hazards annotation box on a plot. +#' +#' @param x A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area). +#' @param y A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area). +#' @param w A numeric value (0 to 1) for the width of the box (relative to plot area). +#' @param h A numeric value (0 to 1) for the height of the box (relative to plot area). +#' @param fill A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background. +#' @param ref_lbls A logical flag indicating whether to append "vs. ref group" to row names. +#' +#' @return A list with elements \code{x}, \code{y}, \code{w}, \code{h}, \code{fill}, and \code{ref_lbls}. control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { checkmate::assert_logical(ref_lbls, any.missing = FALSE) @@ -29,17 +60,34 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T } -## ---------------------------------------------------------------------------- -## 3. Helper Functions (Formatting, Data Preparation, Plotting Utilities) -## ---------------------------------------------------------------------------- +## Helper Functions (Formatting, Data Preparation, Plotting Utilities) +#' @title Format Confidence Level String +#' @description Converts a confidence level (e.g., 0.95) to a formatted string (e.g., "95% CI"). +#' @param conf_level A numeric confidence level (proportion, 0 to 1). +#' @return A character string. f_conf_level <- function(conf_level) { - assert_proportion_value(conf_level) + assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere paste0(conf_level * 100, "% CI") } +#' @title Convert Data Frame to ggplot2 Table Graphic +#' +#' @description Creates a \code{ggplot2} object that renders a data frame as a table graphic. +#' +#' @param df The data frame to render. +#' @param colwidths Numeric vector of relative column widths. If \code{NULL}, determined by max character length. +#' @param font_size Numeric base font size. +#' @param col_labels Logical, whether to display column labels (header). +#' @param col_lab_fontface Character string for the font face of column labels (e.g., "bold"). +#' @param hline Logical, whether to draw a horizontal line below the column labels. +#' @param bg_fill Optional color string for the plot background. +#' +#' @return A \code{ggplot2} object representing the table. +#' @importFrom ggplot2 ggplot theme_void scale_x_continuous scale_y_continuous theme element_rect annotate element_text .pt df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, col_lab_fontface = "bold", hline = TRUE, bg_fill = NULL) { + # ... (function body remains the same) df <- as.data.frame(apply(df, 1:2, function(x) { if (is.na(x)) { "NA" @@ -56,17 +104,17 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, } tot_width <- sum(colwidths) res <- ggplot2::ggplot(data = df) + - theme_void() + - scale_x_continuous(limits = c( + ggplot2::theme_void() + + ggplot2::scale_x_continuous(limits = c( 0, tot_width )) + - scale_y_continuous(limits = c(1, nrow(df))) + ggplot2::scale_y_continuous(limits = c(1, nrow(df))) if (!is.null(bg_fill)) { - res <- res + theme(plot.background = element_rect(fill = bg_fill)) + res <- res + ggplot2::theme(plot.background = ggplot2::element_rect(fill = bg_fill)) } if (hline) { - res <- res + annotate("segment", + res <- res + ggplot2::annotate("segment", x = 0 + 0.2 * colwidths[2], xend = tot_width - 0.1 * tail(colwidths, 1), y = nrow(df) - 0.5, yend = nrow(df) - 0.5 @@ -79,7 +127,7 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, sum(colwidths[1:(i - 1)]) }, sum(colwidths[1:i])) - res <- res + annotate("text", + res <- res + ggplot2::annotate("text", x = mean(line_pos), y = rev(seq_len(nrow(df))), label = df[, i], size = font_size / .pt, fontface = if (col_labels) { c(col_lab_fontface, rep("plain", nrow(df) - 1)) @@ -91,7 +139,17 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, res } +#' @title Calculate X-axis Ticks +#' +#' @description Determines the positions for x-axis ticks based on the data and user input. +#' +#' @param data A data frame containing a \code{time} column. +#' @param xticks A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation. +#' @param max_time Optional numeric value specifying the maximum time to consider for tick range. +#' +#' @return A numeric vector of x-axis tick positions. h_xticks <- function(data, xticks = NULL, max_time = NULL) { + # ... (function body remains the same) if (is.null(xticks)) { if (is.null(max_time)) { labeling::extended(range(data$time)[1], range(data$time)[2], m = 5) @@ -117,7 +175,17 @@ h_xticks <- function(data, xticks = NULL, max_time = NULL) { } } +#' @title Median Survival Summary Table +#' +#' @description Extracts and formats the median survival time and its confidence interval +#' from a fitted Kaplan-Meier object. +#' +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}. +#' @param armval Character string to use as the row name if \code{fit_km} has no strata (e.g., "All"). +#' +#' @return A data frame with columns "N", "Median", and the confidence interval label. h_tbl_median_surv <- function(fit_km, armval = "All") { + # ... (function body remains the same) y <- if (is.null(fit_km$strata)) { as.data.frame(t(summary(fit_km)$table), row.names = armval) } else { @@ -147,21 +215,22 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' @param variables A named list specifying the column names for time-to-event (\code{tte}), #' treatment arm (\code{arm}), event status (\code{is_event}), and optional strata (\code{strata}) in \code{df}. #' @param ref_group_coxph An optional string specifying the reference group for the Cox-PH model. -#' If \code{NULL}, the first factor level of the arm variable is used as the reference group. +#' If \code{NULL}, the first factor level of the arm variable is used as the reference group. #' @param control_coxph_pw A list of control parameters for the Cox-PH model, typically -#' generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level. +#' generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level. #' @param annot_coxph_ref_lbls A logical flag indicating whether to append "vs. ref group" -#' to the row names in the resulting table. +#' to the row names in the resulting table. #' #' @return A data frame summarizing the pairwise Cox-PH results, including Hazard Ratio (HR), -#' its confidence interval, and the p-value. +#' its confidence interval, and the p-value. #' @export h_tbl_coxph_pairwise <- function(df, variables, ref_group_coxph = NULL, control_coxph_pw = control_coxph(), annot_coxph_ref_lbls = FALSE) { - assert_df_with_variables(df, variables) + # ... (function body remains the same) + assert_df_with_variables(df, variables) # Assuming assert_df_with_variables is defined elsewhere checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) checkmate::assert_flag(annot_coxph_ref_lbls) @@ -190,6 +259,7 @@ h_tbl_coxph_pairwise <- function(df, pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4), stringsAsFactors = FALSE ) + # Assuming obj_label is defined elsewhere and hr_ci is the label for the CI colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character")) # nolint row.names(res_df) <- comp res_df @@ -199,9 +269,23 @@ h_tbl_coxph_pairwise <- function(df, do.call(rbind, results) } +#' @title Prepare Kaplan-Meier Data for Plotting +#' +#' @description Takes a fitted \code{survfit} object and processes it into a data frame +#' suitable for plotting a Kaplan-Meier curve with \code{ggplot2}, including extending +#' the curve to time zero. +#' +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}. +#' @param armval Character string for the strata level if \code{fit_km} has no strata (e.g., "All"). +#' @param max_time Numeric, the maximum time point to include in the data, or \code{NULL} for no limit. +#' +#' @return A data frame containing the survival curve steps, confidence intervals, and censoring info. +#' @importFrom broom tidy +#' @export h_data_plot <- function(fit_km, armval = "All", max_time = NULL) { + # ... (function body remains the same) y <- broom::tidy(fit_km) if (!is.null(fit_km$strata)) { @@ -242,13 +326,31 @@ h_data_plot <- function(fit_km, } -## ---------------------------------------------------------------------------- -## 4. Core Statistical Function -## ---------------------------------------------------------------------------- +## Core Statistical Function +#' @title Pairwise Cox Proportional Hazards Model Calculation +#' +#' @description Performs a Cox Proportional Hazards model calculation comparing two groups +#' (a reference group and a comparison group). This is an internal function used by +#' \code{\link{h_tbl_coxph_pairwise}}. +#' +#' @param df Data frame for the comparison group. +#' @param .ref_group Data frame for the reference group. +#' @param .in_ref_col Logical, if \code{TRUE} returns empty results (for internal table building). +#' @param .var Character string for the time-to-event variable name. +#' @param is_event Character string for the event status variable name. +#' @param strata Optional character vector of stratification variable names. +#' @param control A list of control parameters from \code{\link{control_coxph}}. +#' @param ... Additional arguments (not used). +#' +#' @return A list containing the p-value, Hazard Ratio (HR), confidence interval (HR\_CI), +#' and total counts. +#' @importFrom survival Surv coxph survdiff +#' @importFrom stats as.formula s_coxph_pairwise <- function(df, .ref_group, .in_ref_col, .var, is_event, strata = NULL, control = control_coxph(), ...) { + # ... (function body remains the same) checkmate::assert_string(.var) checkmate::assert_numeric(df[[.var]]) checkmate::assert_logical(df[[is_event]]) @@ -257,6 +359,7 @@ s_coxph_pairwise <- ties <- control$ties conf_level <- control$conf_level if (.in_ref_col) { + # ... (returns empty list for reference column) return(list(pvalue = with_label( numeric(), paste0("p-value (", pval_method, ")") @@ -292,7 +395,7 @@ s_coxph_pairwise <- formula_cox <- survival::Surv(tte, is_event) ~ arm } else { formula_cox <- stats::as.formula(paste0( - "survival::Surv(tte, is_event) ~ arm + strata(", + "survival::Surv(tte, is_event) ~ arm + survival::strata(", paste(strata, collapse = ","), ")" )) df_cox <- cbind(df_cox, data[strata]) @@ -303,13 +406,14 @@ s_coxph_pairwise <- ) sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) orginal_survdiff <- survival::survdiff(formula_cox, data = df_cox) - log_rank_pvalue <- 1 - pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - + log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - 1) pval <- switch(pval_method, wald = sum_cox$waldtest["pvalue"], `log-rank` = log_rank_pvalue, likelihood = sum_cox$logtest["pvalue"] ) + # Assuming with_label is defined elsewhere list(pvalue = with_label(unname(pval), paste0( "p-value (", pval_method, ")" @@ -335,55 +439,15 @@ s_coxph_pairwise <- } -#' @title Kaplan-Meier Survival Curve Fitting -#' -#' @description This helper function fits a Kaplan-Meier survival curve model -#' using the formula \code{survival::Surv(tte, is_event) ~ arm}. -#' It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. -#' -#' @param df A data frame containing time-to-event (tte), event status (\code{is_event}), -#' and treatment arm (\code{arm}) variables. -#' @param variables A named list specifying the column names for time-to-event (\code{tte}), -#' event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. -#' For example: \code{list(tte = "time_column", is_event = "status_column", arm = "group_column")}. -#' @param control_surv A list of control parameters for the \code{survival::survfit} function, -#' typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level -#' and confidence interval type. -#' -#' @return An object of class \code{survfit} from the \code{survival} package, containing -#' the fitted Kaplan-Meier curves. -#' @export -h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { - tte <- variables$tte - is_event <- variables$is_event - arm <- variables$arm - - assert_valid_factor(df[[arm]]) - assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) - - formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm)) - fit_km <- survival::survfit( - formula = formula, - data = df, - conf.int = control_surv$conf_level, - conf.type = control_surv$conf_type - ) - return(fit_km) -} +## Core Plotting and Annotation Functions -#' @title Generate a Kaplan-Meier Plot with Annotations +#' @title Generate a Kaplan-Meier Plot #' -#' @description This function creates a comprehensive ggplot2 object for a Kaplan-Meier -#' survival curve, optionally including annotations for median survival and Cox-PH results, -#' and a 'Numbers at Risk' table below the main plot. +#' @description This function creates a comprehensive \code{ggplot2} object for a Kaplan-Meier +#' survival curve, with support for various customizations like censoring marks, CIs, and axis control. #' -#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, typically -#' generated by \code{\link{h_km_fit}}. -#' @param variables A named list specifying the survival and grouping variables (needed -#' for accessing the column names, even if the fit is provided). -#' @param coxph_tbl An optional data frame containing pre-calculated Cox-PH results, -#' typically generated by \code{\link{h_tbl_coxph_pairwise}}. If provided, a Cox-PH annotation table is added. -#' @param control_surv A list of control parameters for the survival model, from \code{\link{control_surv_timepoint}}. +#' @param surv_plot_data A data frame containing the pre-processed survival data, ready for plotting. +#' This data should be equivalent to the output of \code{\link{h_data_plot}}. #' @param col A character vector of colors for the survival curves. Length should match number of arms. #' @param lty A vector of line types for the survival curves, or \code{NULL} for default. #' @param lwd Numeric value specifying line width for the survival curves. @@ -400,83 +464,50 @@ h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { #' @param footnotes Character string for plot footnotes/caption. #' @param font_size Numeric, base font size for the plot theme. #' @param ci_ribbon Logical, whether to display confidence intervals as a ribbon (area). -#' @param annot_at_risk Logical, whether to include the 'Numbers at Risk' table below the plot. -#' @param annot_at_risk_title Logical, whether to include the title "Patients at Risk:" in the table. -#' @param annot_surv_med Logical, whether to include the median survival time annotation table. -#' @param control_annot_surv_med A list of control parameters for the median survival annotation box, -#' typically generated by \code{\link{control_surv_med_annot}}. -#' @param control_annot_coxph A list of control parameters for the Cox-PH annotation box, -#' typically generated by \code{\link{control_coxph_annot}}. #' @param legend_pos Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement. -#' @param rel_height_plot Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1). #' @param ggtheme An optional \code{ggplot2} theme to apply. -#' @param as_list Logical, if \code{TRUE}, returns a list containing the plot and the 'at-risk' table as separate \code{cowplot} objects; otherwise returns the combined \code{cowplot} object. #' -#' @return A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table, or a list if \code{as_list = TRUE}. +#' @return A \code{ggplot2} object of the KM plot. #' @importFrom ggplot2 ggplot aes theme_bw scale_y_continuous labs theme element_text element_blank element_rect element_line geom_step geom_point scale_shape_manual guides guide_legend geom_ribbon scale_color_manual scale_fill_manual scale_linetype_manual coord_cartesian -#' @importFrom cowplot ggdraw draw_plot plot_grid -#' @importFrom tidyr pivot_wider -#' @importFrom survival survfit -#' @importFrom broom tidy -#' @importFrom stats as.formula +#' @importFrom rlang .data #' @export -g_km <- function(fit_km, - variables, - coxph_tbl = NULL, # New argument for pre-calculated Cox-PH table - control_surv = control_surv_timepoint(), - col = NULL, - lty = NULL, - lwd = 0.5, - censor_show = TRUE, - pch = 3, - size = 2, - max_time = NULL, - xticks = NULL, - xlab = "Days", - yval = c("Survival", "Failure"), - ylab = paste(yval, "Probability"), - ylim = NULL, - title = NULL, - footnotes = NULL, - font_size = 10, - ci_ribbon = FALSE, - annot_at_risk = TRUE, - annot_at_risk_title = TRUE, - annot_surv_med = TRUE, - control_annot_surv_med = control_surv_med_annot(), - control_annot_coxph = control_coxph_annot(), - legend_pos = NULL, - rel_height_plot = 0.75, - ggtheme = NULL, - as_list = FALSE) { - # --- Data Extraction and Assertions --- - checkmate::assert_class(fit_km, "survfit") - checkmate::assert_list(variables) - checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables)) - - # 1. Extract arm values (strata names) from the fitted object - # h_data_plot is used here only to consistently get the unique strata levels - armval <- if (is.null(fit_km$strata)) "All" else levels(h_data_plot(fit_km, max_time = 1)$strata) +g_km <- function( + surv_plot_data, + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + legend_pos = NULL, + ggtheme = NULL) { + # ... (function body remains the same) + checkmate::assert_data_frame(surv_plot_data, min.cols = 7, min.rows = 1) + data <- surv_plot_data + + armval <- levels(data$strata) checkmate::assert_vector(col, len = length(armval), null.ok = TRUE) - # Check if a Cox-PH table was provided (replaces annot_coxph flag) - if (!is.null(coxph_tbl)) { - checkmate::assert_data_frame(coxph_tbl) - } - - # --- Data Processing --- yval <- match.arg(yval) - data <- h_data_plot(fit_km, armval = armval, max_time = max_time) + xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time) - # change estimates of survival to estimates of failure (1 - survival) if (yval == "Failure") { data[c("estimate", "conf.low", "conf.high", "censor")] <- list( 1 - data$estimate, 1 - data$conf.low, 1 - data$conf.high, 1 - data$censor ) } - # derive y-axis limits if (is.null(ylim)) { if (!is.null(max_time)) { y_lwr <- min(data[data$time < max_time, ][["estimate"]]) @@ -488,46 +519,43 @@ g_km <- function(fit_km, ylim <- c(y_lwr, y_upr) } - # --- ggplot Initialization and Aesthetics (Unchanged) --- gg_plt <- ggplot2::ggplot( data = data, - mapping = aes( + mapping = ggplot2::aes( x = .data[["time"]], y = .data[["estimate"]], ymin = .data[["conf.low"]], ymax = .data[["conf.high"]], color = .data[["strata"]], fill = .data[["strata"]] ) ) + - theme_bw(base_size = font_size) + - scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + - labs(title = title, x = xlab, y = ylab, caption = footnotes) + - theme( - axis.text = element_text(size = font_size), axis.title = element_text(size = font_size), - legend.title = element_blank(), legend.text = element_text(size = font_size), - legend.box.background = element_rect(fill = "white", linewidth = 0.5), - legend.background = element_blank(), legend.position = "inside", - legend.spacing.y = unit(-0.02, "npc"), panel.grid.major = element_blank(), - panel.grid.minor = element_blank() + ggplot2::theme_bw(base_size = font_size) + + ggplot2::scale_y_continuous(limits = ylim, expand = c(0.025, 0)) + + ggplot2::labs(title = title, x = xlab, y = ylab, caption = footnotes) + + ggplot2::theme( + axis.text = ggplot2::element_text(size = font_size), axis.title = ggplot2::element_text(size = font_size), + legend.title = ggplot2::element_blank(), legend.text = ggplot2::element_text(size = font_size), + legend.box.background = ggplot2::element_rect(fill = "white", linewidth = 0.5), + legend.background = ggplot2::element_blank(), legend.position = "inside", + legend.spacing.y = ggplot2::unit(-0.02, "npc"), panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank() ) - # derive x-axis limits if (!is.null(max_time) && !is.null(xticks)) { - gg_plt <- gg_plt + scale_x_continuous( + gg_plt <- gg_plt + ggplot2::scale_x_continuous( breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))), expand = c(0.025, 0) ) } else if (!is.null(xticks)) { if (max(data$time) <= max(xticks)) { - gg_plt <- gg_plt + scale_x_continuous( + gg_plt <- gg_plt + ggplot2::scale_x_continuous( breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)), expand = c(0.025, 0) ) } else { - gg_plt <- gg_plt + scale_x_continuous(breaks = xticks, expand = c(0.025, 0)) + gg_plt <- gg_plt + ggplot2::scale_x_continuous(breaks = xticks, expand = c(0.025, 0)) } } else if (!is.null(max_time)) { - gg_plt <- gg_plt + scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0)) + gg_plt <- gg_plt + ggplot2::scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0)) } - # set legend position (unchanged logic) if (!is.null(legend_pos)) { - gg_plt <- gg_plt + theme(legend.position.inside = legend_pos) + gg_plt <- gg_plt + ggplot2::theme(legend.position.inside = legend_pos) } else { max_time2 <- sort( data$time, @@ -539,154 +567,275 @@ g_km <- function(fit_km, if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) && all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint gg_plt <- gg_plt + - theme( + ggplot2::theme( legend.position.inside = c(1, 0.5), legend.justification = c(1.1, 0.6) ) } else { gg_plt <- gg_plt + - theme( + ggplot2::theme( legend.position.inside = c(1, 0), legend.justification = c(1.1, -0.4) ) } } - # add lines, censor marks, ci ribbon, and colors (unchanged) gg_plt <- if (is.null(lty)) { - gg_plt + geom_step(linewidth = lwd, na.rm = TRUE) + gg_plt + ggplot2::geom_step(linewidth = lwd, na.rm = TRUE) } else if (length(lty) == 1) { - gg_plt + geom_step(linewidth = lwd, lty = lty, na.rm = TRUE) + gg_plt + ggplot2::geom_step(linewidth = lwd, lty = lty, na.rm = TRUE) } else { gg_plt + - geom_step(aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) + - scale_linetype_manual(values = lty) + ggplot2::geom_step(ggplot2::aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) + + ggplot2::scale_linetype_manual(values = lty) } if (censor_show) { - gg_plt <- gg_plt + geom_point( + gg_plt <- gg_plt + ggplot2::geom_point( data = data[data$n.censor != 0, ], - aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), + ggplot2::aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"), size = size, na.rm = TRUE ) + - scale_shape_manual(name = NULL, values = pch) + - guides(fill = guide_legend(override.aes = list(shape = NA))) + ggplot2::scale_shape_manual(name = NULL, values = pch) + + ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(shape = NA))) } - if (ci_ribbon) gg_plt <- gg_plt + geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) + if (ci_ribbon) gg_plt <- gg_plt + ggplot2::geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE) if (!is.null(col)) { gg_plt <- gg_plt + - scale_color_manual(values = col) + - scale_fill_manual(values = col) + ggplot2::scale_color_manual(values = col) + + ggplot2::scale_fill_manual(values = col) } if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme - # --- Annotation Tables --- - - # 2. Median survival time annotation table - if (annot_surv_med) { - surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval) - bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]] - - gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) + - theme( - axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), - plot.margin = margin(0, 2, 0, 5) - ) + - coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) - gg_surv_med <- suppressMessages( - gg_surv_med + - scale_x_continuous(expand = c(0.025, 0)) + - scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) + + gg_plt +} + +#' @title Annotate Kaplan-Meier Plot with Median Survival Table +#' +#' @description Adds a median survival time summary table as an annotation box on a +#' Kaplan-Meier plot using \code{cowplot}. +#' +#' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data. +#' @param control_annot_surv_med A list of control parameters for the annotation box, +#' typically generated by \code{\link{control_surv_med_annot}}. +#' @param font_size Numeric, base font size for the annotation table. +#' +#' @return A \code{cowplot} object with the median survival table annotation added. +#' @importFrom cowplot ggdraw draw_plot +#' @importFrom ggplot2 theme element_text coord_cartesian scale_x_continuous scale_y_continuous margin +#' @export +annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv_med_annot(), font_size = 10) { + # Determine armval for h_tbl_median_surv, assuming it's available in the calling environment or logic should be updated + # For now, keeping as is, but this typically requires armval or inferring it from fit_km + armval <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) # Placeholder for armval + + surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval) + bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]] + + gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) + + ggplot2::theme( + axis.text.y = ggplot2::element_text(size = font_size, face = "italic", hjust = 1), + plot.margin = ggplot2::margin(0, 2, 0, 5) + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5)) + gg_surv_med <- suppressMessages( + gg_surv_med + + ggplot2::scale_x_continuous(expand = c(0.025, 0)) + + ggplot2::scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl))) + ) + + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_surv_med, control_annot_surv_med[["x"]], control_annot_surv_med[["y"]], + width = control_annot_surv_med[["w"]], height = control_annot_surv_med[["h"]], + vjust = 0.5, hjust = 0.5 ) + gg_plt +} - gg_plt <- cowplot::ggdraw(gg_plt) + - cowplot::draw_plot( - gg_surv_med, control_annot_surv_med[["x"]], control_annot_surv_med[["y"]], - width = control_annot_surv_med[["w"]], height = control_annot_surv_med[["h"]], - vjust = 0.5, hjust = 0.5 - ) - } +#' @title Annotate Kaplan-Meier Plot with Cox-PH Table +#' +#' @description Adds a Cox Proportional Hazards summary table as an annotation box on a +#' Kaplan-Meier plot using \code{cowplot}. +#' +#' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. +#' @param coxph_tbl A data frame containing pre-calculated Cox-PH results, +#' typically generated by \code{\link{h_tbl_coxph_pairwise}}. +#' @param control_annot_coxph A list of control parameters for the annotation box, +#' typically generated by \code{\link{control_coxph_annot}}. +#' @param font_size Numeric, base font size for the annotation table. +#' +#' @return A \code{cowplot} object with the Cox-PH table annotation added. +#' @importFrom cowplot ggdraw draw_plot +#' @importFrom ggplot2 theme element_text coord_cartesian scale_x_continuous scale_y_continuous margin +#' @export +annot_cox_ph <- function(gg_plt, coxph_tbl, control_annot_coxph = control_coxph_annot(), font_size = 10) { + # ... (function body remains the same) + bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] + + gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + + ggplot2::theme( + axis.text.y = ggplot2::element_text(size = font_size, face = "italic", hjust = 1), + plot.margin = ggplot2::margin(0, 2, 0, 5) + ) + + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) + gg_coxph <- suppressMessages( + gg_coxph + + ggplot2::scale_x_continuous(expand = c(0.025, 0)) + + ggplot2::scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) + ) - # 3. Cox-PH annotation table - if (!is.null(coxph_tbl)) { - # coxph_tbl is pre-computed outside g_km, just plot it - bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]] - - gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) + - theme( - axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), - plot.margin = margin(0, 2, 0, 5) - ) + - coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5)) - gg_coxph <- suppressMessages( - gg_coxph + - scale_x_continuous(expand = c(0.025, 0)) + - scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl))) + gg_plt <- cowplot::ggdraw(gg_plt) + + cowplot::draw_plot( + gg_coxph, control_annot_coxph[["x"]], control_annot_coxph[["y"]], + width = control_annot_coxph[["w"]], height = control_annot_coxph[["h"]], + vjust = 0.5, hjust = 0.5 ) + gg_plt +} - gg_plt <- cowplot::ggdraw(gg_plt) + - cowplot::draw_plot( - gg_coxph, control_annot_coxph[["x"]], control_annot_coxph[["y"]], - width = control_annot_coxph[["w"]], height = control_annot_coxph[["h"]], - vjust = 0.5, hjust = 0.5 - ) +#' @title Annotate Plot with Numbers at Risk Table +#' +#' @description Adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. +#' +#' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. +#' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data. +#' @param font_size Numeric, base font size for the table. +#' @param annot_at_risk_title Logical, whether to include the title "Patients at Risk:". +#' @param rel_height_plot Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1). +#' @param xlab Character string for the x-axis label on the 'at-risk' table (typically time). +#' +#' @return A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. +#' @importFrom broom tidy +#' @importFrom tidyr pivot_wider +#' @importFrom cowplot plot_grid +#' @importFrom ggplot2 labs theme_bw theme element_text element_blank element_line coord_cartesian scale_x_continuous scale_y_continuous +#' @export +annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = TRUE, rel_height_plot = 0.75, xlab = "Days") { + # ... (function body remains the same) + data <- broom::tidy(fit_km) + xticks <- h_xticks(data = data) + annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) + + # Placeholder for armval, should be retrieved from fit_km or passed as argument + armval <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) + + annot_tbl <- if (is.null(fit_km$strata)) { + data.frame( + n.risk = annot_tbl$n.risk, + time = annot_tbl$time, + strata = armval + ) + } else { + strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") + levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] + data.frame( + n.risk = annot_tbl$n.risk, + time = annot_tbl$time, + strata = annot_tbl$strata + ) } - # add at risk annotation table (unchanged logic) - if (annot_at_risk) { - annot_tbl <- summary(fit_km, times = xticks, extend = TRUE) - annot_tbl <- if (is.null(fit_km$strata)) { - data.frame( - n.risk = annot_tbl$n.risk, time = annot_tbl$time, strata = armval - ) - } else { - strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals") - levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2] - data.frame( - n.risk = annot_tbl$n.risk, time = annot_tbl$time, strata = annot_tbl$strata - ) - } - - at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1]) - at_risk_tbl[is.na(at_risk_tbl)] <- 0 - rownames(at_risk_tbl) <- levels(annot_tbl$strata) + at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1]) + at_risk_tbl[is.na(at_risk_tbl)] <- 0 + rownames(at_risk_tbl) <- levels(annot_tbl$strata) - gg_at_risk <- df2gg( - at_risk_tbl, - font_size = font_size, col_labels = FALSE, hline = FALSE, - colwidths = rep(1, ncol(at_risk_tbl)) + gg_at_risk <- df2gg( + at_risk_tbl, + font_size = font_size, col_labels = FALSE, hline = FALSE, + colwidths = rep(1, ncol(at_risk_tbl)) + ) + + ggplot2::labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + + ggplot2::theme_bw(base_size = font_size) + + ggplot2::theme( + plot.title = ggplot2::element_text(size = font_size, vjust = 3, face = "bold"), + panel.border = ggplot2::element_blank(), + panel.grid = ggplot2::element_blank(), + axis.title.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank(), + axis.text.y = ggplot2::element_text(size = font_size, face = "italic", hjust = 1), + axis.text.x = ggplot2::element_text(size = font_size), + axis.line.x = ggplot2::element_line() ) + - labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) + - theme_bw(base_size = font_size) + - theme( - plot.title = element_text(size = font_size, vjust = 3, face = "bold"), - panel.border = element_blank(), panel.grid = element_blank(), - axis.title.y = element_blank(), axis.ticks.y = element_blank(), - axis.text.y = element_text(size = font_size, face = "italic", hjust = 1), - axis.text.x = element_text(size = font_size), axis.line.x = element_line() - ) + - coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) - gg_at_risk <- suppressMessages( - gg_at_risk + - scale_x_continuous(expand = c(0.025, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + - scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) - ) + ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl))) + gg_at_risk <- suppressMessages( + gg_at_risk + + ggplot2::scale_x_continuous(expand = c(0.1, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) + + ggplot2::scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl))) + ) - if (!as_list) { - gg_plt <- cowplot::plot_grid( - gg_plt, gg_at_risk, - align = "v", axis = "tblr", ncol = 1, - rel_heights = c(rel_height_plot, 1 - rel_height_plot) - ) - } - } + gg_plt <- cowplot::plot_grid( + gg_plt, gg_at_risk, + align = "v", axis = "tblr", ncol = 1, + rel_heights = c(rel_height_plot, 1 - rel_height_plot) + ) + gg_plt +} - if (as_list) { - list(plot = gg_plt, table = gg_at_risk) - } else { - gg_plt +# styler: off +# nocov start + + + + ## Control and Internal KM Fit (from `tern` or similar) + + #' @title Control parameters for Survival Timepoint Estimation + #' + #' @description Creates a list of control parameters for \code{survival::survfit} when used + #' for timepoint estimation. + #' + #' @param conf_level A numeric value (0 to 1) for the confidence level. + #' @param conf_type A character string specifying the type of confidence interval. + #' Must be one of \code{"plain"}, \code{"log"}, or \code{"log-log"}. + #' + #' @return A list with elements \code{conf_level} and \code{conf_type}. + control_surv_timepoint <- function(conf_level = 0.95, conf_type = c("plain", "log", "log-log")) { + conf_type <- match.arg(conf_type) + assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere + list(conf_level = conf_level, conf_type = conf_type) } + + +#' @title Kaplan-Meier Survival Curve Fitting +#' +#' @description This helper function fits a Kaplan-Meier survival curve model +#' using the formula \code{survival::Surv(tte, is_event) ~ arm}. +#' It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. +#' +#' @param df A data frame containing time-to-event (tte), event status (\code{is_event}), +#' and treatment arm (\code{arm}) variables. +#' @param variables A named list specifying the column names for time-to-event (\code{tte}), +#' event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. +#' @param control_surv A list of control parameters for the \code{survival::survfit} function, +#' typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level +#' and confidence interval type. +#' +#' @return An object of class \code{survfit} from the \code{survival} package, containing +#' the fitted Kaplan-Meier curves. +#' @importFrom survival survfit Surv +#' @importFrom stats as.formula +h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { + tte <- variables$tte + is_event <- variables$is_event + arm <- variables$arm + + # Assuming assert_valid_factor and assert_df_with_variables are defined elsewhere + assert_valid_factor(df[[arm]]) + assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) + + formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm)) + fit_km <- survival::survfit( + formula = formula, + data = df, + conf.int = control_surv$conf_level, + conf.type = control_surv$conf_type + ) + return(fit_km) } +# nocov end +# styler: on diff --git a/man/annot_at_risk.Rd b/man/annot_at_risk.Rd new file mode 100644 index 00000000..3892dbcc --- /dev/null +++ b/man/annot_at_risk.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{annot_at_risk} +\alias{annot_at_risk} +\title{Annotate Plot with Numbers at Risk Table} +\usage{ +annot_at_risk( + gg_plt, + fit_km, + font_size = 10, + annot_at_risk_title = TRUE, + rel_height_plot = 0.75, + xlab = "Days" +) +} +\arguments{ +\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} + +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} + +\item{font_size}{Numeric, base font size for the table.} + +\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:".} + +\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} + +\item{xlab}{Character string for the x-axis label on the 'at-risk' table (typically time).} +} +\value{ +A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. +} +\description{ +Adds a "Numbers at Risk" table below a Kaplan-Meier plot using \code{cowplot::plot_grid}. +} diff --git a/man/annot_cox_ph.Rd b/man/annot_cox_ph.Rd new file mode 100644 index 00000000..cbfce564 --- /dev/null +++ b/man/annot_cox_ph.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{annot_cox_ph} +\alias{annot_cox_ph} +\title{Annotate Kaplan-Meier Plot with Cox-PH Table} +\usage{ +annot_cox_ph( + gg_plt, + coxph_tbl, + control_annot_coxph = control_coxph_annot(), + font_size = 10 +) +} +\arguments{ +\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} + +\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results, +typically generated by \code{\link{h_tbl_coxph_pairwise}}.} + +\item{control_annot_coxph}{A list of control parameters for the annotation box, +typically generated by \code{\link{control_coxph_annot}}.} + +\item{font_size}{Numeric, base font size for the annotation table.} +} +\value{ +A \code{cowplot} object with the Cox-PH table annotation added. +} +\description{ +Adds a Cox Proportional Hazards summary table as an annotation box on a +Kaplan-Meier plot using \code{cowplot}. +} diff --git a/man/annot_surv_med.Rd b/man/annot_surv_med.Rd new file mode 100644 index 00000000..0e2fa21c --- /dev/null +++ b/man/annot_surv_med.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{annot_surv_med} +\alias{annot_surv_med} +\title{Annotate Kaplan-Meier Plot with Median Survival Table} +\usage{ +annot_surv_med( + gg_plt, + fit_km, + control_annot_surv_med = control_surv_med_annot(), + font_size = 10 +) +} +\arguments{ +\item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} + +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} + +\item{control_annot_surv_med}{A list of control parameters for the annotation box, +typically generated by \code{\link{control_surv_med_annot}}.} + +\item{font_size}{Numeric, base font size for the annotation table.} +} +\value{ +A \code{cowplot} object with the median survival table annotation added. +} +\description{ +Adds a median survival time summary table as an annotation box on a +Kaplan-Meier plot using \code{cowplot}. +} diff --git a/man/control_coxph.Rd b/man/control_coxph.Rd new file mode 100644 index 00000000..9b79793b --- /dev/null +++ b/man/control_coxph.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{control_coxph} +\alias{control_coxph} +\title{Control parameters for Cox Proportional Hazards model} +\usage{ +control_coxph( + pval_method = c("log-rank", "wald", "likelihood"), + ties = c("efron", "breslow", "exact"), + conf_level = 0.95 +) +} +\arguments{ +\item{pval_method}{A character string specifying the method for calculating the p-value. +Must be one of \code{"log-rank"}, \code{"wald"}, or \code{"likelihood"}.} + +\item{ties}{A character string specifying the method for handling tied failure times. +Must be one of \code{"efron"}, \code{"breslow"}, or \code{"exact"}.} + +\item{conf_level}{A numeric value between 0 and 1, specifying the confidence level.} +} +\value{ +A list with elements \code{pval_method}, \code{ties}, and \code{conf_level}. +} +\description{ +Creates a list of control parameters for Cox Proportional Hazards (Cox-PH) +analysis, used by functions like \code{\link{h_tbl_coxph_pairwise}}. +} diff --git a/man/control_coxph_annot.Rd b/man/control_coxph_annot.Rd new file mode 100644 index 00000000..b5e541c2 --- /dev/null +++ b/man/control_coxph_annot.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{control_coxph_annot} +\alias{control_coxph_annot} +\title{Control parameters for Cox-PH Annotation Box} +\usage{ +control_coxph_annot( + x = 0.29, + y = 0.51, + w = 0.4, + h = 0.125, + fill = TRUE, + ref_lbls = FALSE +) +} +\arguments{ +\item{x}{A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area).} + +\item{y}{A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area).} + +\item{w}{A numeric value (0 to 1) for the width of the box (relative to plot area).} + +\item{h}{A numeric value (0 to 1) for the height of the box (relative to plot area).} + +\item{fill}{A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background.} + +\item{ref_lbls}{A logical flag indicating whether to append "vs. ref group" to row names.} +} +\value{ +A list with elements \code{x}, \code{y}, \code{w}, \code{h}, \code{fill}, and \code{ref_lbls}. +} +\description{ +Creates a list of control parameters for positioning and styling the +Cox Proportional Hazards annotation box on a plot. +} diff --git a/man/control_surv_med_annot.Rd b/man/control_surv_med_annot.Rd new file mode 100644 index 00000000..15c6e3ee --- /dev/null +++ b/man/control_surv_med_annot.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{control_surv_med_annot} +\alias{control_surv_med_annot} +\title{Control parameters for Median Survival Annotation Box} +\usage{ +control_surv_med_annot(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) +} +\arguments{ +\item{x}{A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area).} + +\item{y}{A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area).} + +\item{w}{A numeric value (0 to 1) for the width of the box (relative to plot area).} + +\item{h}{A numeric value (0 to 1) for the height of the box (relative to plot area).} + +\item{fill}{A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background.} +} +\value{ +A list with elements \code{x}, \code{y}, \code{w}, \code{h}, and \code{fill}. +} +\description{ +Creates a list of control parameters for positioning and styling the +median survival annotation box on a plot. +} diff --git a/man/control_surv_timepoint.Rd b/man/control_surv_timepoint.Rd new file mode 100644 index 00000000..c2c5979b --- /dev/null +++ b/man/control_surv_timepoint.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{control_surv_timepoint} +\alias{control_surv_timepoint} +\title{Control parameters for Survival Timepoint Estimation} +\usage{ +control_surv_timepoint( + conf_level = 0.95, + conf_type = c("plain", "log", "log-log") +) +} +\arguments{ +\item{conf_level}{A numeric value (0 to 1) for the confidence level.} + +\item{conf_type}{A character string specifying the type of confidence interval. +Must be one of \code{"plain"}, \code{"log"}, or \code{"log-log"}.} +} +\value{ +A list with elements \code{conf_level} and \code{conf_type}. +} +\description{ +Creates a list of control parameters for \code{survival::survfit} when used +for timepoint estimation. +} diff --git a/man/df2gg.Rd b/man/df2gg.Rd new file mode 100644 index 00000000..3b3eb820 --- /dev/null +++ b/man/df2gg.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{df2gg} +\alias{df2gg} +\title{Convert Data Frame to ggplot2 Table Graphic} +\usage{ +df2gg( + df, + colwidths = NULL, + font_size = 10, + col_labels = TRUE, + col_lab_fontface = "bold", + hline = TRUE, + bg_fill = NULL +) +} +\arguments{ +\item{df}{The data frame to render.} + +\item{colwidths}{Numeric vector of relative column widths. If \code{NULL}, determined by max character length.} + +\item{font_size}{Numeric base font size.} + +\item{col_labels}{Logical, whether to display column labels (header).} + +\item{col_lab_fontface}{Character string for the font face of column labels (e.g., "bold").} + +\item{hline}{Logical, whether to draw a horizontal line below the column labels.} + +\item{bg_fill}{Optional color string for the plot background.} +} +\value{ +A \code{ggplot2} object representing the table. +} +\description{ +Creates a \code{ggplot2} object that renders a data frame as a table graphic. +} diff --git a/man/f_conf_level.Rd b/man/f_conf_level.Rd new file mode 100644 index 00000000..a6653cad --- /dev/null +++ b/man/f_conf_level.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{f_conf_level} +\alias{f_conf_level} +\title{Format Confidence Level String} +\usage{ +f_conf_level(conf_level) +} +\arguments{ +\item{conf_level}{A numeric confidence level (proportion, 0 to 1).} +} +\value{ +A character string. +} +\description{ +Converts a confidence level (e.g., 0.95) to a formatted string (e.g., "95\% CI"). +} diff --git a/man/g_km.Rd b/man/g_km.Rd index 7fd3be45..f0b15d19 100644 --- a/man/g_km.Rd +++ b/man/g_km.Rd @@ -2,13 +2,10 @@ % Please edit documentation in R/gkm.R \name{g_km} \alias{g_km} -\title{Generate a Kaplan-Meier Plot with Annotations} +\title{Generate a Kaplan-Meier Plot} \usage{ g_km( - fit_km, - variables, - coxph_tbl = NULL, - control_surv = control_surv_timepoint(), + surv_plot_data, col = NULL, lty = NULL, lwd = 0.5, @@ -25,28 +22,13 @@ g_km( footnotes = NULL, font_size = 10, ci_ribbon = FALSE, - annot_at_risk = TRUE, - annot_at_risk_title = TRUE, - annot_surv_med = TRUE, - control_annot_surv_med = control_surv_med_annot(), - control_annot_coxph = control_coxph_annot(), legend_pos = NULL, - rel_height_plot = 0.75, - ggtheme = NULL, - as_list = FALSE + ggtheme = NULL ) } \arguments{ -\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, typically -generated by \code{\link{h_km_fit}}.} - -\item{variables}{A named list specifying the survival and grouping variables (needed -for accessing the column names, even if the fit is provided).} - -\item{coxph_tbl}{An optional data frame containing pre-calculated Cox-PH results, -typically generated by \code{\link{h_tbl_coxph_pairwise}}. If provided, a Cox-PH annotation table is added.} - -\item{control_surv}{A list of control parameters for the survival model, from \code{\link{control_surv_timepoint}}.} +\item{surv_plot_data}{A data frame containing the pre-processed survival data, ready for plotting. +This data should be equivalent to the output of \code{\link{h_data_plot}}.} \item{col}{A character vector of colors for the survival curves. Length should match number of arms.} @@ -80,31 +62,14 @@ typically generated by \code{\link{h_tbl_coxph_pairwise}}. If provided, a Cox-PH \item{ci_ribbon}{Logical, whether to display confidence intervals as a ribbon (area).} -\item{annot_at_risk}{Logical, whether to include the 'Numbers at Risk' table below the plot.} - -\item{annot_at_risk_title}{Logical, whether to include the title "Patients at Risk:" in the table.} - -\item{annot_surv_med}{Logical, whether to include the median survival time annotation table.} - -\item{control_annot_surv_med}{A list of control parameters for the median survival annotation box, -typically generated by \code{\link{control_surv_med_annot}}.} - -\item{control_annot_coxph}{A list of control parameters for the Cox-PH annotation box, -typically generated by \code{\link{control_coxph_annot}}.} - \item{legend_pos}{Numeric vector of length 2 for legend position (x, y) relative to the plot area (0 to 1), or \code{NULL} for auto-placement.} -\item{rel_height_plot}{Numeric, relative height of the main plot area compared to the 'at-risk' table (0 to 1).} - \item{ggtheme}{An optional \code{ggplot2} theme to apply.} - -\item{as_list}{Logical, if \code{TRUE}, returns a list containing the plot and the 'at-risk' table as separate \code{cowplot} objects; otherwise returns the combined \code{cowplot} object.} } \value{ -A \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table, or a list if \code{as_list = TRUE}. +A \code{ggplot2} object of the KM plot. } \description{ -This function creates a comprehensive ggplot2 object for a Kaplan-Meier -survival curve, optionally including annotations for median survival and Cox-PH results, -and a 'Numbers at Risk' table below the main plot. +This function creates a comprehensive \code{ggplot2} object for a Kaplan-Meier +survival curve, with support for various customizations like censoring marks, CIs, and axis control. } diff --git a/man/h_data_plot.Rd b/man/h_data_plot.Rd new file mode 100644 index 00000000..159bc6c8 --- /dev/null +++ b/man/h_data_plot.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_data_plot} +\alias{h_data_plot} +\title{Prepare Kaplan-Meier Data for Plotting} +\usage{ +h_data_plot(fit_km, armval = "All", max_time = NULL) +} +\arguments{ +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}.} + +\item{armval}{Character string for the strata level if \code{fit_km} has no strata (e.g., "All").} + +\item{max_time}{Numeric, the maximum time point to include in the data, or \code{NULL} for no limit.} +} +\value{ +A data frame containing the survival curve steps, confidence intervals, and censoring info. +} +\description{ +Takes a fitted \code{survfit} object and processes it into a data frame +suitable for plotting a Kaplan-Meier curve with \code{ggplot2}, including extending +the curve to time zero. +} diff --git a/man/h_km_fit.Rd b/man/h_km_fit.Rd index bbb66746..267a4527 100644 --- a/man/h_km_fit.Rd +++ b/man/h_km_fit.Rd @@ -11,8 +11,7 @@ h_km_fit(df, variables, control_surv = control_surv_timepoint()) and treatment arm (\code{arm}) variables.} \item{variables}{A named list specifying the column names for time-to-event (\code{tte}), -event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. -For example: \code{list(tte = "time_column", is_event = "status_column", arm = "group_column")}.} +event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}.} \item{control_surv}{A list of control parameters for the \code{survival::survfit} function, typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level diff --git a/man/h_tbl_median_surv.Rd b/man/h_tbl_median_surv.Rd new file mode 100644 index 00000000..a99e272d --- /dev/null +++ b/man/h_tbl_median_surv.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_tbl_median_surv} +\alias{h_tbl_median_surv} +\title{Median Survival Summary Table} +\usage{ +h_tbl_median_surv(fit_km, armval = "All") +} +\arguments{ +\item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}.} + +\item{armval}{Character string to use as the row name if \code{fit_km} has no strata (e.g., "All").} +} +\value{ +A data frame with columns "N", "Median", and the confidence interval label. +} +\description{ +Extracts and formats the median survival time and its confidence interval +from a fitted Kaplan-Meier object. +} diff --git a/man/h_xticks.Rd b/man/h_xticks.Rd new file mode 100644 index 00000000..c50e8c1f --- /dev/null +++ b/man/h_xticks.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{h_xticks} +\alias{h_xticks} +\title{Calculate X-axis Ticks} +\usage{ +h_xticks(data, xticks = NULL, max_time = NULL) +} +\arguments{ +\item{data}{A data frame containing a \code{time} column.} + +\item{xticks}{A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation.} + +\item{max_time}{Optional numeric value specifying the maximum time to consider for tick range.} +} +\value{ +A numeric vector of x-axis tick positions. +} +\description{ +Determines the positions for x-axis ticks based on the data and user input. +} diff --git a/man/s_coxph_pairwise.Rd b/man/s_coxph_pairwise.Rd new file mode 100644 index 00000000..3adf3125 --- /dev/null +++ b/man/s_coxph_pairwise.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{s_coxph_pairwise} +\alias{s_coxph_pairwise} +\title{Pairwise Cox Proportional Hazards Model Calculation} +\usage{ +s_coxph_pairwise( + df, + .ref_group, + .in_ref_col, + .var, + is_event, + strata = NULL, + control = control_coxph(), + ... +) +} +\arguments{ +\item{df}{Data frame for the comparison group.} + +\item{.ref_group}{Data frame for the reference group.} + +\item{.in_ref_col}{Logical, if \code{TRUE} returns empty results (for internal table building).} + +\item{.var}{Character string for the time-to-event variable name.} + +\item{is_event}{Character string for the event status variable name.} + +\item{strata}{Optional character vector of stratification variable names.} + +\item{control}{A list of control parameters from \code{\link{control_coxph}}.} + +\item{...}{Additional arguments (not used).} +} +\value{ +A list containing the p-value, Hazard Ratio (HR), confidence interval (HR\_CI), +and total counts. +} +\description{ +Performs a Cox Proportional Hazards model calculation comparing two groups +(a reference group and a comparison group). This is an internal function used by +\code{\link{h_tbl_coxph_pairwise}}. +} From 291244823c167edb53f117ecca6884a54e461d9b Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 30 Nov 2025 10:44:57 +0800 Subject: [PATCH 06/25] update ad test --- .Rbuildignore | 1 + DESCRIPTION | 3 ++- NAMESPACE | 2 ++ R/crane-package.R | 3 ++- R/gkm.R | 55 +++++---------------------------------- _pkgdown.yml | 8 ++++++ inst/WORDLIST | 1 + tests/testthat/test-gkm.R | 33 +++++++++++++++++++++++ 8 files changed, 55 insertions(+), 51 deletions(-) create mode 100644 tests/testthat/test-gkm.R diff --git a/.Rbuildignore b/.Rbuildignore index ae7e12a9..4deffee4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,3 +10,4 @@ ^vignettes/articles$ ^\.gitlab-ci\.yml$ LICENSE +.lintr diff --git a/DESCRIPTION b/DESCRIPTION index c976c2f9..919d91f5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,8 @@ Imports: cli (>= 3.6.4), dplyr (>= 1.1.4), flextable (>= 0.9.7), - ggplot2 (>= 4.0.1), + ggplot2 (>= 4.0.0), + ggsurvfit (>= 1.1.0), glue (>= 1.8.0), gt (>= 0.11.1), lifecycle, diff --git a/NAMESPACE b/NAMESPACE index 024efc1a..4259e82a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,8 @@ exportMethods("obj_label<-") exportMethods(obj_label) import(glue) import(rlang) +import(tail) +import(utils) importFrom(broom,tidy) importFrom(cowplot,draw_plot) importFrom(cowplot,ggdraw) diff --git a/R/crane-package.R b/R/crane-package.R index 9d4c0225..d295f062 100644 --- a/R/crane-package.R +++ b/R/crane-package.R @@ -1,6 +1,7 @@ #' @keywords internal #' @import rlang #' @import glue glue +#' @importFrom utils tail #' @importFrom dplyr across starts_with ends_with contains matches num_range #' all_of any_of everything last_col where "_PACKAGE" @@ -9,7 +10,7 @@ ## usethis namespace: end NULL -utils::globalVariables(c(".")) +utils::globalVariables(c(".", "obj")) # using pkgs to silence NOTE .silence <- function() { diff --git a/R/gkm.R b/R/gkm.R index aab7d8c1..d0301cc8 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,15 +1,3 @@ -#' @title Control parameters for Cox Proportional Hazards model -#' -#' @description Creates a list of control parameters for Cox Proportional Hazards (Cox-PH) -#' analysis, used by functions like \code{\link{h_tbl_coxph_pairwise}}. -#' -#' @param pval_method A character string specifying the method for calculating the p-value. -#' Must be one of \code{"log-rank"}, \code{"wald"}, or \code{"likelihood"}. -#' @param ties A character string specifying the method for handling tied failure times. -#' Must be one of \code{"efron"}, \code{"breslow"}, or \code{"exact"}. -#' @param conf_level A numeric value between 0 and 1, specifying the confidence level. -#' -#' @return A list with elements \code{pval_method}, \code{ties}, and \code{conf_level}. control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), ties = c("efron", "breslow", "exact"), conf_level = 0.95) { pval_method <- match.arg(pval_method) @@ -18,18 +6,6 @@ control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), list(pval_method = pval_method, ties = ties, conf_level = conf_level) } -#' @title Control parameters for Median Survival Annotation Box -#' -#' @description Creates a list of control parameters for positioning and styling the -#' median survival annotation box on a plot. -#' -#' @param x A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area). -#' @param y A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area). -#' @param w A numeric value (0 to 1) for the width of the box (relative to plot area). -#' @param h A numeric value (0 to 1) for the height of the box (relative to plot area). -#' @param fill A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background. -#' -#' @return A list with elements \code{x}, \code{y}, \code{w}, \code{h}, and \code{fill}. control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { assert_proportion_value(x) assert_proportion_value(y) @@ -39,19 +15,6 @@ control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = list(x = x, y = y, w = w, h = h, fill = fill) } -#' @title Control parameters for Cox-PH Annotation Box -#' -#' @description Creates a list of control parameters for positioning and styling the -#' Cox Proportional Hazards annotation box on a plot. -#' -#' @param x A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area). -#' @param y A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area). -#' @param w A numeric value (0 to 1) for the width of the box (relative to plot area). -#' @param h A numeric value (0 to 1) for the height of the box (relative to plot area). -#' @param fill A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background. -#' @param ref_lbls A logical flag indicating whether to append "vs. ref group" to row names. -#' -#' @return A list with elements \code{x}, \code{y}, \code{w}, \code{h}, \code{fill}, and \code{ref_lbls}. control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) { checkmate::assert_logical(ref_lbls, any.missing = FALSE) @@ -60,12 +23,6 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T } -## Helper Functions (Formatting, Data Preparation, Plotting Utilities) - -#' @title Format Confidence Level String -#' @description Converts a confidence level (e.g., 0.95) to a formatted string (e.g., "95% CI"). -#' @param conf_level A numeric confidence level (proportion, 0 to 1). -#' @return A character string. f_conf_level <- function(conf_level) { assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere paste0(conf_level * 100, "% CI") @@ -82,7 +39,7 @@ f_conf_level <- function(conf_level) { #' @param col_lab_fontface Character string for the font face of column labels (e.g., "bold"). #' @param hline Logical, whether to draw a horizontal line below the column labels. #' @param bg_fill Optional color string for the plot background. -#' +#' @keywords internal #' @return A \code{ggplot2} object representing the table. #' @importFrom ggplot2 ggplot theme_void scale_x_continuous scale_y_continuous theme element_rect annotate element_text .pt df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, @@ -146,7 +103,7 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, #' @param data A data frame containing a \code{time} column. #' @param xticks A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation. #' @param max_time Optional numeric value specifying the maximum time to consider for tick range. -#' +#' @keywords internal #' @return A numeric vector of x-axis tick positions. h_xticks <- function(data, xticks = NULL, max_time = NULL) { # ... (function body remains the same) @@ -182,7 +139,7 @@ h_xticks <- function(data, xticks = NULL, max_time = NULL) { #' #' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}. #' @param armval Character string to use as the row name if \code{fit_km} has no strata (e.g., "All"). -#' +#' @keywords internal #' @return A data frame with columns "N", "Median", and the confidence interval label. h_tbl_median_surv <- function(fit_km, armval = "All") { # ... (function body remains the same) @@ -342,7 +299,7 @@ h_data_plot <- function(fit_km, #' @param strata Optional character vector of stratification variable names. #' @param control A list of control parameters from \code{\link{control_coxph}}. #' @param ... Additional arguments (not used). -#' +#' @keywords internal #' @return A list containing the p-value, Hazard Ratio (HR), confidence interval (HR\_CI), #' and total counts. #' @importFrom survival Surv coxph survdiff @@ -792,7 +749,7 @@ annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = #' @param conf_level A numeric value (0 to 1) for the confidence level. #' @param conf_type A character string specifying the type of confidence interval. #' Must be one of \code{"plain"}, \code{"log"}, or \code{"log-log"}. - #' + #' @keywords internal #' @return A list with elements \code{conf_level} and \code{conf_type}. control_surv_timepoint <- function(conf_level = 0.95, conf_type = c("plain", "log", "log-log")) { conf_type <- match.arg(conf_type) @@ -814,7 +771,7 @@ annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = #' @param control_surv A list of control parameters for the \code{survival::survfit} function, #' typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level #' and confidence interval type. -#' +#' @keywords internal #' @return An object of class \code{survfit} from the \code{survival} package, containing #' the fitted Kaplan-Meier curves. #' @importFrom survival survfit Surv diff --git a/_pkgdown.yml b/_pkgdown.yml index c850f898..0494b77d 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -41,3 +41,11 @@ reference: - modify_zero_recode - add_blank_rows - label_roche + - title: "g km plot" + contents: + - h_tbl_coxph_pairwise + - h_data_plot + - g_km + - annot_surv_med + - annot_cox_ph + - annot_at_risk diff --git a/inst/WORDLIST b/inst/WORDLIST index 7f7292b8..0db4adb4 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -25,3 +25,4 @@ tbl tte tidyselect unstratified +customizations diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R new file mode 100644 index 00000000..f247c6d9 --- /dev/null +++ b/tests/testthat/test-gkm.R @@ -0,0 +1,33 @@ +skip_on_cran() + +anl <- cards::ADTTE |> + dplyr::mutate(is_event = CNSR == 0) %>% + dplyr::mutate(TRTP = as.factor(TRTP)) + +variables <- list(tte = "AVAL", is_event = "is_event", arm = "TRTP") + +test_that("test gkm() works", { + fit_kmg01 <- survfit(ggsurvfit::Surv_CNSR(AVAL, CNSR) ~ TRTP, anl) + variables <- list(tte = "AVAL", is_event = "is_event", arm = "TRTP") + + expect_no_error(surv_plot_data <- h_data_plot(fit_kmg01)) + + expect_no_error( + suppressWarnings( + coxph_tbl <- h_tbl_coxph_pairwise( + df = anl, + variables = variables + ) + ) + ) + + expect_no_error( + plt_kmg01 <- g_km(surv_plot_data, + xlab = "Time (Days)", + ylim = c(0.9, 1) + ) %>% + annot_surv_med(fit_kmg01) %>% + annot_cox_ph(coxph_tbl) %>% + annot_at_risk(fit_kmg01) + ) +}) From 015b86e91d3d1d34d29ea3fbd0951a825080e3a9 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 30 Nov 2025 10:48:12 +0800 Subject: [PATCH 07/25] update namespace --- NAMESPACE | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4259e82a..db60c2bc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,8 +45,6 @@ exportMethods("obj_label<-") exportMethods(obj_label) import(glue) import(rlang) -import(tail) -import(utils) importFrom(broom,tidy) importFrom(cowplot,draw_plot) importFrom(cowplot,ggdraw) @@ -98,3 +96,4 @@ importFrom(survival,coxph) importFrom(survival,survdiff) importFrom(survival,survfit) importFrom(tidyr,pivot_wider) +importFrom(utils,tail) From ec08202bea3406a0c2db39130d8587f7654e35fb Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 30 Nov 2025 10:48:23 +0800 Subject: [PATCH 08/25] update doc --- man/control_coxph.Rd | 28 ---------------------------- man/control_coxph_annot.Rd | 35 ----------------------------------- man/control_surv_med_annot.Rd | 26 -------------------------- man/control_surv_timepoint.Rd | 1 + man/df2gg.Rd | 1 + man/f_conf_level.Rd | 17 ----------------- man/h_km_fit.Rd | 1 + man/h_tbl_median_surv.Rd | 1 + man/h_xticks.Rd | 1 + man/s_coxph_pairwise.Rd | 1 + 10 files changed, 6 insertions(+), 106 deletions(-) delete mode 100644 man/control_coxph.Rd delete mode 100644 man/control_coxph_annot.Rd delete mode 100644 man/control_surv_med_annot.Rd delete mode 100644 man/f_conf_level.Rd diff --git a/man/control_coxph.Rd b/man/control_coxph.Rd deleted file mode 100644 index 9b79793b..00000000 --- a/man/control_coxph.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{control_coxph} -\alias{control_coxph} -\title{Control parameters for Cox Proportional Hazards model} -\usage{ -control_coxph( - pval_method = c("log-rank", "wald", "likelihood"), - ties = c("efron", "breslow", "exact"), - conf_level = 0.95 -) -} -\arguments{ -\item{pval_method}{A character string specifying the method for calculating the p-value. -Must be one of \code{"log-rank"}, \code{"wald"}, or \code{"likelihood"}.} - -\item{ties}{A character string specifying the method for handling tied failure times. -Must be one of \code{"efron"}, \code{"breslow"}, or \code{"exact"}.} - -\item{conf_level}{A numeric value between 0 and 1, specifying the confidence level.} -} -\value{ -A list with elements \code{pval_method}, \code{ties}, and \code{conf_level}. -} -\description{ -Creates a list of control parameters for Cox Proportional Hazards (Cox-PH) -analysis, used by functions like \code{\link{h_tbl_coxph_pairwise}}. -} diff --git a/man/control_coxph_annot.Rd b/man/control_coxph_annot.Rd deleted file mode 100644 index b5e541c2..00000000 --- a/man/control_coxph_annot.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{control_coxph_annot} -\alias{control_coxph_annot} -\title{Control parameters for Cox-PH Annotation Box} -\usage{ -control_coxph_annot( - x = 0.29, - y = 0.51, - w = 0.4, - h = 0.125, - fill = TRUE, - ref_lbls = FALSE -) -} -\arguments{ -\item{x}{A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area).} - -\item{y}{A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area).} - -\item{w}{A numeric value (0 to 1) for the width of the box (relative to plot area).} - -\item{h}{A numeric value (0 to 1) for the height of the box (relative to plot area).} - -\item{fill}{A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background.} - -\item{ref_lbls}{A logical flag indicating whether to append "vs. ref group" to row names.} -} -\value{ -A list with elements \code{x}, \code{y}, \code{w}, \code{h}, \code{fill}, and \code{ref_lbls}. -} -\description{ -Creates a list of control parameters for positioning and styling the -Cox Proportional Hazards annotation box on a plot. -} diff --git a/man/control_surv_med_annot.Rd b/man/control_surv_med_annot.Rd deleted file mode 100644 index 15c6e3ee..00000000 --- a/man/control_surv_med_annot.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{control_surv_med_annot} -\alias{control_surv_med_annot} -\title{Control parameters for Median Survival Annotation Box} -\usage{ -control_surv_med_annot(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) -} -\arguments{ -\item{x}{A numeric value (0 to 1) for the x-coordinate of the box center (relative to plot area).} - -\item{y}{A numeric value (0 to 1) for the y-coordinate of the box center (relative to plot area).} - -\item{w}{A numeric value (0 to 1) for the width of the box (relative to plot area).} - -\item{h}{A numeric value (0 to 1) for the height of the box (relative to plot area).} - -\item{fill}{A logical value (\code{TRUE} for a default light gray fill) or a color string for the box background.} -} -\value{ -A list with elements \code{x}, \code{y}, \code{w}, \code{h}, and \code{fill}. -} -\description{ -Creates a list of control parameters for positioning and styling the -median survival annotation box on a plot. -} diff --git a/man/control_surv_timepoint.Rd b/man/control_surv_timepoint.Rd index c2c5979b..457addf8 100644 --- a/man/control_surv_timepoint.Rd +++ b/man/control_surv_timepoint.Rd @@ -22,3 +22,4 @@ A list with elements \code{conf_level} and \code{conf_type}. Creates a list of control parameters for \code{survival::survfit} when used for timepoint estimation. } +\keyword{internal} diff --git a/man/df2gg.Rd b/man/df2gg.Rd index 3b3eb820..cc7a2be6 100644 --- a/man/df2gg.Rd +++ b/man/df2gg.Rd @@ -35,3 +35,4 @@ A \code{ggplot2} object representing the table. \description{ Creates a \code{ggplot2} object that renders a data frame as a table graphic. } +\keyword{internal} diff --git a/man/f_conf_level.Rd b/man/f_conf_level.Rd deleted file mode 100644 index a6653cad..00000000 --- a/man/f_conf_level.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{f_conf_level} -\alias{f_conf_level} -\title{Format Confidence Level String} -\usage{ -f_conf_level(conf_level) -} -\arguments{ -\item{conf_level}{A numeric confidence level (proportion, 0 to 1).} -} -\value{ -A character string. -} -\description{ -Converts a confidence level (e.g., 0.95) to a formatted string (e.g., "95\% CI"). -} diff --git a/man/h_km_fit.Rd b/man/h_km_fit.Rd index 267a4527..6f99e03b 100644 --- a/man/h_km_fit.Rd +++ b/man/h_km_fit.Rd @@ -26,3 +26,4 @@ This helper function fits a Kaplan-Meier survival curve model using the formula \code{survival::Surv(tte, is_event) ~ arm}. It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. } +\keyword{internal} diff --git a/man/h_tbl_median_surv.Rd b/man/h_tbl_median_surv.Rd index a99e272d..8d181629 100644 --- a/man/h_tbl_median_surv.Rd +++ b/man/h_tbl_median_surv.Rd @@ -18,3 +18,4 @@ A data frame with columns "N", "Median", and the confidence interval label. Extracts and formats the median survival time and its confidence interval from a fitted Kaplan-Meier object. } +\keyword{internal} diff --git a/man/h_xticks.Rd b/man/h_xticks.Rd index c50e8c1f..2f912508 100644 --- a/man/h_xticks.Rd +++ b/man/h_xticks.Rd @@ -19,3 +19,4 @@ A numeric vector of x-axis tick positions. \description{ Determines the positions for x-axis ticks based on the data and user input. } +\keyword{internal} diff --git a/man/s_coxph_pairwise.Rd b/man/s_coxph_pairwise.Rd index 3adf3125..39a44de0 100644 --- a/man/s_coxph_pairwise.Rd +++ b/man/s_coxph_pairwise.Rd @@ -41,3 +41,4 @@ Performs a Cox Proportional Hazards model calculation comparing two groups (a reference group and a comparison group). This is an internal function used by \code{\link{h_tbl_coxph_pairwise}}. } +\keyword{internal} From 585ee242635351d648067d87a4ba3010e0627e3d Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 09:43:09 +0800 Subject: [PATCH 09/25] rm code --- R/gkm.R | 62 --------------------------------------------------------- 1 file changed, 62 deletions(-) diff --git a/R/gkm.R b/R/gkm.R index d0301cc8..23e623ca 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -734,65 +734,3 @@ annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = gg_plt } -# styler: off -# nocov start - - - - ## Control and Internal KM Fit (from `tern` or similar) - - #' @title Control parameters for Survival Timepoint Estimation - #' - #' @description Creates a list of control parameters for \code{survival::survfit} when used - #' for timepoint estimation. - #' - #' @param conf_level A numeric value (0 to 1) for the confidence level. - #' @param conf_type A character string specifying the type of confidence interval. - #' Must be one of \code{"plain"}, \code{"log"}, or \code{"log-log"}. - #' @keywords internal - #' @return A list with elements \code{conf_level} and \code{conf_type}. - control_surv_timepoint <- function(conf_level = 0.95, conf_type = c("plain", "log", "log-log")) { - conf_type <- match.arg(conf_type) - assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere - list(conf_level = conf_level, conf_type = conf_type) - } - - -#' @title Kaplan-Meier Survival Curve Fitting -#' -#' @description This helper function fits a Kaplan-Meier survival curve model -#' using the formula \code{survival::Surv(tte, is_event) ~ arm}. -#' It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. -#' -#' @param df A data frame containing time-to-event (tte), event status (\code{is_event}), -#' and treatment arm (\code{arm}) variables. -#' @param variables A named list specifying the column names for time-to-event (\code{tte}), -#' event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}. -#' @param control_surv A list of control parameters for the \code{survival::survfit} function, -#' typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level -#' and confidence interval type. -#' @keywords internal -#' @return An object of class \code{survfit} from the \code{survival} package, containing -#' the fitted Kaplan-Meier curves. -#' @importFrom survival survfit Surv -#' @importFrom stats as.formula -h_km_fit <- function(df, variables, control_surv = control_surv_timepoint()) { - tte <- variables$tte - is_event <- variables$is_event - arm <- variables$arm - - # Assuming assert_valid_factor and assert_df_with_variables are defined elsewhere - assert_valid_factor(df[[arm]]) - assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm)) - - formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm)) - fit_km <- survival::survfit( - formula = formula, - data = df, - conf.int = control_surv$conf_level, - conf.type = control_surv$conf_type - ) - return(fit_km) -} -# nocov end -# styler: on From 4c3268f08fe41f7f019314ca198eb8fc56d9af2b Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 09:49:04 +0800 Subject: [PATCH 10/25] rm doc --- NAMESPACE | 1 - man/control_surv_timepoint.Rd | 25 ------------------------- man/h_km_fit.Rd | 29 ----------------------------- 3 files changed, 55 deletions(-) delete mode 100644 man/control_surv_timepoint.Rd delete mode 100644 man/h_km_fit.Rd diff --git a/NAMESPACE b/NAMESPACE index db60c2bc..982c9a68 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -94,6 +94,5 @@ importFrom(stats,as.formula) importFrom(survival,Surv) importFrom(survival,coxph) importFrom(survival,survdiff) -importFrom(survival,survfit) importFrom(tidyr,pivot_wider) importFrom(utils,tail) diff --git a/man/control_surv_timepoint.Rd b/man/control_surv_timepoint.Rd deleted file mode 100644 index 457addf8..00000000 --- a/man/control_surv_timepoint.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{control_surv_timepoint} -\alias{control_surv_timepoint} -\title{Control parameters for Survival Timepoint Estimation} -\usage{ -control_surv_timepoint( - conf_level = 0.95, - conf_type = c("plain", "log", "log-log") -) -} -\arguments{ -\item{conf_level}{A numeric value (0 to 1) for the confidence level.} - -\item{conf_type}{A character string specifying the type of confidence interval. -Must be one of \code{"plain"}, \code{"log"}, or \code{"log-log"}.} -} -\value{ -A list with elements \code{conf_level} and \code{conf_type}. -} -\description{ -Creates a list of control parameters for \code{survival::survfit} when used -for timepoint estimation. -} -\keyword{internal} diff --git a/man/h_km_fit.Rd b/man/h_km_fit.Rd deleted file mode 100644 index 6f99e03b..00000000 --- a/man/h_km_fit.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{h_km_fit} -\alias{h_km_fit} -\title{Kaplan-Meier Survival Curve Fitting} -\usage{ -h_km_fit(df, variables, control_surv = control_surv_timepoint()) -} -\arguments{ -\item{df}{A data frame containing time-to-event (tte), event status (\code{is_event}), -and treatment arm (\code{arm}) variables.} - -\item{variables}{A named list specifying the column names for time-to-event (\code{tte}), -event status (\code{is_event}), and treatment arm (\code{arm}) in \code{df}.} - -\item{control_surv}{A list of control parameters for the \code{survival::survfit} function, -typically generated by \code{\link{control_surv_timepoint}}, controlling confidence level -and confidence interval type.} -} -\value{ -An object of class \code{survfit} from the \code{survival} package, containing -the fitted Kaplan-Meier curves. -} -\description{ -This helper function fits a Kaplan-Meier survival curve model -using the formula \code{survival::Surv(tte, is_event) ~ arm}. -It is designed to be a prerequisite for plotting functions like \code{\link{g_km}}. -} -\keyword{internal} From b29d65d0c04ebfcbc73c553bc3fec914dc8e1ca0 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:04:05 +0800 Subject: [PATCH 11/25] modify code --- NAMESPACE | 7 +- R/assert.R | 110 ---------------- R/from_formatters.R | 31 ----- R/gkm.R | 250 ++++++++++-------------------------- man/get_cox_pairwise_tbl.Rd | 50 ++++++++ man/h_tbl_coxph_pairwise.Rd | 37 ------ man/obj_label-set.Rd | 14 -- man/s_coxph_pairwise.Rd | 44 ------- 8 files changed, 118 insertions(+), 425 deletions(-) delete mode 100644 R/assert.R delete mode 100644 R/from_formatters.R create mode 100644 man/get_cox_pairwise_tbl.Rd delete mode 100644 man/h_tbl_coxph_pairwise.Rd delete mode 100644 man/obj_label-set.Rd delete mode 100644 man/s_coxph_pairwise.Rd diff --git a/NAMESPACE b/NAMESPACE index 982c9a68..84d07f40 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,6 @@ S3method(add_overall,tbl_shift) S3method(add_overall,tbl_survfit_quantiles) S3method(add_overall,tbl_survfit_times) export("%>%") -export("obj_label<-") export(add_blank_rows) export(add_hierarchical_count_row) export(add_overall) @@ -16,8 +15,8 @@ export(annot_cox_ph) export(annot_surv_med) export(filter_hierarchical) export(g_km) +export(get_cox_pairwise_tbl) export(h_data_plot) -export(h_tbl_coxph_pairwise) export(label_roche_number) export(label_roche_percent) export(label_roche_pvalue) @@ -41,8 +40,6 @@ export(tbl_shift) export(tbl_survfit_quantiles) export(tbl_survfit_times) export(theme_gtsummary_roche) -exportMethods("obj_label<-") -exportMethods(obj_label) import(glue) import(rlang) importFrom(broom,tidy) @@ -90,7 +87,7 @@ importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) importFrom(rlang,.data) -importFrom(stats,as.formula) +importFrom(stats,pchisq) importFrom(survival,Surv) importFrom(survival,coxph) importFrom(survival,survdiff) diff --git a/R/assert.R b/R/assert.R deleted file mode 100644 index 8165d3ba..00000000 --- a/R/assert.R +++ /dev/null @@ -1,110 +0,0 @@ -# styler: off -# nocov start - - -assert_proportion_value <- function(x, include_boundaries = FALSE) { - checkmate::assert_number(x, lower = 0, upper = 1) - checkmate::assert_flag(include_boundaries) - if (isFALSE(include_boundaries)) { - checkmate::assert_true(x > 0) - checkmate::assert_true(x < 1) - } -} - -check_list_of_variables <- function(x) { - x <- Filter(Negate(is.null), x) - res <- checkmate::check_list(x, - names = "named", min.len = 1, - any.missing = FALSE, types = "character" - ) - if (isTRUE(res)) { - res <- checkmate::check_character(unlist(x), min.chars = 1) - } - res -} - -assert_list_of_variables <- function(x, .var.name = checkmate::vname(x), add = NULL) { - if (missing(x)) { - stop(sprintf( - "argument \"%s\" is missing, with no default", - .var.name - )) - } - res <- check_list_of_variables(x) - checkmate::makeAssertion(x, res, .var.name, add) -} - -check_df_with_variables <- function(df, variables, na_level = NULL) { - checkmate::assert_data_frame(df) - assert_list_of_variables(variables) - err_flag <- all(unlist(variables) %in% colnames(df)) - checkmate::assert_flag(err_flag) - if (isFALSE(err_flag)) { - vars <- setdiff(unlist(variables), colnames(df)) - return(paste( - deparse(substitute(df)), "does not contain all specified variables as column names. Missing from data frame:", - paste(vars, collapse = ", ") - )) - } - if (!is.null(na_level)) { - checkmate::assert_string(na_level) - res <- unlist(lapply( - as.list(df)[unlist(variables)], - function(x) any(x == na_level) - )) - if (any(res)) { - return(paste0( - deparse(substitute(df)), " contains explicit na_level (", - na_level, ") in the following columns: ", paste0(unlist(variables)[res], - collapse = ", " - ) - )) - } - } - return(TRUE) -} - -assert_df_with_variables <- function(df, variables, na_level = NULL, .var.name = checkmate::vname(df), - add = NULL) { - if (missing(df)) { - stop(sprintf( - "argument \"%s\" is missing, with no default", - .var.name - )) - } - res <- check_df_with_variables(df, variables, na_level) - checkmate::makeAssertion(df, res, .var.name, add) -} - -check_valid_factor <- function(x, min.levels = 1, max.levels = NULL, null.ok = TRUE, - any.missing = TRUE, n.levels = NULL, len = NULL) { - checkmate::assert_int(min.levels, lower = 1) - res <- checkmate::check_factor(x, - min.levels = min.levels, - null.ok = null.ok, max.levels = max.levels, any.missing = any.missing, - n.levels = n.levels - ) - if (isTRUE(res)) { - res <- checkmate::check_character(levels(x), min.chars = 1) - } - return(res) -} - -assert_valid_factor <- function(x, min.levels = 1, max.levels = NULL, null.ok = TRUE, - any.missing = TRUE, n.levels = NULL, len = NULL, .var.name = checkmate::vname(x), - add = NULL) { - if (missing(x)) { - stop(sprintf( - "argument \"%s\" is missing, with no default", - .var.name - )) - } - res <- check_valid_factor( - x, min.levels, max.levels, null.ok, - any.missing, n.levels, len - ) - checkmate::makeAssertion(x, res, .var.name, add) -} - -# nocov end -# styler: on diff --git a/R/from_formatters.R b/R/from_formatters.R deleted file mode 100644 index 0bab0ab7..00000000 --- a/R/from_formatters.R +++ /dev/null @@ -1,31 +0,0 @@ -# ## Changelog -# styler: off -# nocov start - -setGeneric("obj_label", function(obj) standardGeneric("obj_label")) - -#' The new label -#' @param value character(1). The new label -#' @export -setGeneric("obj_label<-", function(obj, value) standardGeneric("obj_label<-")) - -#' @exportMethod obj_label -setMethod("obj_label", "ANY", function(obj) attr(obj, "label")) - -#' @exportMethod obj_label<- -setMethod( - "obj_label<-", "ANY", - function(obj, value) { - attr(obj, "label") <- value - obj - } -) - -with_label <- function (x, label) -{ - obj_label(x) <- label - x -} - -# nocov end -# styler: on diff --git a/R/gkm.R b/R/gkm.R index 23e623ca..57dfc77e 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,16 +1,9 @@ -control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"), - ties = c("efron", "breslow", "exact"), conf_level = 0.95) { - pval_method <- match.arg(pval_method) - ties <- match.arg(ties) - assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere - list(pval_method = pval_method, ties = ties, conf_level = conf_level) -} control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { - assert_proportion_value(x) - assert_proportion_value(y) - assert_proportion_value(w) - assert_proportion_value(h) + # assert_proportion_value(x) + # assert_proportion_value(y) + # assert_proportion_value(w) + # assert_proportion_value(h) list(x = x, y = y, w = w, h = h, fill = fill) } @@ -23,10 +16,6 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T } -f_conf_level <- function(conf_level) { - assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere - paste0(conf_level * 100, "% CI") -} #' @title Convert Data Frame to ggplot2 Table Graphic #' @@ -163,69 +152,75 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { ) } -#' @title Pairwise Cox Proportional Hazards Model Summary Table +#' Perform Pairwise Cox Proportional Hazards Regression +#' +#' This function performs a pairwise comparison of treatment arms using the **Cox proportional hazards model** and calculates the corresponding **log-rank p-value**. Each comparison is made between a specified reference group and all other comparison groups in the dataset. +#' +#' @param model_formula A \code{\link[stats]{formula}} object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}. +#' @param data A \code{\link[base]{data.frame}} containing the survival data, including time, status, and the arm variable. +#' @param arm A character string specifying the name of the column in \code{data} that contains the grouping/treatment arm variable (must be a factor-like variable). +#' @param ref_group A character string specifying the level of the \code{arm} variable to be used as the **reference group** for all pairwise comparisons. If \code{NULL} (the default), the **first unique level** of the \code{arm} column is used as the reference group. #' -#' @description This function computes and formats the results of a pairwise Cox Proportional -#' Hazards (Cox-PH) regression analysis between different treatment arms. +#' @return A \code{\link[base]{data.frame}} with the results of the pairwise comparisons. The columns include: +#' \itemize{ +#' \item \code{arm}: The comparison arm being tested against the reference group. +#' \item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. +#' \item \code{ci}: The 95\% confidence interval for the HR, presented as a string in the format "(lower, upper)", with values formatted to two decimal places. +#' \item \code{pval}: The log-rank p-value for the comparison. +#' } #' -#' @param df A data frame containing the survival data. -#' @param variables A named list specifying the column names for time-to-event (\code{tte}), -#' treatment arm (\code{arm}), event status (\code{is_event}), and optional strata (\code{strata}) in \code{df}. -#' @param ref_group_coxph An optional string specifying the reference group for the Cox-PH model. -#' If \code{NULL}, the first factor level of the arm variable is used as the reference group. -#' @param control_coxph_pw A list of control parameters for the Cox-PH model, typically -#' generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level. -#' @param annot_coxph_ref_lbls A logical flag indicating whether to append "vs. ref group" -#' to the row names in the resulting table. +#' @details The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. #' -#' @return A data frame summarizing the pairwise Cox-PH results, including Hazard Ratio (HR), -#' its confidence interval, and the p-value. +#' @importFrom survival coxph Surv survdiff +#' @importFrom stats pchisq #' @export -h_tbl_coxph_pairwise <- function(df, - variables, - ref_group_coxph = NULL, - control_coxph_pw = control_coxph(), - annot_coxph_ref_lbls = FALSE) { - # ... (function body remains the same) - assert_df_with_variables(df, variables) # Assuming assert_df_with_variables is defined elsewhere - checkmate::assert_choice(ref_group_coxph, levels(df[[variables$arm]]), null.ok = TRUE) - checkmate::assert_flag(annot_coxph_ref_lbls) - - arm <- variables$arm - df[[arm]] <- factor(df[[arm]]) - - ref_group <- if (!is.null(ref_group_coxph)) ref_group_coxph else levels(df[[variables$arm]])[1] - comp_group <- setdiff(levels(df[[arm]]), ref_group) - - results <- Map(function(comp) { - res <- s_coxph_pairwise( - df = df[df[[arm]] == comp, , drop = FALSE], - .ref_group = df[df[[arm]] == ref_group, , drop = FALSE], - .in_ref_col = FALSE, - .var = variables$tte, - is_event = variables$is_event, - strata = variables$strata, - control = control_coxph_pw - ) - res_df <- data.frame( - hr = format(round(res$hr, 2), nsmall = 2), - hr_ci = paste0( - "(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ", - format(round(res$hr_ci[2], 2), nsmall = 2), ")" - ), - pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4), - stringsAsFactors = FALSE +#' +#' @examples +#' \dontrun{ +#' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +#' # and 'arm' is the treatment group) +#' # library(survival) +#' # data(lung) +#' # lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) +#' # lung$status <- lung$status - 1 # Convert status to 0/1 +#' # lung <- na.omit(lung) +#' +#' ormula <- Surv(time, status) ~ arm +#' results_tbl <- get_cox_pairwise_tbl(model_formula = formula, +#' data = lung, +#' arm = "arm", +#' ref_group = "A") +#' print(results_tbl) +#' } +get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL){ + ref_group <- if (!is.null(ref_group)) ref_group else unique(data[[arm]])[1] + comp_group <- setdiff(unique(data[[arm]]), ref_group) + + ret <- c() + for (current_arm in comp_group){ + comp_df <- data[data[[arm]] %in% c(ref_group, current_arm), ] + suppressWarnings( + coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary()) + orginal_survdiff <- survdiff(formula = model_formula, data = comp_df) + log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - + 1) + current_row <- data.frame( + arm = current_arm, + hr = sprintf("%.2f", coxph_ans$conf.int[1,1]), + ci = paste0("(", + sprintf("%.2f", coxph_ans$conf.int[1,3]), + ", ", + sprintf("%.2f", coxph_ans$conf.int[1,4]), + ")"), + pval = log_rank_pvalue ) - # Assuming obj_label is defined elsewhere and hr_ci is the label for the CI - colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character")) # nolint - row.names(res_df) <- comp - res_df - }, comp_group) - if (annot_coxph_ref_lbls) names(results) <- paste(comp_group, "vs.", ref_group) - - do.call(rbind, results) + ret <- rbind(ret, current_row) + } + + return (ret) } + #' @title Prepare Kaplan-Meier Data for Plotting #' #' @description Takes a fitted \code{survfit} object and processes it into a data frame @@ -283,118 +278,6 @@ h_data_plot <- function(fit_km, } -## Core Statistical Function - -#' @title Pairwise Cox Proportional Hazards Model Calculation -#' -#' @description Performs a Cox Proportional Hazards model calculation comparing two groups -#' (a reference group and a comparison group). This is an internal function used by -#' \code{\link{h_tbl_coxph_pairwise}}. -#' -#' @param df Data frame for the comparison group. -#' @param .ref_group Data frame for the reference group. -#' @param .in_ref_col Logical, if \code{TRUE} returns empty results (for internal table building). -#' @param .var Character string for the time-to-event variable name. -#' @param is_event Character string for the event status variable name. -#' @param strata Optional character vector of stratification variable names. -#' @param control A list of control parameters from \code{\link{control_coxph}}. -#' @param ... Additional arguments (not used). -#' @keywords internal -#' @return A list containing the p-value, Hazard Ratio (HR), confidence interval (HR\_CI), -#' and total counts. -#' @importFrom survival Surv coxph survdiff -#' @importFrom stats as.formula -s_coxph_pairwise <- - function(df, .ref_group, .in_ref_col, .var, is_event, strata = NULL, - control = control_coxph(), ...) { - # ... (function body remains the same) - checkmate::assert_string(.var) - checkmate::assert_numeric(df[[.var]]) - checkmate::assert_logical(df[[is_event]]) - assert_df_with_variables(df, list(tte = .var, is_event = is_event)) - pval_method <- control$pval_method - ties <- control$ties - conf_level <- control$conf_level - if (.in_ref_col) { - # ... (returns empty list for reference column) - return(list(pvalue = with_label( - numeric(), - paste0("p-value (", pval_method, ")") - ), hr = with_label( - numeric(), - "Hazard Ratio" - ), hr_ci = with_label( - numeric(), - f_conf_level(conf_level) - ), hr_ci_3d = with_label( - numeric(), - paste0( - "Hazard Ratio (", f_conf_level(conf_level), - ")" - ) - ), n_tot = with_label( - numeric(), - "Total n" - ), n_tot_events = with_label( - numeric(), - "Total events" - ))) - } - data <- rbind(.ref_group, df) - group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), - levels = c("ref", "x") - ) - df_cox <- data.frame( - tte = data[[.var]], is_event = data[[is_event]], - arm = group - ) - if (is.null(strata)) { - formula_cox <- survival::Surv(tte, is_event) ~ arm - } else { - formula_cox <- stats::as.formula(paste0( - "survival::Surv(tte, is_event) ~ arm + survival::strata(", - paste(strata, collapse = ","), ")" - )) - df_cox <- cbind(df_cox, data[strata]) - } - cox_fit <- survival::coxph( - formula = formula_cox, data = df_cox, - ties = ties - ) - sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE) - orginal_survdiff <- survival::survdiff(formula_cox, data = df_cox) - log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - - 1) - pval <- switch(pval_method, - wald = sum_cox$waldtest["pvalue"], - `log-rank` = log_rank_pvalue, - likelihood = sum_cox$logtest["pvalue"] - ) - # Assuming with_label is defined elsewhere - list(pvalue = with_label(unname(pval), paste0( - "p-value (", - pval_method, ")" - )), hr = with_label(sum_cox$conf.int[ - 1, - 1 - ], "Hazard Ratio"), hr_ci = with_label(unname(sum_cox$conf.int[ - 1, - 3:4 - ]), f_conf_level(conf_level)), hr_ci_3d = with_label(c(sum_cox$conf.int[ - 1, - 1 - ], unname(sum_cox$conf.int[1, 3:4])), paste0( - "Hazard Ratio (", - f_conf_level(conf_level), ")" - )), n_tot = with_label( - sum_cox$n, - "Total n" - ), n_tot_events = with_label( - sum_cox$nevent, - "Total events" - )) - } - ## Core Plotting and Annotation Functions @@ -733,4 +616,3 @@ annot_at_risk <- function(gg_plt, fit_km, font_size = 10, annot_at_risk_title = ) gg_plt } - diff --git a/man/get_cox_pairwise_tbl.Rd b/man/get_cox_pairwise_tbl.Rd new file mode 100644 index 00000000..2061c737 --- /dev/null +++ b/man/get_cox_pairwise_tbl.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gkm.R +\name{get_cox_pairwise_tbl} +\alias{get_cox_pairwise_tbl} +\title{Perform Pairwise Cox Proportional Hazards Regression} +\usage{ +get_cox_pairwise_tbl(model_formula, data, arm, ref_group = NULL) +} +\arguments{ +\item{model_formula}{A \code{\link[stats]{formula}} object specifying the survival model, typically in the form \code{Surv(time, status) ~ arm + covariates}.} + +\item{data}{A \code{\link[base]{data.frame}} containing the survival data, including time, status, and the arm variable.} + +\item{arm}{A character string specifying the name of the column in \code{data} that contains the grouping/treatment arm variable (must be a factor-like variable).} + +\item{ref_group}{A character string specifying the level of the \code{arm} variable to be used as the \strong{reference group} for all pairwise comparisons. If \code{NULL} (the default), the \strong{first unique level} of the \code{arm} column is used as the reference group.} +} +\value{ +A \code{\link[base]{data.frame}} with the results of the pairwise comparisons. The columns include: +\itemize{ +\item \code{arm}: The comparison arm being tested against the reference group. +\item \code{hr}: The Hazard Ratio (HR) for the comparison arm vs. the reference arm, formatted to two decimal places. +\item \code{ci}: The 95\\% confidence interval for the HR, presented as a string in the format "(lower, upper)", with values formatted to two decimal places. +\item \code{pval}: The log-rank p-value for the comparison. +} +} +\description{ +This function performs a pairwise comparison of treatment arms using the \strong{Cox proportional hazards model} and calculates the corresponding \strong{log-rank p-value}. Each comparison is made between a specified reference group and all other comparison groups in the dataset. +} +\details{ +The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. +} +\examples{ +\dontrun{ +# Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), +# and 'arm' is the treatment group) +# library(survival) +# data(lung) +# lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) +# lung$status <- lung$status - 1 # Convert status to 0/1 +# lung <- na.omit(lung) + +ormula <- Surv(time, status) ~ arm +results_tbl <- get_cox_pairwise_tbl(model_formula = formula, +data = lung, +arm = "arm", +ref_group = "A") + print(results_tbl) +} +} diff --git a/man/h_tbl_coxph_pairwise.Rd b/man/h_tbl_coxph_pairwise.Rd deleted file mode 100644 index ee4add72..00000000 --- a/man/h_tbl_coxph_pairwise.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{h_tbl_coxph_pairwise} -\alias{h_tbl_coxph_pairwise} -\title{Pairwise Cox Proportional Hazards Model Summary Table} -\usage{ -h_tbl_coxph_pairwise( - df, - variables, - ref_group_coxph = NULL, - control_coxph_pw = control_coxph(), - annot_coxph_ref_lbls = FALSE -) -} -\arguments{ -\item{df}{A data frame containing the survival data.} - -\item{variables}{A named list specifying the column names for time-to-event (\code{tte}), -treatment arm (\code{arm}), event status (\code{is_event}), and optional strata (\code{strata}) in \code{df}.} - -\item{ref_group_coxph}{An optional string specifying the reference group for the Cox-PH model. -If \code{NULL}, the first factor level of the arm variable is used as the reference group.} - -\item{control_coxph_pw}{A list of control parameters for the Cox-PH model, typically -generated by \code{\link{control_coxph}}, controlling the p-value method, ties handling, and confidence level.} - -\item{annot_coxph_ref_lbls}{A logical flag indicating whether to append "vs. ref group" -to the row names in the resulting table.} -} -\value{ -A data frame summarizing the pairwise Cox-PH results, including Hazard Ratio (HR), -its confidence interval, and the p-value. -} -\description{ -This function computes and formats the results of a pairwise Cox Proportional -Hazards (Cox-PH) regression analysis between different treatment arms. -} diff --git a/man/obj_label-set.Rd b/man/obj_label-set.Rd deleted file mode 100644 index f566f8c6..00000000 --- a/man/obj_label-set.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/from_formatters.R -\name{obj_label<-} -\alias{obj_label<-} -\title{The new label} -\usage{ -obj_label(obj) <- value -} -\arguments{ -\item{value}{character(1). The new label} -} -\description{ -The new label -} diff --git a/man/s_coxph_pairwise.Rd b/man/s_coxph_pairwise.Rd deleted file mode 100644 index 39a44de0..00000000 --- a/man/s_coxph_pairwise.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gkm.R -\name{s_coxph_pairwise} -\alias{s_coxph_pairwise} -\title{Pairwise Cox Proportional Hazards Model Calculation} -\usage{ -s_coxph_pairwise( - df, - .ref_group, - .in_ref_col, - .var, - is_event, - strata = NULL, - control = control_coxph(), - ... -) -} -\arguments{ -\item{df}{Data frame for the comparison group.} - -\item{.ref_group}{Data frame for the reference group.} - -\item{.in_ref_col}{Logical, if \code{TRUE} returns empty results (for internal table building).} - -\item{.var}{Character string for the time-to-event variable name.} - -\item{is_event}{Character string for the event status variable name.} - -\item{strata}{Optional character vector of stratification variable names.} - -\item{control}{A list of control parameters from \code{\link{control_coxph}}.} - -\item{...}{Additional arguments (not used).} -} -\value{ -A list containing the p-value, Hazard Ratio (HR), confidence interval (HR\_CI), -and total counts. -} -\description{ -Performs a Cox Proportional Hazards model calculation comparing two groups -(a reference group and a comparison group). This is an internal function used by -\code{\link{h_tbl_coxph_pairwise}}. -} -\keyword{internal} From 44b191ca8570220f1a8f218e0d3fd73ac737a50f Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:11:03 +0800 Subject: [PATCH 12/25] update --- R/gkm.R | 57 +++++++++++++++++++++---------------- man/get_cox_pairwise_tbl.Rd | 26 ++++++++--------- 2 files changed, 45 insertions(+), 38 deletions(-) diff --git a/R/gkm.R b/R/gkm.R index 57dfc77e..0bbefbab 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -1,3 +1,7 @@ +f_conf_level <- function(conf_level) { + # assert_proportion_value(conf_level) # Assuming assert_proportion_value is defined elsewhere + paste0(conf_level * 100, "% CI") +} control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) { # assert_proportion_value(x) @@ -176,48 +180,51 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' @export #' #' @examples -#' \dontrun{ #' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), #' # and 'arm' is the treatment group) -#' # library(survival) -#' # data(lung) -#' # lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) -#' # lung$status <- lung$status - 1 # Convert status to 0/1 -#' # lung <- na.omit(lung) +#' library(survival) +#' data(lung) +#' lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) +#' lung$status <- lung$status - 1 # Convert status to 0/1 +#' lung <- na.omit(lung) #' -#' ormula <- Surv(time, status) ~ arm -#' results_tbl <- get_cox_pairwise_tbl(model_formula = formula, -#' data = lung, -#' arm = "arm", -#' ref_group = "A") -#' print(results_tbl) -#' } -get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL){ +#' formula <- Surv(time, status) ~ arm +#' results_tbl <- get_cox_pairwise_tbl( +#' model_formula = formula, +#' data = lung, +#' arm = "arm", +#' ref_group = "A" +#' ) +#' print(results_tbl) +get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { ref_group <- if (!is.null(ref_group)) ref_group else unique(data[[arm]])[1] comp_group <- setdiff(unique(data[[arm]]), ref_group) ret <- c() - for (current_arm in comp_group){ + for (current_arm in comp_group) { comp_df <- data[data[[arm]] %in% c(ref_group, current_arm), ] suppressWarnings( - coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary()) + coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary() + ) orginal_survdiff <- survdiff(formula = model_formula, data = comp_df) log_rank_pvalue <- 1 - stats::pchisq(orginal_survdiff$chisq, length(orginal_survdiff$n) - - 1) + 1) current_row <- data.frame( - arm = current_arm, - hr = sprintf("%.2f", coxph_ans$conf.int[1,1]), - ci = paste0("(", - sprintf("%.2f", coxph_ans$conf.int[1,3]), - ", ", - sprintf("%.2f", coxph_ans$conf.int[1,4]), - ")"), + hr = sprintf("%.2f", coxph_ans$conf.int[1, 1]), + ci = paste0( + "(", + sprintf("%.2f", coxph_ans$conf.int[1, 3]), + ", ", + sprintf("%.2f", coxph_ans$conf.int[1, 4]), + ")" + ), pval = log_rank_pvalue ) + rownames(current_row) <- current_arm ret <- rbind(ret, current_row) } - return (ret) + return(ret) } diff --git a/man/get_cox_pairwise_tbl.Rd b/man/get_cox_pairwise_tbl.Rd index 2061c737..ff355067 100644 --- a/man/get_cox_pairwise_tbl.Rd +++ b/man/get_cox_pairwise_tbl.Rd @@ -31,20 +31,20 @@ This function performs a pairwise comparison of treatment arms using the \strong The function iterates through each unique arm (excluding the reference group), filters the data to include only the current comparison arm and the reference arm, and then fits a Cox model (\code{\link[survival]{coxph}}) and performs a log-rank test (\code{\link[survival]{survdiff}}). The Hazard Ratio and its 95\\% confidence interval are extracted from the Cox model summary, and the p-value is calculated from the log-rank test. } \examples{ -\dontrun{ # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), # and 'arm' is the treatment group) -# library(survival) -# data(lung) -# lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) -# lung$status <- lung$status - 1 # Convert status to 0/1 -# lung <- na.omit(lung) +library(survival) +data(lung) +lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) +lung$status <- lung$status - 1 # Convert status to 0/1 +lung <- na.omit(lung) -ormula <- Surv(time, status) ~ arm -results_tbl <- get_cox_pairwise_tbl(model_formula = formula, -data = lung, -arm = "arm", -ref_group = "A") - print(results_tbl) -} +formula <- Surv(time, status) ~ arm +results_tbl <- get_cox_pairwise_tbl( + model_formula = formula, + data = lung, + arm = "arm", + ref_group = "A" +) +print(results_tbl) } From 770c10b4abb2349e23d2485d9b14a0e3dfabb3a1 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:13:10 +0800 Subject: [PATCH 13/25] updat epkgdown --- _pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 0494b77d..ef2fb0c1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -43,7 +43,7 @@ reference: - label_roche - title: "g km plot" contents: - - h_tbl_coxph_pairwise + - get_cox_pairwise_tbl - h_data_plot - g_km - annot_surv_med From 9c2ef035c4098025b67e5b30576b07530a9dbd7b Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:17:42 +0800 Subject: [PATCH 14/25] restyle: --- R/gkm.R | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/R/gkm.R b/R/gkm.R index 0bbefbab..2ee35001 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -20,7 +20,6 @@ control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = T } - #' @title Convert Data Frame to ggplot2 Table Graphic #' #' @description Creates a \code{ggplot2} object that renders a data frame as a table graphic. @@ -285,7 +284,6 @@ h_data_plot <- function(fit_km, } - ## Core Plotting and Annotation Functions #' @title Generate a Kaplan-Meier Plot @@ -319,25 +317,26 @@ h_data_plot <- function(fit_km, #' @importFrom rlang .data #' @export g_km <- function( - surv_plot_data, - col = NULL, - lty = NULL, - lwd = 0.5, - censor_show = TRUE, - pch = 3, - size = 2, - max_time = NULL, - xticks = NULL, - xlab = "Days", - yval = c("Survival", "Failure"), - ylab = paste(yval, "Probability"), - ylim = NULL, - title = NULL, - footnotes = NULL, - font_size = 10, - ci_ribbon = FALSE, - legend_pos = NULL, - ggtheme = NULL) { + surv_plot_data, + col = NULL, + lty = NULL, + lwd = 0.5, + censor_show = TRUE, + pch = 3, + size = 2, + max_time = NULL, + xticks = NULL, + xlab = "Days", + yval = c("Survival", "Failure"), + ylab = paste(yval, "Probability"), + ylim = NULL, + title = NULL, + footnotes = NULL, + font_size = 10, + ci_ribbon = FALSE, + legend_pos = NULL, + ggtheme = NULL +) { # ... (function body remains the same) checkmate::assert_data_frame(surv_plot_data, min.cols = 7, min.rows = 1) data <- surv_plot_data From 10e6312160933b934ce34069187741eed31e4a05 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:21:31 +0800 Subject: [PATCH 15/25] update --- DESCRIPTION | 1 + R/gkm.R | 10 ++++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 919d91f5..c812bd4c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,6 +35,7 @@ Imports: ggsurvfit (>= 1.1.0), glue (>= 1.8.0), gt (>= 0.11.1), + labeling, lifecycle, rlang (>= 1.1.5), survival (>= 3.6-4), diff --git a/R/gkm.R b/R/gkm.R index 2ee35001..4f9cb4fa 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -96,6 +96,7 @@ df2gg <- function(df, colwidths = NULL, font_size = 10, col_labels = TRUE, #' @param xticks A numeric vector of specific tick positions, a single number for the interval, or \code{NULL} for auto-calculation. #' @param max_time Optional numeric value specifying the maximum time to consider for tick range. #' @keywords internal +#' @importFrom labeling extended #' @return A numeric vector of x-axis tick positions. h_xticks <- function(data, xticks = NULL, max_time = NULL) { # ... (function body remains the same) @@ -467,8 +468,7 @@ g_km <- function( #' #' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. #' @param fit_km A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data. -#' @param control_annot_surv_med A list of control parameters for the annotation box, -#' typically generated by \code{\link{control_surv_med_annot}}. +#' @param control_annot_surv_med A list of control parameters for the annotation box. #' @param font_size Numeric, base font size for the annotation table. #' #' @return A \code{cowplot} object with the median survival table annotation added. @@ -510,10 +510,8 @@ annot_surv_med <- function(gg_plt, fit_km, control_annot_surv_med = control_surv #' Kaplan-Meier plot using \code{cowplot}. #' #' @param gg_plt A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot. -#' @param coxph_tbl A data frame containing pre-calculated Cox-PH results, -#' typically generated by \code{\link{h_tbl_coxph_pairwise}}. -#' @param control_annot_coxph A list of control parameters for the annotation box, -#' typically generated by \code{\link{control_coxph_annot}}. +#' @param coxph_tbl A data frame containing pre-calculated Cox-PH results. +#' @param control_annot_coxph A list of control parameters for the annotation box. #' @param font_size Numeric, base font size for the annotation table. #' #' @return A \code{cowplot} object with the Cox-PH table annotation added. From 1512115ed63ab9cb157ee7443ce6e49d17e96474 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 14:34:10 +0800 Subject: [PATCH 16/25] update test --- tests/testthat/test-gkm.R | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index f247c6d9..57448e7d 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -1,22 +1,27 @@ skip_on_cran() anl <- cards::ADTTE |> - dplyr::mutate(is_event = CNSR == 0) %>% - dplyr::mutate(TRTP = as.factor(TRTP)) - -variables <- list(tte = "AVAL", is_event = "is_event", arm = "TRTP") + dplyr::mutate(is_event = CNSR == 0) +by = "TRTP" test_that("test gkm() works", { - fit_kmg01 <- survfit(ggsurvfit::Surv_CNSR(AVAL, CNSR) ~ TRTP, anl) - variables <- list(tte = "AVAL", is_event = "is_event", arm = "TRTP") + GROUP_SYM <- rlang::ensym(by) + model_formula <- rlang::new_formula( + lhs = rlang::expr(Surv(AVAL, is_event)), + rhs = rlang::expr(!!GROUP_SYM) + ) + + fit_kmg01 <- survival::survfit(model_formula, anl) + expect_no_error(surv_plot_data <- h_data_plot(fit_kmg01)) expect_no_error( suppressWarnings( - coxph_tbl <- h_tbl_coxph_pairwise( - df = anl, - variables = variables + coxph_tbl <- get_cox_pairwise_tbl( + model_formula, + data = anl, + arm = by ) ) ) From 1e3fa5d1d750f18c5367fe1cb35cc9546db690d1 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 16:07:59 +0800 Subject: [PATCH 17/25] update doc --- NAMESPACE | 1 + R/tbl_null_report.R | 4 +++- man/annot_cox_ph.Rd | 6 ++---- man/annot_surv_med.Rd | 3 +-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 84d07f40..3009e9eb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -86,6 +86,7 @@ importFrom(ggplot2,theme_void) importFrom(gtsummary,add_overall) importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) +importFrom(labeling,extended) importFrom(rlang,.data) importFrom(stats,pchisq) importFrom(survival,Surv) diff --git a/R/tbl_null_report.R b/R/tbl_null_report.R index d8adb081..02b7237e 100644 --- a/R/tbl_null_report.R +++ b/R/tbl_null_report.R @@ -11,7 +11,9 @@ #' @export #' @rdname tbl_null_report tbl_null_report <- function( - label = "Null Report: no observations met the reporting criteria for inclusion in this output.") { + label = + "Null Report: no observations met the reporting criteria for inclusion in this output." +) { set_cli_abort_call() # Check input label ---------------------------------------------------------- diff --git a/man/annot_cox_ph.Rd b/man/annot_cox_ph.Rd index cbfce564..f4303738 100644 --- a/man/annot_cox_ph.Rd +++ b/man/annot_cox_ph.Rd @@ -14,11 +14,9 @@ annot_cox_ph( \arguments{ \item{gg_plt}{A \code{ggplot2} or \code{cowplot} object of the Kaplan-Meier plot.} -\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results, -typically generated by \code{\link{h_tbl_coxph_pairwise}}.} +\item{coxph_tbl}{A data frame containing pre-calculated Cox-PH results.} -\item{control_annot_coxph}{A list of control parameters for the annotation box, -typically generated by \code{\link{control_coxph_annot}}.} +\item{control_annot_coxph}{A list of control parameters for the annotation box.} \item{font_size}{Numeric, base font size for the annotation table.} } diff --git a/man/annot_surv_med.Rd b/man/annot_surv_med.Rd index 0e2fa21c..a86b706e 100644 --- a/man/annot_surv_med.Rd +++ b/man/annot_surv_med.Rd @@ -16,8 +16,7 @@ annot_surv_med( \item{fit_km}{A fitted Kaplan-Meier object of class \code{survfit}, used to generate the table data.} -\item{control_annot_surv_med}{A list of control parameters for the annotation box, -typically generated by \code{\link{control_surv_med_annot}}.} +\item{control_annot_surv_med}{A list of control parameters for the annotation box.} \item{font_size}{Numeric, base font size for the annotation table.} } From de896d3db37ba96ec1424d4c1b53ba76a3ab2ec4 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 16:09:45 +0800 Subject: [PATCH 18/25] not using ggsurvfit yet --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c812bd4c..482e2664 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,6 @@ Imports: dplyr (>= 1.1.4), flextable (>= 0.9.7), ggplot2 (>= 4.0.0), - ggsurvfit (>= 1.1.0), glue (>= 1.8.0), gt (>= 0.11.1), labeling, From c65cf110916524dbbbd3dff11b06ab53959c486d Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 16:11:48 +0800 Subject: [PATCH 19/25] restyle test --- tests/testthat/test-gkm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index 57448e7d..a797b688 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -2,7 +2,7 @@ skip_on_cran() anl <- cards::ADTTE |> dplyr::mutate(is_event = CNSR == 0) -by = "TRTP" +by <- "TRTP" test_that("test gkm() works", { GROUP_SYM <- rlang::ensym(by) From 4364b7a9aed31a0ea475962ddc763717d72a4232 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 18:49:00 +0800 Subject: [PATCH 20/25] update assertion --- R/gkm.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/gkm.R b/R/gkm.R index 4f9cb4fa..dc9df6e7 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -197,12 +197,14 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' ) #' print(results_tbl) get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { - ref_group <- if (!is.null(ref_group)) ref_group else unique(data[[arm]])[1] - comp_group <- setdiff(unique(data[[arm]]), ref_group) + ref_group <- if (!is.null(ref_group)) ref_group else levels(data[[arm]])[1] + comp_group <- setdiff(levels(data[[arm]]), ref_group) ret <- c() for (current_arm in comp_group) { - comp_df <- data[data[[arm]] %in% c(ref_group, current_arm), ] + subset_arm <- c(ref_group, current_arm) + assertthat::assert_that(length(subset_arm) == 2, msg = "Make sure 2 arms") + comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] suppressWarnings( coxph_ans <- coxph(formula = model_formula, data = comp_df) %>% summary() ) From 065ca7dfd6c482c9439dfe2e6ca6642c7176ca83 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 19:35:15 +0800 Subject: [PATCH 21/25] update assertion --- NAMESPACE | 1 + R/gkm.R | 7 +++++-- tests/testthat/test-gkm.R | 5 +++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3009e9eb..ffbe6a0d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -88,6 +88,7 @@ importFrom(gtsummary,filter_hierarchical) importFrom(gtsummary,sort_hierarchical) importFrom(labeling,extended) importFrom(rlang,.data) +importFrom(rlang,ensym) importFrom(stats,pchisq) importFrom(survival,Surv) importFrom(survival,coxph) diff --git a/R/gkm.R b/R/gkm.R index dc9df6e7..652a0fe9 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -178,7 +178,7 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' @importFrom survival coxph Surv survdiff #' @importFrom stats pchisq #' @export -#' +#' @importFrom rlang ensym #' @examples #' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), #' # and 'arm' is the treatment group) @@ -197,7 +197,10 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' ) #' print(results_tbl) get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { - ref_group <- if (!is.null(ref_group)) ref_group else levels(data[[arm]])[1] + msg = paste0(rlang::ensym(data), "[['", rlang::ensym(arm), "']] is not a factor") + assertthat::assert_that(is.factor(data[[arm]]), msg = msg) + ref_group <- if (!is.null(ref_group)) { + ref_group } else {levels(data[[arm]])[1]} comp_group <- setdiff(levels(data[[arm]]), ref_group) ret <- c() diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index a797b688..3a8e6fbc 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -3,6 +3,11 @@ skip_on_cran() anl <- cards::ADTTE |> dplyr::mutate(is_event = CNSR == 0) by <- "TRTP" +anl[[by]] = factor(anl[[by]], levels = c( + "Placebo", + "Xanomeline Low Dose", + "Xanomeline High Dose" +)) test_that("test gkm() works", { GROUP_SYM <- rlang::ensym(by) From 889c21a6415b4b01098740f7720443719d946d9e Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 19:57:53 +0800 Subject: [PATCH 22/25] update test --- tests/testthat/test-gkm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index 3a8e6fbc..3ff407fb 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -10,7 +10,7 @@ anl[[by]] = factor(anl[[by]], levels = c( )) test_that("test gkm() works", { - GROUP_SYM <- rlang::ensym(by) + GROUP_SYM <- rlang::sym(by) model_formula <- rlang::new_formula( lhs = rlang::expr(Surv(AVAL, is_event)), rhs = rlang::expr(!!GROUP_SYM) From afebed3eb4304e8c494061ca016a6d4591b2dd4c Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 20:36:49 +0800 Subject: [PATCH 23/25] update --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/gkm.R | 18 +++++++++++------- man/get_cox_pairwise_tbl.Rd | 10 +++++----- tests/testthat/test-gkm.R | 2 +- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 482e2664..4de481a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,8 @@ BugReports: https://github.com/insightsengineering/crane/issues Depends: gtsummary (>= 2.4.0.9009), R (>= 4.2) -Imports: +Imports: + assertthat (>= 0.2.1), broom (>= 1.0.8), cards (>= 0.7.0), cardx (>= 0.3.0), diff --git a/NAMESPACE b/NAMESPACE index ffbe6a0d..c7c28f2a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(tbl_survfit_times) export(theme_gtsummary_roche) import(glue) import(rlang) +importFrom(asserthat,assert_that) importFrom(broom,tidy) importFrom(cowplot,draw_plot) importFrom(cowplot,ggdraw) diff --git a/R/gkm.R b/R/gkm.R index 652a0fe9..e5fb67f5 100644 --- a/R/gkm.R +++ b/R/gkm.R @@ -179,28 +179,32 @@ h_tbl_median_surv <- function(fit_km, armval = "All") { #' @importFrom stats pchisq #' @export #' @importFrom rlang ensym +#' @importFrom assertthat assert_that #' @examples #' # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), #' # and 'arm' is the treatment group) #' library(survival) -#' data(lung) -#' lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) -#' lung$status <- lung$status - 1 # Convert status to 0/1 -#' lung <- na.omit(lung) +#' use_lung <- lung +#' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +#' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +#' use_lung <- na.omit(use_lung) #' #' formula <- Surv(time, status) ~ arm #' results_tbl <- get_cox_pairwise_tbl( #' model_formula = formula, -#' data = lung, +#' data = use_lung, #' arm = "arm", #' ref_group = "A" #' ) #' print(results_tbl) get_cox_pairwise_tbl <- function(model_formula, data, arm, ref_group = NULL) { - msg = paste0(rlang::ensym(data), "[['", rlang::ensym(arm), "']] is not a factor") + msg <- paste0(rlang::ensym(data), "[['", rlang::ensym(arm), "']] is not a factor") assertthat::assert_that(is.factor(data[[arm]]), msg = msg) ref_group <- if (!is.null(ref_group)) { - ref_group } else {levels(data[[arm]])[1]} + ref_group + } else { + levels(data[[arm]])[1] + } comp_group <- setdiff(levels(data[[arm]]), ref_group) ret <- c() diff --git a/man/get_cox_pairwise_tbl.Rd b/man/get_cox_pairwise_tbl.Rd index ff355067..7ebe55ab 100644 --- a/man/get_cox_pairwise_tbl.Rd +++ b/man/get_cox_pairwise_tbl.Rd @@ -34,15 +34,15 @@ The function iterates through each unique arm (excluding the reference group), f # Example data setup (assuming 'time' is event time, 'status' is event indicator (1=event), # and 'arm' is the treatment group) library(survival) -data(lung) -lung$arm <- factor(sample(c("A", "B", "C"), nrow(lung), replace = TRUE)) -lung$status <- lung$status - 1 # Convert status to 0/1 -lung <- na.omit(lung) +use_lung <- lung +use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) +use_lung$status <- use_lung$status - 1 # Convert status to 0/1 +use_lung <- na.omit(use_lung) formula <- Surv(time, status) ~ arm results_tbl <- get_cox_pairwise_tbl( model_formula = formula, - data = lung, + data = use_lung, arm = "arm", ref_group = "A" ) diff --git a/tests/testthat/test-gkm.R b/tests/testthat/test-gkm.R index 3ff407fb..94671a23 100644 --- a/tests/testthat/test-gkm.R +++ b/tests/testthat/test-gkm.R @@ -3,7 +3,7 @@ skip_on_cran() anl <- cards::ADTTE |> dplyr::mutate(is_event = CNSR == 0) by <- "TRTP" -anl[[by]] = factor(anl[[by]], levels = c( +anl[[by]] <- factor(anl[[by]], levels = c( "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose" From 8f52316875bd6c3ff9322966f7b760736ba334a5 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 20:40:53 +0800 Subject: [PATCH 24/25] update --- NAMESPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index c7c28f2a..7d04acf9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,7 +42,7 @@ export(tbl_survfit_times) export(theme_gtsummary_roche) import(glue) import(rlang) -importFrom(asserthat,assert_that) +importFrom(assertthat,assert_that) importFrom(broom,tidy) importFrom(cowplot,draw_plot) importFrom(cowplot,ggdraw) From d5a79b79658e9d06fb3fd87ac5f378a1ad86f517 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 1 Dec 2025 22:33:24 +0800 Subject: [PATCH 25/25] manual vbump --- DESCRIPTION | 2 +- NEWS.md | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4de481a7..422c6878 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: crane Title: Supplements the 'gtsummary' Package for Pharmaceutical Reporting -Version: 0.2.0.9014 +Version: 0.2.0.9015 Authors@R: c( person("Daniel D.", "Sjoberg", , "danield.sjoberg@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-0862-2018")), diff --git a/NEWS.md b/NEWS.md index 17a76e1d..a5922ec3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ -# crane 0.2.0.9014 +# crane 0.2.0.9015 + +* Added `g_km()` function for creating Kaplan-Meier plots. * Added `list("assign_summary_type-arg:cat_threshold" = 0L)` to `theme_gtsummary_roche()`. Numeric variables with few levels will no longer default to summary type `'categorical'` in `gtsummary::tbl_summary()` and `tbl_roche_summary()`. (#79)