From fc5724e7eed00432a17d6d6d9d7def1cc603f847 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 24 Oct 2025 15:42:33 +0200 Subject: [PATCH 01/63] Take charge of parsing and evaluation --- crates/amalthea/src/error.rs | 2 +- crates/ark/src/interface.rs | 276 +++++++++++++++++++++---------- crates/ark/tests/kernel.rs | 105 +++++++++++- crates/harp/src/object.rs | 13 ++ crates/harp/src/parser/srcref.rs | 36 ++++ crates/libr/src/r.rs | 12 +- 6 files changed, 344 insertions(+), 100 deletions(-) diff --git a/crates/amalthea/src/error.rs b/crates/amalthea/src/error.rs index 99fdf15fa..c71a9aa59 100644 --- a/crates/amalthea/src/error.rs +++ b/crates/amalthea/src/error.rs @@ -228,6 +228,6 @@ impl From> for Error { macro_rules! anyhow { ($($rest: expr),*) => {{ let message = anyhow::anyhow!($($rest, )*); - crate::error::Error::Anyhow(message) + $crate::error::Error::Anyhow(message) }} } diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index dcc26f0fc..6b2e6c747 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -73,6 +73,8 @@ use harp::object::RObject; use harp::r_symbol; use harp::routines::r_register_routines; use harp::session::r_traceback; +use harp::srcref::get_block_srcrefs; +use harp::srcref::get_srcref; use harp::utils::r_is_data_frame; use harp::utils::r_typeof; use harp::R_MAIN_THREAD_ID; @@ -127,7 +129,6 @@ use crate::srcref::ark_uri; use crate::srcref::ns_populate_srcref; use crate::srcref::resource_loaded_namespaces; use crate::startup; -use crate::strings::lines; use crate::sys::console::console_to_utf8; use crate::ui::UiCommMessage; use crate::ui::UiCommSender; @@ -235,7 +236,7 @@ pub struct RMain { pub positron_ns: Option, - pending_lines: Vec, + pending_inputs: Option, /// Banner output accumulated during startup, but set to `None` after we complete /// the initialization procedure and forward the banner on @@ -269,6 +270,52 @@ pub struct RMain { debug_session_index: u32, } +struct PendingInputs { + 
exprs: RObject, + srcrefs: RObject, + len: isize, + index: isize, +} + +impl PendingInputs { + pub(crate) fn new(exprs: RObject, srcrefs: RObject) -> Option { + let len = exprs.length(); + let index = 0; + + if len == 0 { + return None; + } + + Some(Self { + exprs, + srcrefs, + len, + index, + }) + } + + pub(crate) fn is_empty(&self) -> bool { + self.index >= self.len + } + + pub(crate) fn pop(&mut self) -> Option { + if self.index >= self.len { + return None; + } + + let srcref = get_srcref(self.srcrefs.sexp, self.index); + let expr = harp::r_list_get(self.exprs.sexp, self.index); + + self.index += 1; + Some(PendingInput { expr, srcref }) + } +} + +pub(crate) struct PendingInput { + expr: RObject, + srcref: RObject, +} + /// Represents the currently active execution request from the frontend. It /// resolves at the next invocation of the `ReadConsole()` frontend method. struct ActiveReadConsoleRequest { @@ -323,8 +370,9 @@ pub enum ConsoleInput { Input(String), } -pub enum ConsoleResult { +pub(crate) enum ConsoleResult { NewInput, + NewPendingInput(PendingInput), Interrupt, Disconnected, Error(amalthea::Error), @@ -617,7 +665,6 @@ impl RMain { pending_futures: HashMap::new(), session_mode, positron_ns: None, - pending_lines: Vec::new(), banner: None, r_error_buffer: None, captured_output: String::new(), @@ -625,6 +672,7 @@ impl RMain { debug_last_stack: vec![], debug_env: None, debug_session_index: 1, + pending_inputs: None, } } @@ -747,6 +795,13 @@ impl RMain { let info = self.prompt_info(prompt); log::trace!("R prompt: {}", info.input_prompt); + // An incomplete prompt when we no longer have any inputs to send should + // never happen because we check for incomplete inputs ahead of time and + // respond to the frontend with an error. + if info.incomplete { + unreachable!("Incomplete input in `ReadConsole` handler"); + } + // Upon entering read-console, finalize any debug call text that we were capturing. 
// At this point, the user can either advance the debugger, causing us to capture // a new expression, or execute arbitrary code, where we will reuse a finalized @@ -1021,18 +1076,19 @@ impl RMain { } } - // An incomplete prompt when we no longer have any inputs to send should - // never happen because we check for incomplete inputs ahead of time and - // respond to the frontend with an error. - if info.incomplete && self.pending_lines.is_empty() { - unreachable!("Incomplete input in `ReadConsole` handler"); - } - - // Next check if we have any pending lines. If we do, we are in the middle of - // evaluating a multi line selection, so immediately write the next line into R's buffer. - // The active request remains active. - if let Some(console_result) = self.handle_pending_line(buf, buflen) { - return Some(console_result); + if let Some(input) = self.pop_pending() { + if info.browser { + if let Ok(sym) = harp::RSymbol::new(input.expr.sexp) { + let sym = String::from(sym); + let debug_commands = + vec!["c", "cont", "f", "help", "n", "s", "where", "r", "Q"]; + if debug_commands.contains(&&sym[..]) { + Self::on_console_input(buf, buflen, sym).unwrap(); + } + return Some(ConsoleResult::NewInput); + } + } + return Some(ConsoleResult::NewPendingInput(input)); } // Finally, check if we have an active request from a previous `read_console()` @@ -1125,29 +1181,107 @@ impl RMain { } } - // If the input is invalid (e.g. incomplete), don't send it to R - // at all, reply with an error right away - if let Err(err) = Self::check_console_input(code.as_str()) { - return Some(ConsoleResult::Error(err)); + if let Err(err) = self.read(&code) { + return Some(ConsoleResult::Error(amalthea::anyhow!("{err:?}"))); } - // Split input by lines, retrieve first line, and store - // remaining lines in a buffer. This helps with long inputs - // because R has a fixed input buffer size of 4096 bytes at the - // time of writing. 
- let code = self.buffer_console_input(code.as_str()); - - // Store input in R's buffer and return sentinel indicating some - // new input is ready - match Self::on_console_input(buf, buflen, code) { - Ok(()) => Some(ConsoleResult::NewInput), - Err(err) => Some(ConsoleResult::Error(err)), - } + self.handle_active_request(info, buf, buflen) }, + ConsoleInput::EOF => Some(ConsoleResult::Disconnected), } } + fn read(&mut self, input: &str) -> anyhow::Result<()> { + let status = match harp::parse_status(&harp::ParseInput::Text(input)) { + Err(err) => { + // Failed to even attempt to parse the input, something is seriously wrong + // FIXME: There are some valid syntax errors going through here, e.g. `identity |> _(1)`. + return Err(anyhow!("Failed to parse input: {err:?}")); + }, + Ok(status) => status, + }; + + // - Incomplete inputs put R into a state where it expects more input that will never come, so we + // immediately reject them. Positron should never send us these, but Jupyter Notebooks may. + // - Complete statements are obviously fine. + // - Syntax errors will cause R to throw an error, which is expected. + let exprs = match status { + harp::ParseResult::Complete(exprs) => exprs, + harp::ParseResult::Incomplete => { + return Err(anyhow!("Can't execute incomplete input:\n{input}")); + }, + harp::ParseResult::SyntaxError { message, .. } => { + return Err(anyhow!("Syntax error: {message}")); + }, + }; + + let srcrefs = get_block_srcrefs(exprs.sexp); + + self.pending_inputs = PendingInputs::new(exprs, srcrefs); + Ok(()) + } + + fn pop_pending(&mut self) -> Option { + let Some(pending_inputs) = self.pending_inputs.as_mut() else { + return None; + }; + + let Some(input) = pending_inputs.pop() else { + // TODO! Don't like this + self.pending_inputs = None; + return None; + }; + + if pending_inputs.is_empty() { + self.pending_inputs = None; + } + + Some(input) + } + + // SAFETY: Call this from a POD frame. Inputs must be protected. 
+ unsafe fn eval_pending( + &mut self, + expr: libr::SEXP, + srcref: libr::SEXP, + buf: *mut c_uchar, + buflen: c_int, + ) -> Option<()> { + // SAFETY: This may jump in case of error, keep this POD + unsafe { + // The global source reference is stored in this global variable by + // the R REPL before evaluation. We do the same here. + libr::set(libr::R_Srcref, srcref); + + // Evaluate the expression. Beware: this may throw an R longjump. + let value = libr::Rf_eval(expr, R_ENVS.global); + libr::Rf_protect(value); + + // Store in the base environment for robust access from (almost) any + // evaluation environment. We only require the presence of `::` so + // we can reach into base. Note that unlike regular environments + // which are stored in pairlists or hash tables, the base environment + // is stored in the `value` field of symbols, i.e. their "CDR". + libr::SETCDR(r_symbol!(".ark_last_value"), value); + + libr::Rf_unprotect(1); + value + }; + + // Back in business, Rust away + let code = if unsafe { libr::get(libr::R_Visible) == 1 } { + String::from("base::.ark_last_value") + } else { + String::from("base::invisible(base::.ark_last_value)") + }; + + // Unwrap safety: The input always fits in the buffer + Self::on_console_input(buf, buflen, code).unwrap(); + + Some(()) + } + /// Handle an `input_request` received outside of an `execute_request` context /// /// We believe it is always invalid to receive an `input_request` that isn't @@ -1545,63 +1679,6 @@ impl RMain { self.get_ui_comm_tx().is_some() } - fn handle_pending_line(&mut self, buf: *mut c_uchar, buflen: c_int) -> Option { - if self.error_occurred { - // If an error has occurred, we've already sent a complete expression that resulted in - // an error. Flush the remaining lines and return to `read_console()`, who will handle - // that error. 
- self.pending_lines.clear(); - return None; - } - - let Some(input) = self.pending_lines.pop() else { - // No pending lines - return None; - }; - - match Self::on_console_input(buf, buflen, input) { - Ok(()) => Some(ConsoleResult::NewInput), - Err(err) => Some(ConsoleResult::Error(err)), - } - } - - fn check_console_input(input: &str) -> amalthea::Result<()> { - let status = unwrap!(harp::parse_status(&harp::ParseInput::Text(input)), Err(err) => { - // Failed to even attempt to parse the input, something is seriously wrong - return Err(Error::InvalidConsoleInput(format!( - "Failed to parse input: {err:?}" - ))); - }); - - // - Incomplete inputs put R into a state where it expects more input that will never come, so we - // immediately reject them. Positron should never send us these, but Jupyter Notebooks may. - // - Complete statements are obviously fine. - // - Syntax errors will cause R to throw an error, which is expected. - match status { - harp::ParseResult::Incomplete => Err(Error::InvalidConsoleInput(format!( - "Can't execute incomplete input:\n{input}" - ))), - harp::ParseResult::Complete(_) => Ok(()), - harp::ParseResult::SyntaxError { .. 
} => Ok(()), - } - } - - fn buffer_console_input(&mut self, input: &str) -> String { - // Split into lines and reverse them to be able to `pop()` from the front - let mut lines: Vec = lines(input).rev().map(String::from).collect(); - - // SAFETY: There is always at least one line because: - // - `lines("")` returns 1 element containing `""` - // - `lines("\n")` returns 2 elements containing `""` - let first = lines.pop().unwrap(); - - // No-op if `lines` is empty - assert!(self.pending_lines.is_empty()); - self.pending_lines.append(&mut lines); - - first - } - /// Copy console input into R's internal input buffer /// /// Supposedly `buflen` is "the maximum length, in bytes, including the @@ -1918,7 +1995,7 @@ impl RMain { // https://github.com/posit-dev/positron/issues/1881 // Handle last expression - if r_main.pending_lines.is_empty() { + if r_main.pending_inputs.is_none() { r_main.autoprint_output.push_str(&content); return; } @@ -2300,8 +2377,26 @@ pub extern "C-unwind" fn r_read_console( // destructors. We're longjumping from here in case of interrupt. 
match result { + ConsoleResult::NewPendingInput(input) => { + let PendingInput { expr, srcref } = input; + + unsafe { + let expr = expr.into_protected(); + let srcref = srcref.into_protected(); + + match main.eval_pending(expr, srcref, buf, buflen) { + None => todo!(), + Some(()) => Some(ConsoleResult::NewInput), + }; + + libr::Rf_unprotect(2); + return 1; + } + }, + ConsoleResult::NewInput => return 1, ConsoleResult::Disconnected => return 0, + ConsoleResult::Interrupt => { log::trace!("Interrupting `ReadConsole()`"); unsafe { @@ -2312,6 +2407,7 @@ pub extern "C-unwind" fn r_read_console( log::error!("`Rf_onintr()` did not longjump"); return 0; }, + ConsoleResult::Error(err) => { main.propagate_error(anyhow::anyhow!("{err}")); }, diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 042142cec..450f86b89 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -418,6 +418,9 @@ fn test_execute_request_multiple_expressions() { fn test_execute_request_single_line_buffer_overflow() { let frontend = DummyArkFrontend::lock(); + // This used to fail back when we were passing inputs down to the REPL from + // our `ReadConsole` handler. Below is the old test description for posterity. + // The newlines do matter for what we are testing here, // due to how we internally split by newlines. 
We want // to test that the `aaa`s result in an immediate R error, @@ -430,16 +433,10 @@ fn test_execute_request_single_line_buffer_overflow() { let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - assert!(frontend - .recv_iopub_execute_error() - .contains("Can't pass console input on to R")); + assert!(frontend.recv_iopub_execute_result().contains(&aaa)); frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } #[test] @@ -590,6 +587,98 @@ fn test_stdin_from_menu() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +// Can debug the base environment (parent is the empty environment) +#[test] +fn test_browser_in_base_env() { + let frontend = DummyArkFrontend::lock(); + + let code = "evalq(browser(), baseenv())"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // Inside `evalq()` we aren't at top level, so this comes as an iopub stream + // and not an execute result + frontend.recv_iopub_stream_stdout("Called from: evalq(browser(), baseenv())\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // While paused in the debugger, evaluate a simple expression + let code = "1 + 1"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("[1] 2\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "Q"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = 
frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + +// The minimal environment we can debug in: access to base via `::`. This might +// be a problem for very specialised sandboxing environment, but they can +// temporarily add `::` while debugging. +#[test] +fn test_browser_in_sandboxing_environment() { + let frontend = DummyArkFrontend::lock(); + + let code = " +env <- new.env(parent = emptyenv()) +env$`::` <- `::` +evalq(base::browser(), env)"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // Inside `evalq()` we aren't at top level, so this comes as an iopub stream + // and not an execute result + frontend.recv_iopub_stream_stdout("Called from: evalq(base::browser(), env)\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // While paused in the debugger, evaluate a simple expression that only + // requires `::` + let code = "base::list"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("function (...) .Primitive(\"list\")\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "Q"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + #[test] fn test_env_vars() { // These environment variables are set by R's shell script frontend. 
diff --git a/crates/harp/src/object.rs b/crates/harp/src/object.rs index 3daa1e0f1..5ce34a236 100644 --- a/crates/harp/src/object.rs +++ b/crates/harp/src/object.rs @@ -238,6 +238,10 @@ pub fn r_list_poke(x: SEXP, i: R_xlen_t, value: SEXP) { } } +pub fn r_list_get(x: SEXP, i: R_xlen_t) -> RObject { + unsafe { RObject::new(VECTOR_ELT(x, i)) } +} + pub fn r_lgl_begin(x: SEXP) -> *mut i32 { unsafe { LOGICAL(x) } } @@ -321,6 +325,15 @@ impl RObject { } } + /// Consume the `RObject` and return the underlying `SEXP`, protected via + /// `Rf_protect`. + pub unsafe fn into_protected(self) -> SEXP { + unsafe { + libr::Rf_protect(self.sexp); + } + self.sexp + } + pub fn view(data: SEXP) -> Self { RObject { sexp: data, diff --git a/crates/harp/src/parser/srcref.rs b/crates/harp/src/parser/srcref.rs index 91562fd94..65e7f58c0 100644 --- a/crates/harp/src/parser/srcref.rs +++ b/crates/harp/src/parser/srcref.rs @@ -153,6 +153,42 @@ impl TryFrom<&harp::CharacterVector> for SrcFile { } } +pub fn get_srcref(srcrefs: libr::SEXP, ind: isize) -> RObject { + if crate::r_is_null(srcrefs) { + return RObject::null(); + } + + if harp::r_length(srcrefs) <= ind { + return RObject::null(); + } + + let result = harp::list_get(srcrefs, ind); + + if crate::r_is_null(result) { + return RObject::null(); + } + + if unsafe { libr::TYPEOF(result) as u32 } != libr::INTSXP { + return RObject::null(); + } + + if harp::r_length(result) < 6 { + return RObject::null(); + } + + RObject::new(result) +} + +pub fn get_block_srcrefs(call: libr::SEXP) -> RObject { + let srcrefs = unsafe { libr::Rf_getAttrib(call, libr::R_SrcrefSymbol) }; + + if unsafe { libr::TYPEOF(srcrefs) as u32 } == libr::VECSXP { + return RObject::new(srcrefs); + } + + RObject::null() +} + #[cfg(test)] mod tests { use std::ops::Range; diff --git a/crates/libr/src/r.rs b/crates/libr/src/r.rs index eb927e48c..1c1b15663 100644 --- a/crates/libr/src/r.rs +++ b/crates/libr/src/r.rs @@ -135,7 +135,7 @@ functions::generate! 
{ pub fn Rf_cons(arg1: SEXP, arg2: SEXP) -> SEXP; - pub fn Rf_defineVar(arg1: SEXP, arg2: SEXP, arg3: SEXP); + pub fn Rf_defineVar(sym: SEXP, value: SEXP, env: SEXP); pub fn Rf_eval(arg1: SEXP, arg2: SEXP) -> SEXP; @@ -617,6 +617,14 @@ constant_globals::generate! { #[default = std::ptr::null_mut()] pub static R_TripleColonSymbol: SEXP; + #[doc = "\"srcfile\""] + #[default = std::ptr::null_mut()] + pub static R_SrcfileSymbol: SEXP; + + #[doc = "\"srcref\""] + #[default = std::ptr::null_mut()] + pub static R_SrcrefSymbol: SEXP; + #[doc = "\"tsp\""] #[default = std::ptr::null_mut()] pub static R_TspSymbol: SEXP; @@ -689,6 +697,8 @@ mutable_globals::generate! { pub static mut R_Srcref: SEXP; + pub static mut R_Visible: Rboolean; + // ----------------------------------------------------------------------------------- // Unix From c5a83651d4f4f850a157bdc19ef11d2e2dbe7380 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 29 Oct 2025 12:27:14 +0100 Subject: [PATCH 02/63] More caller tracking --- crates/amalthea/src/fixtures/dummy_frontend.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/amalthea/src/fixtures/dummy_frontend.rs b/crates/amalthea/src/fixtures/dummy_frontend.rs index 137e453a2..8d427a9d9 100644 --- a/crates/amalthea/src/fixtures/dummy_frontend.rs +++ b/crates/amalthea/src/fixtures/dummy_frontend.rs @@ -236,6 +236,7 @@ impl DummyFrontend { id } + #[track_caller] pub fn recv(socket: &Socket) -> Message { // It's important to wait with a timeout because the kernel thread might have // panicked, preventing it from sending the expected message. 
The tests would then @@ -254,16 +255,19 @@ impl DummyFrontend { } /// Receives a Jupyter message from the Shell socket + #[track_caller] pub fn recv_shell(&self) -> Message { Self::recv(&self.shell_socket) } /// Receives a Jupyter message from the IOPub socket + #[track_caller] pub fn recv_iopub(&self) -> Message { Self::recv(&self.iopub_socket) } /// Receives a Jupyter message from the Stdin socket + #[track_caller] pub fn recv_stdin(&self) -> Message { Self::recv(&self.stdin_socket) } From 99dfd7b6bb6c1d8ad07ffad4fc4d1fc043b46fbd Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 30 Oct 2025 09:51:10 +0100 Subject: [PATCH 03/63] Consolidate debugger states --- crates/ark/src/interface.rs | 108 ++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 47 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 6b2e6c747..8e8e191fe 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -792,21 +792,30 @@ impl RMain { buflen: c_int, _hist: c_int, ) -> ConsoleResult { + // Upon entering read-console, finalize any debug call text that we were capturing. + // At this point, the user can either advance the debugger, causing us to capture + // a new expression, or execute arbitrary code, where we will reuse a finalized + // debug call text to maintain the debug state. + self.dap.finalize_call_text(); + let info = self.prompt_info(prompt); log::trace!("R prompt: {}", info.input_prompt); - // An incomplete prompt when we no longer have any inputs to send should - // never happen because we check for incomplete inputs ahead of time and - // respond to the frontend with an error. + // Since we parse expressions ourselves and only send complete inputs to + // the base REPL, we never should be asked for completing input if info.incomplete { unreachable!("Incomplete input in `ReadConsole` handler"); } - // Upon entering read-console, finalize any debug call text that we were capturing. 
- // At this point, the user can either advance the debugger, causing us to capture - // a new expression, or execute arbitrary code, where we will reuse a finalized - // debug call text to maintain the debug state. - self.dap.finalize_call_text(); + // Invariant: If we detect a browser prompt, `self.dap.is_debugging()` + // is true. Otherwise it is false. + if info.browser { + // Start or continue debugging with the `debug_preserve_focus` hint + // from the last expression we evaluated + self.start_debug(self.debug_preserve_focus); + } else if self.dap.is_debugging() { + self.stop_debug(); + } // We get called here everytime R needs more input. This handler // represents the driving event of a small state machine that manages @@ -856,21 +865,13 @@ impl RMain { // often. We'd still push a `DidChangeConsoleInputs` notification from // here, but only containing high-level information such as `search()` // contents and `ls(rho)`. - if !info.browser && !info.incomplete && !info.input_request { + if !self.dap.is_debugging() && !info.input_request { self.refresh_lsp(); } // Signal prompt EVENTS.console_prompt.emit(()); - if info.browser { - self.start_debug(); - } else { - if self.dap.is_debugging() { - self.stop_debug(); - } - } - let mut select = crossbeam::channel::Select::new(); // Cloning is necessary to avoid a double mutable borrow error @@ -1077,24 +1078,53 @@ impl RMain { } if let Some(input) = self.pop_pending() { - if info.browser { + // Default: preserve current focus for evaluated expressions. + // This only has an effect if we're debugging. + // https://github.com/posit-dev/positron/issues/3151 + self.debug_preserve_focus = true; + + if self.dap.is_debugging() { + // Try to interpret this pending input as a symbol (debug commands + // are entered as symbols). Whether or not it parses as a symbol, + // if we're currently debugging we must set `debug_preserve_focus`. 
if let Ok(sym) = harp::RSymbol::new(input.expr.sexp) { + // All debug commands as documented in `?browser` + const DEBUG_COMMANDS: &[&str] = + &["c", "cont", "f", "help", "n", "s", "where", "r", "Q"]; + + // The subset of debug commands that continue execution + const DEBUG_COMMANDS_CONTINUE: &[&str] = &["n", "f", "c", "cont"]; + let sym = String::from(sym); - let debug_commands = - vec!["c", "cont", "f", "help", "n", "s", "where", "r", "Q"]; - if debug_commands.contains(&&sym[..]) { + + if DEBUG_COMMANDS.contains(&&sym[..]) { + if DEBUG_COMMANDS_CONTINUE.contains(&&sym[..]) { + // For continue-like commands, we do not preserve focus, + // i.e. we let the cursor jump to the stopped + // position. Set the preserve focus hint for the + // next iteration of ReadConsole. + self.debug_preserve_focus = false; + + // Let the DAP client know that execution is now continuing + self.dap.send_dap(DapBackendEvent::Continued); + } + + // All debug commands are forwarded to the base REPL as + // is so that R can interpret them. + // Unwrap safety: A debug command fits in the buffer. Self::on_console_input(buf, buflen, sym).unwrap(); + return Some(ConsoleResult::NewInput); } - return Some(ConsoleResult::NewInput); } } + return Some(ConsoleResult::NewPendingInput(input)); } - // Finally, check if we have an active request from a previous `read_console()` - // iteration. If so, we `take()` and clear the `active_request` as we're about - // to complete it and send a reply to unblock the active Shell - // request. + // If we get here we finished evaluating all pending inputs. Check if we + // have an active request from a previous `read_console()` iteration. If + // so, we `take()` and clear the `active_request` as we're about to + // complete it and send a reply to unblock the active Shell request. if let Some(req) = std::mem::take(&mut self.active_request) { // FIXME: Race condition between the comm and shell socket threads. 
// @@ -1117,7 +1147,8 @@ impl RMain { self.reply_execute_request(req, &info); } - // Prepare for the next user input + // Fall through Ark's ReadConsole event loop while waiting for the next + // execution request None } @@ -1167,20 +1198,6 @@ impl RMain { match input { ConsoleInput::Input(code) => { - // Handle commands for the debug interpreter - if self.dap.is_debugging() { - let continue_cmds = vec!["n", "f", "c", "cont"]; - if continue_cmds.contains(&&code[..]) { - // We're stepping so we want to focus the next location we stop at - self.debug_preserve_focus = false; - self.dap.send_dap(DapBackendEvent::Continued); - } else { - // The user is evaluating some other expression so preserve current focus - // https://github.com/posit-dev/positron/issues/3151 - self.debug_preserve_focus = true; - } - } - if let Err(err) = self.read(&code) { return Some(ConsoleResult::Error(amalthea::anyhow!("{err:?}"))); } @@ -1324,7 +1341,7 @@ impl RMain { return ConsoleResult::Error(Error::InvalidInputRequest(message)); } - fn start_debug(&mut self) { + fn start_debug(&mut self, debug_preserve_focus: bool) { match self.dap.stack_info() { Ok(stack) => { if let Some(frame) = stack.first() { @@ -1351,11 +1368,8 @@ impl RMain { let fallback_sources = self.load_fallback_sources(&stack); self.debug_last_stack = stack_id; - self.dap.start_debug( - stack, - same_stack && self.debug_preserve_focus, - fallback_sources, - ); + self.dap + .start_debug(stack, same_stack && debug_preserve_focus, fallback_sources); }, Err(err) => log::error!("ReadConsole: Can't get stack info: {err}"), }; From 5ebe1a4f269e6848c10719b28e1557f7419541b2 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 30 Oct 2025 11:15:42 +0100 Subject: [PATCH 04/63] Extract `handle_input_request()` --- crates/ark/src/interface.rs | 102 +++++++++++++----------------------- 1 file changed, 35 insertions(+), 67 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 8e8e191fe..ba5f3a467 100644 
--- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -817,43 +817,11 @@ impl RMain { self.stop_debug(); } - // We get called here everytime R needs more input. This handler - // represents the driving event of a small state machine that manages - // communication between R and the frontend. In the following order: - // - // - If we detect an input request prompt, then we forward the request - // on to the frontend and then fall through to the event loop to wait - // on the input reply. - // - // - If the vector of pending lines is not empty, R might be waiting for - // us to complete an incomplete expression, or we might just have - // completed an intermediate expression (e.g. from an ExecuteRequest - // like `foo\nbar` where `foo` is intermediate and `bar` is final). - // Send the next line to R. - // - // - If the vector of pending lines is empty, and if the prompt is for - // new R code, we close the active ExecuteRequest and send an - // ExecuteReply to the frontend. We then fall through to the event - // loop to wait for more input. - // - // This state machine depends on being able to reliably distinguish - // between readline prompts (from `readline()`, `scan()`, or `menu()`), - // and actual R code prompts (either top-level or from a nested debug - // REPL). A readline prompt should never change our state (in - // particular our vector of pending inputs). We think we are making this - // distinction sufficiently robustly but ideally R would let us know the - // prompt type so there is no ambiguity at all. - // - // R might throw an error at any time while we are working on our vector - // of pending lines, either from a syntax error or from an evaluation - // error. When this happens, we abort evaluation and clear the pending - // lines. - // - // If the vector of pending lines is empty and we detect an incomplete - // prompt, this is a panic. 
We check ahead of time for complete - // expressions before breaking up an ExecuteRequest in multiple lines, - // so this should not happen. - if let Some(console_result) = self.handle_active_request(&info, buf, buflen) { + if info.input_request { + if let Some(input) = self.handle_input_request(&info.input_prompt, buf, buflen) { + return input; + } + } else if let Some(console_result) = self.handle_active_request(&info, buf, buflen) { return console_result; }; @@ -1047,36 +1015,6 @@ impl RMain { buf: *mut c_uchar, buflen: c_int, ) -> Option { - // TODO: Can we remove this below code? - // If the prompt begins with "Save workspace", respond with (n) - // and allow R to immediately exit. - // - // NOTE: Should be able to overwrite the `Cleanup` frontend method. - // This would also help with detecting normal exits versus crashes. - if info.input_prompt.starts_with("Save workspace") { - match Self::on_console_input(buf, buflen, String::from("n")) { - Ok(()) => return Some(ConsoleResult::NewInput), - Err(err) => return Some(ConsoleResult::Error(err)), - } - } - - // First check if we are inside request for user input, like a `readline()` or `menu()`. - // It's entirely possible that we still have more pending lines, but an intermediate line - // put us into an `input_request` state. We must respond to that request before processing - // the rest of the pending lines. - if info.input_request { - if let Some(req) = &self.active_request { - // Send request to frontend. We'll wait for an `input_reply` - // from the frontend in the event loop in `read_console()`. - // The active request remains active. - self.request_input(req.originator.clone(), info.input_prompt.to_string()); - return None; - } else { - // Invalid input request, propagate error to R - return Some(self.handle_invalid_input_request(buf, buflen)); - } - } - if let Some(input) = self.pop_pending() { // Default: preserve current focus for evaluated expressions. // This only has an effect if we're debugging. 
@@ -1239,6 +1177,36 @@ impl RMain { Ok(()) } + /// Handles user input requests (e.g., readline, menu) and special prompts. + /// Returns `Some()` if this handler needs to return to the base R REPL, or + /// `None` if it needs to run Ark's `ReadConsole` event loop. + fn handle_input_request( + &mut self, + input_prompt: &str, + buf: *mut c_uchar, + buflen: c_int, + ) -> Option { + // If the prompt begins with "Save workspace", respond with (n) + // and allow R to immediately exit. + if input_prompt.starts_with("Save workspace") { + match Self::on_console_input(buf, buflen, String::from("n")) { + Ok(()) => return Some(ConsoleResult::NewInput), + Err(err) => return Some(ConsoleResult::Error(err)), + } + } + + if let Some(req) = &self.active_request { + // Send request to frontend. We'll wait for an `input_reply` + // from the frontend in the event loop in `read_console()`. + // The active request remains active. + self.request_input(req.originator.clone(), String::from(input_prompt)); + None + } else { + // Invalid input request, propagate error to R + Some(self.handle_invalid_input_request(buf, buflen)) + } + } + fn pop_pending(&mut self) -> Option { let Some(pending_inputs) = self.pending_inputs.as_mut() else { return None; From 6b4d28588164c3262a791b26b8ccb0c34dbc7799 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 30 Oct 2025 11:47:04 +0100 Subject: [PATCH 05/63] Extract `handle_pending_input()` --- crates/ark/src/interface.rs | 127 ++++++++++++++++++++---------------- 1 file changed, 69 insertions(+), 58 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index ba5f3a467..71b96282b 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -821,9 +821,13 @@ impl RMain { if let Some(input) = self.handle_input_request(&info.input_prompt, buf, buflen) { return input; } - } else if let Some(console_result) = self.handle_active_request(&info, buf, buflen) { - return console_result; - }; + } else if let 
Some(input) = self.pop_pending() { + // Evaluate pending expression if there is any remaining + return self.handle_pending_input(input, buf, buflen); + } else { + // Otherwise close active request + self.handle_active_request(&info); + } // In the future we'll also send browser information, see // https://github.com/posit-dev/positron/issues/3001. Currently this is @@ -1009,56 +1013,7 @@ impl RMain { /// Returns: /// - `None` if we should fall through to the event loop to wait for more user input /// - `Some(ConsoleResult)` if we should immediately exit `read_console()` - fn handle_active_request( - &mut self, - info: &PromptInfo, - buf: *mut c_uchar, - buflen: c_int, - ) -> Option { - if let Some(input) = self.pop_pending() { - // Default: preserve current focus for evaluated expressions. - // This only has an effect if we're debugging. - // https://github.com/posit-dev/positron/issues/3151 - self.debug_preserve_focus = true; - - if self.dap.is_debugging() { - // Try to interpret this pending input as a symbol (debug commands - // are entered as symbols). Whether or not it parses as a symbol, - // if we're currently debugging we must set `debug_preserve_focus`. - if let Ok(sym) = harp::RSymbol::new(input.expr.sexp) { - // All debug commands as documented in `?browser` - const DEBUG_COMMANDS: &[&str] = - &["c", "cont", "f", "help", "n", "s", "where", "r", "Q"]; - - // The subset of debug commands that continue execution - const DEBUG_COMMANDS_CONTINUE: &[&str] = &["n", "f", "c", "cont"]; - - let sym = String::from(sym); - - if DEBUG_COMMANDS.contains(&&sym[..]) { - if DEBUG_COMMANDS_CONTINUE.contains(&&sym[..]) { - // For continue-like commands, we do not preserve focus, - // i.e. we let the cursor jump to the stopped - // position. Set the preserve focus hint for the - // next iteration of ReadConsole. 
- self.debug_preserve_focus = false; - - // Let the DAP client know that execution is now continuing - self.dap.send_dap(DapBackendEvent::Continued); - } - - // All debug commands are forwarded to the base REPL as - // is so that R can interpret them. - // Unwrap safety: A debug command fits in the buffer. - Self::on_console_input(buf, buflen, sym).unwrap(); - return Some(ConsoleResult::NewInput); - } - } - } - - return Some(ConsoleResult::NewPendingInput(input)); - } - + fn handle_active_request(&mut self, info: &PromptInfo) { // If we get here we finished evaluating all pending inputs. Check if we // have an active request from a previous `read_console()` iteration. If // so, we `take()` and clear the `active_request` as we're about to @@ -1084,10 +1039,6 @@ impl RMain { // back to Idle. self.reply_execute_request(req, &info); } - - // Fall through Ark's ReadConsole event loop while waiting for the next - // execution request - None } fn handle_execute_request( @@ -1136,11 +1087,22 @@ impl RMain { match input { ConsoleInput::Input(code) => { + // Parse input into pending expressions if let Err(err) = self.read(&code) { return Some(ConsoleResult::Error(amalthea::anyhow!("{err:?}"))); } - self.handle_active_request(info, buf, buflen) + // Evaluate first expression if we got one + if let Some(input) = self.pop_pending() { + return Some(self.handle_pending_input(input, buf, buflen)); + } + + // Otherwise we got an empty input, e.g. `""` and there's + // nothing to do. Close active request. + self.handle_active_request(info); + + // And return to event loop + None }, ConsoleInput::EOF => Some(ConsoleResult::Disconnected), @@ -1207,6 +1169,55 @@ impl RMain { } } + fn handle_pending_input( + &mut self, + input: PendingInput, + buf: *mut c_uchar, + buflen: c_int, + ) -> ConsoleResult { + // Default: preserve current focus for evaluated expressions. + // This only has an effect if we're debugging. 
+ // https://github.com/posit-dev/positron/issues/3151 + self.debug_preserve_focus = true; + + if self.dap.is_debugging() { + // Try to interpret this pending input as a symbol (debug commands + // are entered as symbols). Whether or not it parses as a symbol, + // if we're currently debugging we must set `debug_preserve_focus`. + if let Ok(sym) = harp::RSymbol::new(input.expr.sexp) { + // All debug commands as documented in `?browser` + const DEBUG_COMMANDS: &[&str] = + &["c", "cont", "f", "help", "n", "s", "where", "r", "Q"]; + + // The subset of debug commands that continue execution + const DEBUG_COMMANDS_CONTINUE: &[&str] = &["n", "f", "c", "cont"]; + + let sym = String::from(sym); + + if DEBUG_COMMANDS.contains(&&sym[..]) { + if DEBUG_COMMANDS_CONTINUE.contains(&&sym[..]) { + // For continue-like commands, we do not preserve focus, + // i.e. we let the cursor jump to the stopped + // position. Set the preserve focus hint for the + // next iteration of ReadConsole. + self.debug_preserve_focus = false; + + // Let the DAP client know that execution is now continuing + self.dap.send_dap(DapBackendEvent::Continued); + } + + // All debug commands are forwarded to the base REPL as + // is so that R can interpret them. + // Unwrap safety: A debug command fits in the buffer. 
+ Self::on_console_input(buf, buflen, sym).unwrap(); + return ConsoleResult::NewInput; + } + } + } + + ConsoleResult::NewPendingInput(input) + } + fn pop_pending(&mut self) -> Option { let Some(pending_inputs) = self.pending_inputs.as_mut() else { return None; From f5dd8dfda44af5082cd843973137fb67b23d89ad Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 30 Oct 2025 11:59:17 +0100 Subject: [PATCH 06/63] Rename `finalize_call_text()` to `handle_read_console()` --- crates/ark/src/dap/dap_r_main.rs | 8 ++++++-- crates/ark/src/interface.rs | 8 ++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/ark/src/dap/dap_r_main.rs b/crates/ark/src/dap/dap_r_main.rs index 3b9d96a70..00db8ed3f 100644 --- a/crates/ark/src/dap/dap_r_main.rs +++ b/crates/ark/src/dap/dap_r_main.rs @@ -141,7 +141,7 @@ impl RMainDap { self.debugging = false; } - pub fn handle_stdout(&mut self, content: &str) { + pub fn handle_write_console(&mut self, content: &str) { if let DebugCallText::Capturing(ref mut call_text) = self.call_text { // Append to current expression if we are currently capturing stdout call_text.push_str(content); @@ -164,7 +164,11 @@ impl RMainDap { } } - pub fn finalize_call_text(&mut self) { + pub fn handle_read_console(&mut self) { + // Upon entering read-console, finalize any debug call text that we were capturing. + // At this point, the user can either advance the debugger, causing us to capture + // a new expression, or execute arbitrary code, where we will reuse a finalized + // debug call text to maintain the debug state. match &self.call_text { // If not debugging, nothing to do. DebugCallText::None => (), diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 71b96282b..48cbdc9f6 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -792,11 +792,7 @@ impl RMain { buflen: c_int, _hist: c_int, ) -> ConsoleResult { - // Upon entering read-console, finalize any debug call text that we were capturing. 
- // At this point, the user can either advance the debugger, causing us to capture - // a new expression, or execute arbitrary code, where we will reuse a finalized - // debug call text to maintain the debug state. - self.dap.finalize_call_text(); + self.dap.handle_read_console(); let info = self.prompt_info(prompt); log::trace!("R prompt: {}", info.input_prompt); @@ -1945,7 +1941,7 @@ impl RMain { // To capture the current `debug: ` output, for use in the debugger's // match based fallback - r_main.dap.handle_stdout(&content); + r_main.dap.handle_write_console(&content); let stream = if otype == 0 { Stream::Stdout From 4e7c625b74d30c7294ff4c195f043dc38c114064 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 30 Oct 2025 12:11:58 +0100 Subject: [PATCH 07/63] Make `read()` a constructor method on `PendingInputs` --- crates/ark/src/interface.rs | 73 ++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 48cbdc9f6..9ed1e584c 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -278,20 +278,45 @@ struct PendingInputs { } impl PendingInputs { - pub(crate) fn new(exprs: RObject, srcrefs: RObject) -> Option { + pub(crate) fn read(input: &str) -> anyhow::Result> { + let status = match harp::parse_status(&harp::ParseInput::Text(input)) { + Err(err) => { + // Failed to even attempt to parse the input, something is seriously wrong + // FIXME: There are some valid syntax errors going through here, e.g. `identity |> _(1)`. + return Err(anyhow!("Failed to parse input: {err:?}")); + }, + Ok(status) => status, + }; + + // - Incomplete inputs put R into a state where it expects more input that will never come, so we + // immediately reject them. Positron should never send us these, but Jupyter Notebooks may. + // - Complete statements are obviously fine. + // - Syntax errors will cause R to throw an error, which is expected. 
+ let exprs = match status { + harp::ParseResult::Complete(exprs) => exprs, + harp::ParseResult::Incomplete => { + return Err(anyhow!("Can't execute incomplete input:\n{input}")); + }, + harp::ParseResult::SyntaxError { message, .. } => { + return Err(anyhow!("Syntax error: {message}")); + }, + }; + + let srcrefs = get_block_srcrefs(exprs.sexp); + let len = exprs.length(); let index = 0; if len == 0 { - return None; + return Ok(None); } - Some(Self { + Ok(Some(Self { exprs, srcrefs, len, index, - }) + })) } pub(crate) fn is_empty(&self) -> bool { @@ -1084,11 +1109,14 @@ impl RMain { match input { ConsoleInput::Input(code) => { // Parse input into pending expressions - if let Err(err) = self.read(&code) { - return Some(ConsoleResult::Error(amalthea::anyhow!("{err:?}"))); + match PendingInputs::read(&code) { + Ok(inputs) => { + self.pending_inputs = inputs; + }, + Err(err) => return Some(ConsoleResult::Error(amalthea::anyhow!("{err:?}"))), } - // Evaluate first expression if we got one + // Evaluate first expression if there is one if let Some(input) = self.pop_pending() { return Some(self.handle_pending_input(input, buf, buflen)); } @@ -1105,36 +1133,6 @@ impl RMain { } } - fn read(&mut self, input: &str) -> anyhow::Result<()> { - let status = match harp::parse_status(&harp::ParseInput::Text(input)) { - Err(err) => { - // Failed to even attempt to parse the input, something is seriously wrong - // FIXME: There are some valid syntax errors going through here, e.g. `identity |> _(1)`. - return Err(anyhow!("Failed to parse input: {err:?}")); - }, - Ok(status) => status, - }; - - // - Incomplete inputs put R into a state where it expects more input that will never come, so we - // immediately reject them. Positron should never send us these, but Jupyter Notebooks may. - // - Complete statements are obviously fine. - // - Syntax errors will cause R to throw an error, which is expected. 
- let exprs = match status { - harp::ParseResult::Complete(exprs) => exprs, - harp::ParseResult::Incomplete => { - return Err(anyhow!("Can't execute incomplete input:\n{input}")); - }, - harp::ParseResult::SyntaxError { message, .. } => { - return Err(anyhow!("Syntax error: {message}")); - }, - }; - - let srcrefs = get_block_srcrefs(exprs.sexp); - - self.pending_inputs = PendingInputs::new(exprs, srcrefs); - Ok(()) - } - /// Handles user input requests (e.g., readline, menu) and special prompts. /// Returns `Some()` if this handler needs to return to the base R REPL, or /// `None` if it needs to run Ark's `ReadConsole` event loop. @@ -1220,7 +1218,6 @@ impl RMain { }; let Some(input) = pending_inputs.pop() else { - // TODO! Don't like this self.pending_inputs = None; return None; }; From bb8f71e93425e20f2ce11beaf89911b250128088 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 30 Oct 2025 14:58:20 +0100 Subject: [PATCH 08/63] Refactor console error and result handling --- crates/ark/src/errors.rs | 11 ++ crates/ark/src/interface.rs | 300 +++++++++++++++++------------------- crates/ark/tests/kernel.rs | 6 +- 3 files changed, 160 insertions(+), 157 deletions(-) diff --git a/crates/ark/src/errors.rs b/crates/ark/src/errors.rs index ba36a76b2..0b98aee32 100644 --- a/crates/ark/src/errors.rs +++ b/crates/ark/src/errors.rs @@ -5,6 +5,8 @@ // // +use harp::exec::r_peek_error_buffer; +use harp::exec::RE_STACK_OVERFLOW; use harp::object::RObject; use harp::r_symbol; use harp::session::r_format_traceback; @@ -67,3 +69,12 @@ unsafe extern "C-unwind" fn ps_rust_backtrace() -> anyhow::Result { let trace = format!("{trace}"); Ok(*RObject::from(trace)) } + +pub(crate) fn stack_overflow_occurred() -> bool { + // Error handlers are not called on stack overflow so the error flag + // isn't set. Instead we detect stack overflows by peeking at the error + // buffer. The message is explicitly not translated to save stack space + // so the matching should be reliable. 
+ let err_buf = r_peek_error_buffer(); + RE_STACK_OVERFLOW.is_match(&err_buf) +} diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 9ed1e584c..87f00141e 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -64,7 +64,6 @@ use harp::exec::r_peek_error_buffer; use harp::exec::r_sandbox; use harp::exec::RFunction; use harp::exec::RFunctionExt; -use harp::exec::RE_STACK_OVERFLOW; use harp::library::RLibraries; use harp::line_ending::convert_line_endings; use harp::line_ending::LineEnding; @@ -99,6 +98,7 @@ use crate::dap::dap_r_main::FrameInfoId; use crate::dap::dap_r_main::RMainDap; use crate::dap::Dap; use crate::errors; +use crate::errors::stack_overflow_occurred; use crate::help::message::HelpEvent; use crate::help::r_help::RHelp; use crate::lsp::events::EVENTS; @@ -341,6 +341,12 @@ pub(crate) struct PendingInput { srcref: RObject, } +#[derive(Debug, Clone)] +enum ConsoleValue { + Success(serde_json::Map), + Error(Exception), +} + /// Represents the currently active execution request from the frontend. It /// resolves at the next invocation of the `ReadConsole()` frontend method. 
struct ActiveReadConsoleRequest { @@ -838,7 +844,13 @@ impl RMain { self.stop_debug(); } - if info.input_request { + if let Some(exception) = self.take_exception() { + // Clear any pending inputs, if any + self.pending_inputs = None; + + // Reply to active request with error + self.handle_active_request(&info, ConsoleValue::Error(exception)); + } else if info.input_request { if let Some(input) = self.handle_input_request(&info.input_prompt, buf, buflen) { return input; } @@ -847,7 +859,8 @@ impl RMain { return self.handle_pending_input(input, buf, buflen); } else { // Otherwise close active request - self.handle_active_request(&info); + let result = self.take_result(); + self.handle_active_request(&info, ConsoleValue::Success(result)); } // In the future we'll also send browser information, see @@ -1026,6 +1039,92 @@ impl RMain { }; } + /// Take result from `self.autoprint_output` and R's `.Last.value` object + fn take_result(&mut self) -> serde_json::Map { + // TODO: Implement rich printing of certain outputs. + // Will we need something similar to the RStudio model, + // where we implement custom print() methods? Or can + // we make the stub below behave sensibly even when + // streaming R output? + let mut data = serde_json::Map::new(); + + // The output generated by autoprint is emitted as an + // `execute_result` message. + let mut autoprint = std::mem::take(&mut self.autoprint_output); + + if autoprint.ends_with('\n') { + // Remove the trailing newlines that R adds to outputs but that + // Jupyter frontends are not expecting. Is it worth taking a + // mutable self ref across calling methods to avoid the clone? 
+ autoprint.pop(); + } + if autoprint.len() != 0 { + data.insert("text/plain".to_string(), json!(autoprint)); + } + + // Include HTML representation of data.frame + unsafe { + let value = Rf_findVarInFrame(R_GlobalEnv, r_symbol!(".Last.value")); + if r_is_data_frame(value) { + match to_html(value) { + Ok(html) => { + data.insert("text/html".to_string(), json!(html)); + }, + Err(err) => { + log::error!("{:?}", err); + }, + }; + } + } + + data + } + + fn take_exception(&mut self) -> Option { + let mut exception = if self.error_occurred { + // Reset flag + self.error_occurred = false; + + // We don't fill out `ename` with anything meaningful because typically + // R errors don't have names. We could consider using the condition class + // here, which r-lib/tidyverse packages have been using more heavily. + Exception { + ename: String::from(""), + evalue: self.error_message.clone(), + traceback: self.error_traceback.clone(), + } + } else if stack_overflow_occurred() { + // Call `base::traceback()` since we don't have a handled error + // object carrying a backtrace. This won't be formatted as a + // tree which is just as well since the recursive calls would + // push a tree too far to the right. + let traceback = r_traceback(); + + // Reset error buffer so we don't display this message again + let _ = RFunction::new("base", "stop").call(); + + Exception { + ename: String::from(""), + evalue: r_peek_error_buffer(), + traceback, + } + } else { + return None; + }; + + // Jupyter clients typically discard the `evalue` when a `traceback` is + // present. Jupyter-Console even disregards `evalue` in all cases. So + // include it here if we are in Notebook mode. But should Positron + // implement similar behaviour as the other frontends eventually? The + // first component of `traceback` could be compared to `evalue` and + // discarded from the traceback if the same. 
+ if let SessionMode::Notebook = self.session_mode { + exception.traceback.insert(0, exception.evalue.clone()); + } + + Some(exception) + } + fn read_console_cleanup(&mut self) { // The debug environment is only valid while R is idle self.debug_env = None; @@ -1034,7 +1133,7 @@ impl RMain { /// Returns: /// - `None` if we should fall through to the event loop to wait for more user input /// - `Some(ConsoleResult)` if we should immediately exit `read_console()` - fn handle_active_request(&mut self, info: &PromptInfo) { + fn handle_active_request(&mut self, info: &PromptInfo, value: ConsoleValue) { // If we get here we finished evaluating all pending inputs. Check if we // have an active request from a previous `read_console()` iteration. If // so, we `take()` and clear the `active_request` as we're about to @@ -1058,7 +1157,9 @@ impl RMain { // Let frontend know the last request is complete. This turns us // back to Idle. - self.reply_execute_request(req, &info); + self.reply_execute_request(req, &info, value); + } else { + log::info!("No active request to handle, discarding: {value:?}"); } } @@ -1103,9 +1204,6 @@ impl RMain { }, }; - // Clear error flag - self.error_occurred = false; - match input { ConsoleInput::Input(code) => { // Parse input into pending expressions @@ -1123,7 +1221,7 @@ impl RMain { // Otherwise we got an empty input, e.g. `""` and there's // nothing to do. Close active request. - self.handle_active_request(info); + self.handle_active_request(info, ConsoleValue::Success(Default::default())); // And return to event loop None @@ -1723,142 +1821,56 @@ impl RMain { // Reply to the previously active request. The current prompt type and // whether an error has occurred defines the reply kind. 
- fn reply_execute_request(&mut self, req: ActiveReadConsoleRequest, prompt_info: &PromptInfo) { + fn reply_execute_request( + &mut self, + req: ActiveReadConsoleRequest, + prompt_info: &PromptInfo, + value: ConsoleValue, + ) { let prompt = &prompt_info.input_prompt; - let (reply, result) = if prompt_info.incomplete { - log::trace!("Got prompt {} signaling incomplete request", prompt); - (new_incomplete_reply(&req.request, req.exec_count), None) - } else if prompt_info.input_request { - unreachable!(); - } else { - log::trace!("Got R prompt '{}', completing execution", prompt); + log::trace!("Got R prompt '{}', completing execution", prompt); - self.make_execute_reply_error(req.exec_count) - .unwrap_or_else(|| self.make_execute_reply(req.exec_count)) - }; + let exec_count = req.exec_count; - if let Some(result) = result { - self.iopub_tx.send(result).unwrap(); - } + let (reply, result) = match value { + ConsoleValue::Success(data) => { + let reply = Ok(ExecuteReply { + status: Status::Ok, + execution_count: exec_count, + user_expressions: json!({}), + }); - log::trace!("Sending `execute_reply`: {reply:?}"); - req.reply_tx.send(reply).unwrap(); - } + let result = if data.len() > 0 { + Some(IOPubMessage::ExecuteResult(ExecuteResult { + execution_count: exec_count, + data: serde_json::Value::Object(data), + metadata: json!({}), + })) + } else { + None + }; - fn make_execute_reply_error( - &mut self, - exec_count: u32, - ) -> Option<(amalthea::Result, Option)> { - // Save and reset error occurred flag - let error_occurred = self.error_occurred; - self.error_occurred = false; - - // Error handlers are not called on stack overflow so the error flag - // isn't set. Instead we detect stack overflows by peeking at the error - // buffer. The message is explicitly not translated to save stack space - // so the matching should be reliable. 
- let err_buf = r_peek_error_buffer(); - let stack_overflow_occurred = RE_STACK_OVERFLOW.is_match(&err_buf); - - // Reset error buffer so we don't display this message again - if stack_overflow_occurred { - let _ = RFunction::new("base", "stop").call(); - } + (reply, result) + }, - // Send the reply to the frontend - if !error_occurred && !stack_overflow_occurred { - return None; - } + ConsoleValue::Error(exception) => { + let reply = Err(amalthea::Error::ShellErrorExecuteReply( + exception.clone(), + exec_count, + )); + let result = IOPubMessage::ExecuteError(ExecuteError { exception }); - // We don't fill out `ename` with anything meaningful because typically - // R errors don't have names. We could consider using the condition class - // here, which r-lib/tidyverse packages have been using more heavily. - let mut exception = if error_occurred { - Exception { - ename: String::from(""), - evalue: self.error_message.clone(), - traceback: self.error_traceback.clone(), - } - } else { - // Call `base::traceback()` since we don't have a handled error - // object carrying a backtrace. This won't be formatted as a - // tree which is just as well since the recursive calls would - // push a tree too far to the right. - let traceback = r_traceback(); - Exception { - ename: String::from(""), - evalue: err_buf.clone(), - traceback, - } + (reply, Some(result)) + }, }; - // Jupyter clients typically discard the `evalue` when a `traceback` is - // present. Jupyter-Console even disregards `evalue` in all cases. So - // include it here if we are in Notebook mode. But should Positron - // implement similar behaviour as the other frontends eventually? The - // first component of `traceback` could be compared to `evalue` and - // discarded from the traceback if the same. 
- if let SessionMode::Notebook = self.session_mode { - exception.traceback.insert(0, exception.evalue.clone()) - } - - let reply = new_execute_reply_error(exception.clone(), exec_count); - let result = IOPubMessage::ExecuteError(ExecuteError { exception }); - - Some((reply, Some(result))) - } - - fn make_execute_reply( - &mut self, - exec_count: u32, - ) -> (amalthea::Result, Option) { - // TODO: Implement rich printing of certain outputs. - // Will we need something similar to the RStudio model, - // where we implement custom print() methods? Or can - // we make the stub below behave sensibly even when - // streaming R output? - let mut data = serde_json::Map::new(); - - // The output generated by autoprint is emitted as an - // `execute_result` message. - let mut autoprint = std::mem::take(&mut self.autoprint_output); - - if autoprint.ends_with('\n') { - // Remove the trailing newlines that R adds to outputs but that - // Jupyter frontends are not expecting. Is it worth taking a - // mutable self ref across calling methods to avoid the clone? 
- autoprint.pop(); - } - if autoprint.len() != 0 { - data.insert("text/plain".to_string(), json!(autoprint)); - } - - // Include HTML representation of data.frame - unsafe { - let value = Rf_findVarInFrame(R_GlobalEnv, r_symbol!(".Last.value")); - if r_is_data_frame(value) { - match to_html(value) { - Ok(html) => data.insert("text/html".to_string(), json!(html)), - Err(err) => { - log::error!("{:?}", err); - None - }, - }; - } + if let Some(result) = result { + self.iopub_tx.send(result).unwrap(); } - let reply = new_execute_reply(exec_count); - - let result = (data.len() > 0).then(|| { - IOPubMessage::ExecuteResult(ExecuteResult { - execution_count: exec_count, - data: serde_json::Value::Object(data), - metadata: json!({}), - }) - }); - - (reply, result) + log::trace!("Sending `execute_reply`: {reply:?}"); + req.reply_tx.send(reply).unwrap(); } /// Sends a `Wait` message to IOPub, which responds when the IOPub thread @@ -2288,28 +2300,6 @@ impl RMain { } } -/// Report an incomplete request to the frontend -fn new_incomplete_reply(req: &ExecuteRequest, exec_count: u32) -> amalthea::Result { - let error = Exception { - ename: "IncompleteInput".to_string(), - evalue: format!("Code fragment is not complete: {}", req.code), - traceback: vec![], - }; - Err(amalthea::Error::ShellErrorExecuteReply(error, exec_count)) -} - -fn new_execute_reply(exec_count: u32) -> amalthea::Result { - Ok(ExecuteReply { - status: Status::Ok, - execution_count: exec_count, - user_expressions: json!({}), - }) -} - -fn new_execute_reply_error(error: Exception, exec_count: u32) -> amalthea::Result { - Err(amalthea::Error::ShellErrorExecuteReply(error, exec_count)) -} - /// Converts a data frame to HTML fn to_html(frame: SEXP) -> Result { unsafe { diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 450f86b89..45cef3b58 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -373,11 +373,13 @@ fn test_execute_request_error() { fn 
test_execute_request_error_multiple_expressions() { let frontend = DummyArkFrontend::lock(); - frontend.send_execute_request("1\nstop('foobar')\n2", ExecuteRequestOptions::default()); + // `print(2)` and `3` are never evaluated + let code = "1\nstop('foobar')\nprint(2)\n3"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, "1\nstop('foobar')\n2"); + assert_eq!(input.code, code); frontend.recv_iopub_stream_stdout("[1] 1\n"); assert!(frontend.recv_iopub_execute_error().contains("foobar")); From a0d2e0b8a5cfe04e8d6bb741d87f54c3e70b0813 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 30 Oct 2025 15:22:48 +0100 Subject: [PATCH 09/63] Cancel pending inputs when we get in the debugger --- crates/ark/src/interface.rs | 6 ++++++ crates/ark/tests/kernel.rs | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 87f00141e..0fbe8ad42 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -840,6 +840,12 @@ impl RMain { // Start or continue debugging with the `debug_preserve_focus` hint // from the last expression we evaluated self.start_debug(self.debug_preserve_focus); + + // Clear any pending inputs, if any. Ideally we'd preserve them and + // run them once the debugging session is over, but that'd require + // keeping a stack of pending expressions and accurately tracking + // the lifetime of nested debug sessions. 
+ self.pending_inputs = None; } else if self.dap.is_debugging() { self.stop_debug(); } diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 45cef3b58..b113081ca 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -350,6 +350,36 @@ fn test_execute_request_browser_stdin() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +#[test] +fn test_execute_request_browser_pending_cancelled() { + let frontend = DummyArkFrontend::lock(); + + // The `print()` call should be cancelled when we get in the debugger + let code = "browser()\nprint('hello')"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // We don't get any output for "hello" + frontend.recv_iopub_stream_stdout("Called from: top level \n"); + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "Q"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + #[test] fn test_execute_request_error() { let frontend = DummyArkFrontend::lock(); From e3cd0ca4c527c7819b4f6051973e59fab66eeaa8 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 30 Oct 2025 15:37:59 +0100 Subject: [PATCH 10/63] Add test for invalid syntax --- crates/ark/tests/kernel.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index b113081ca..063fabeb5 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -129,6 +129,30 @@ fn test_execute_request_incomplete_multiple_lines() { ) } +#[test] +fn test_execute_request_invalid() { + let 
frontend = DummyArkFrontend::lock(); + + let code = "1 + )"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert_eq!( + frontend.recv_iopub_execute_error(), + "Error:\n\nSyntax error: unexpected ')'" + ); + + frontend.recv_iopub_idle(); + + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ) +} + #[test] fn test_execute_request_browser() { let frontend = DummyArkFrontend::lock(); From 73bb637caf662bf885b43c427a69638d5753f164 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 31 Oct 2025 11:32:05 +0100 Subject: [PATCH 11/63] Tweak documentation --- .../amalthea/src/fixtures/dummy_frontend.rs | 2 ++ crates/ark/src/interface.rs | 34 +++++++++++-------- crates/ark/tests/kernel.rs | 4 +-- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/crates/amalthea/src/fixtures/dummy_frontend.rs b/crates/amalthea/src/fixtures/dummy_frontend.rs index 8d427a9d9..b6520ab7b 100644 --- a/crates/amalthea/src/fixtures/dummy_frontend.rs +++ b/crates/amalthea/src/fixtures/dummy_frontend.rs @@ -247,6 +247,8 @@ impl DummyFrontend { // // Note that the panic hook will still have run to record the panic, so we'll get // expected panic information in the test output. + // + // If you're debugging tests, you'll need to bump this timeout to a large value. 
if socket.poll_incoming(10000).unwrap() { return Message::read_from_socket(socket).unwrap(); } diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 0fbe8ad42..eb57c89cb 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -270,10 +270,15 @@ pub struct RMain { debug_session_index: u32, } +/// Stack of pending inputs struct PendingInputs { + /// EXPRSXP vector of parsed expressions exprs: RObject, + /// List of srcrefs, the same length as `exprs` srcrefs: RObject, + /// Length of `exprs` and `srcrefs` len: isize, + /// Index into the stack index: isize, } @@ -884,6 +889,8 @@ impl RMain { // Signal prompt EVENTS.console_prompt.emit(()); + // --- Event loop part of ReadConsole + let mut select = crossbeam::channel::Select::new(); // Cloning is necessary to avoid a double mutable borrow error @@ -1145,10 +1152,8 @@ impl RMain { // so, we `take()` and clear the `active_request` as we're about to // complete it and send a reply to unblock the active Shell request. if let Some(req) = std::mem::take(&mut self.active_request) { - // FIXME: Race condition between the comm and shell socket threads. - // - // Perform a refresh of the frontend state - // (Prompts, working directory, etc) + // Perform a refresh of the frontend state (Prompts, working + // directory, etc) self.with_mut_ui_comm_tx(|ui_comm_tx| { let input_prompt = info.input_prompt.clone(); let continuation_prompt = info.continuation_prompt.clone(); @@ -1169,6 +1174,9 @@ impl RMain { } } + // Called from Ark's ReadConsole event loop when we get a new execute + // request. It's not possible to get one while an active request is ongoing + // because of Jupyter's queueing of Shell messages. 
fn handle_execute_request( &mut self, req: RRequest, @@ -1340,7 +1348,7 @@ impl RMain { srcref: libr::SEXP, buf: *mut c_uchar, buflen: c_int, - ) -> Option<()> { + ) { // SAFETY: This may jump in case of error, keep this POD unsafe { // The global source reference is stored in this global variable by @@ -1371,8 +1379,6 @@ impl RMain { // Unwrap safety: The input always fits in the buffer Self::on_console_input(buf, buflen, code).unwrap(); - - Some(()) } /// Handle an `input_request` received outside of an `execute_request` context @@ -2292,11 +2298,11 @@ impl RMain { } } - fn propagate_error(&mut self, err: anyhow::Error) -> ! { + fn propagate_error(&mut self, message: String) -> ! { // Save error message to `RMain`'s buffer to avoid leaking memory when `Rf_error()` jumps. // Some gymnastics are required to deal with the possibility of `CString` conversion failure // since the error message comes from the frontend and might be corrupted. - self.r_error_buffer = Some(new_cstring(format!("\n{err}"))); + self.r_error_buffer = Some(new_cstring(message)); unsafe { Rf_error(self.r_error_buffer.as_ref().unwrap().as_ptr()) } } @@ -2363,13 +2369,13 @@ pub extern "C-unwind" fn r_read_console( let PendingInput { expr, srcref } = input; unsafe { + // The pointer protection stack is restored by `run_Rmainloop()` + // after a longjump to top-level, so it's safe to protect here + // even if the evaluation throws let expr = expr.into_protected(); let srcref = srcref.into_protected(); - match main.eval_pending(expr, srcref, buf, buflen) { - None => todo!(), - Some(()) => Some(ConsoleResult::NewInput), - }; + main.eval_pending(expr, srcref, buf, buflen); libr::Rf_unprotect(2); return 1; @@ -2391,7 +2397,7 @@ pub extern "C-unwind" fn r_read_console( }, ConsoleResult::Error(err) => { - main.propagate_error(anyhow::anyhow!("{err}")); + main.propagate_error(format!("{err}")); }, }; } diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 063fabeb5..b25aa3d31 100644 
--- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -142,7 +142,7 @@ fn test_execute_request_invalid() { assert_eq!( frontend.recv_iopub_execute_error(), - "Error:\n\nSyntax error: unexpected ')'" + "Error:\nSyntax error: unexpected ')'" ); frontend.recv_iopub_idle(); @@ -257,7 +257,7 @@ fn test_execute_request_browser_incomplete() { let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - frontend.recv_iopub_stream_stderr("Error: \nCan't execute incomplete input:\n1 +\n"); + frontend.recv_iopub_stream_stderr("Error: Can't execute incomplete input:\n1 +\n"); frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); From b8b3f89849587d1ddde1b2bae933098822cd04d2 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 31 Oct 2025 12:05:43 +0100 Subject: [PATCH 12/63] Remove `into_protected()` method --- crates/ark/src/interface.rs | 4 ++-- crates/harp/src/object.rs | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index eb57c89cb..f85fd1884 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -2372,8 +2372,8 @@ pub extern "C-unwind" fn r_read_console( // The pointer protection stack is restored by `run_Rmainloop()` // after a longjump to top-level, so it's safe to protect here // even if the evaluation throws - let expr = expr.into_protected(); - let srcref = srcref.into_protected(); + let expr = libr::Rf_protect(expr.into()); + let srcref = libr::Rf_protect(srcref.into()); main.eval_pending(expr, srcref, buf, buflen); diff --git a/crates/harp/src/object.rs b/crates/harp/src/object.rs index 5ce34a236..8d4f3eb8e 100644 --- a/crates/harp/src/object.rs +++ b/crates/harp/src/object.rs @@ -325,15 +325,6 @@ impl RObject { } } - /// Consume the `RObject` and return the underlying `SEXP`, protected via - /// `Rf_protect`. 
- pub unsafe fn into_protected(self) -> SEXP { - unsafe { - libr::Rf_protect(self.sexp); - } - self.sexp - } - pub fn view(data: SEXP) -> Self { RObject { sexp: data, From 96d8c7ac9bebcc5b0881395b3fe6bd81c1c356fe Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 31 Oct 2025 12:18:09 +0100 Subject: [PATCH 13/63] Make `reply_execute_request()` a free function --- crates/ark/src/interface.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index f85fd1884..3092615e8 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1168,7 +1168,7 @@ impl RMain { // Let frontend know the last request is complete. This turns us // back to Idle. - self.reply_execute_request(req, &info, value); + Self::reply_execute_request(&self.iopub_tx, req, &info, value); } else { log::info!("No active request to handle, discarding: {value:?}"); } @@ -1834,7 +1834,7 @@ impl RMain { // Reply to the previously active request. The current prompt type and // whether an error has occurred defines the reply kind. 
fn reply_execute_request( - &mut self, + iopub_tx: &Sender, req: ActiveReadConsoleRequest, prompt_info: &PromptInfo, value: ConsoleValue, @@ -1878,7 +1878,7 @@ impl RMain { }; if let Some(result) = result { - self.iopub_tx.send(result).unwrap(); + iopub_tx.send(result).unwrap(); } log::trace!("Sending `execute_reply`: {reply:?}"); From fe06d3a7f09d79d94f8422fd0a871873c9becbba Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 31 Oct 2025 12:26:32 +0100 Subject: [PATCH 14/63] Create Jupyter exception in the global condition handler --- crates/ark/src/errors.rs | 14 +++++++++++--- crates/ark/src/interface.rs | 25 ++++++------------------- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/crates/ark/src/errors.rs b/crates/ark/src/errors.rs index 0b98aee32..960d1b296 100644 --- a/crates/ark/src/errors.rs +++ b/crates/ark/src/errors.rs @@ -5,6 +5,7 @@ // // +use amalthea::wire::exception::Exception; use harp::exec::r_peek_error_buffer; use harp::exec::RE_STACK_OVERFLOW; use harp::object::RObject; @@ -39,9 +40,16 @@ unsafe extern "C-unwind" fn ps_record_error(evalue: SEXP, traceback: SEXP) -> an Vec::::new() }); - main.error_occurred = true; - main.error_message = evalue; - main.error_traceback = traceback; + main.last_error = Some( + // We don't fill out `ename` with anything meaningful because typically + // R errors don't have names. We could consider using the condition class + // here, which r-lib/tidyverse packages have been using more heavily. + Exception { + ename: String::from(""), + evalue, + traceback, + }, + ); Ok(R_NilValue) } diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 3092615e8..96d6e08fa 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -215,10 +215,9 @@ pub struct RMain { /// by forwarding them through the UI comm. Optional, and really Positron specific. ui_comm_tx: Option, - /// Represents whether an error occurred during R code execution. 
- pub error_occurred: bool, - pub error_message: String, // `evalue` in the Jupyter protocol - pub error_traceback: Vec, + /// Error captured by our global condition handler during the last iteration + /// of the REPL. + pub(crate) last_error: Option, /// Channel to communicate with the Help thread help_event_tx: Option>, @@ -688,9 +687,7 @@ impl RMain { execution_count: 0, autoprint_output: String::new(), ui_comm_tx: None, - error_occurred: false, - error_message: String::new(), - error_traceback: Vec::new(), + last_error: None, help_event_tx: None, help_port: None, lsp_events_tx: None, @@ -1094,18 +1091,8 @@ impl RMain { } fn take_exception(&mut self) -> Option { - let mut exception = if self.error_occurred { - // Reset flag - self.error_occurred = false; - - // We don't fill out `ename` with anything meaningful because typically - // R errors don't have names. We could consider using the condition class - // here, which r-lib/tidyverse packages have been using more heavily. - Exception { - ename: String::from(""), - evalue: self.error_message.clone(), - traceback: self.error_traceback.clone(), - } + let mut exception = if let Some(exception) = self.last_error.take() { + exception } else if stack_overflow_occurred() { // Call `base::traceback()` since we don't have a handled error // object carrying a backtrace. This won't be formatted as a From c50273c75171c5aa58a184f7a84b5e2c784fd4ec Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 31 Oct 2025 12:40:11 +0100 Subject: [PATCH 15/63] Fully remove incomplete prompts heuristic since they are now impossible --- crates/ark/src/interface.rs | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 96d6e08fa..c24890baf 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -392,9 +392,6 @@ pub struct PromptInfo { /// incomplete but is never a user request. 
browser: bool, - /// Whether the last input didn't fully parse and R is waiting for more input - incomplete: bool, - /// Whether this is a prompt from a fresh REPL iteration (browser or /// top level) or a prompt from some user code, e.g. via `readline()` input_request: bool, @@ -830,12 +827,6 @@ impl RMain { let info = self.prompt_info(prompt); log::trace!("R prompt: {}", info.input_prompt); - // Since we parse expressions ourselves and only send complete inputs to - // the base REPL, we never should be asked for completing input - if info.incomplete { - unreachable!("Incomplete input in `ReadConsole` handler"); - } - // Invariant: If we detect a browser prompt, `self.dap.is_debugging()` // is true. Otherwise it is false. if info.browser { @@ -1014,8 +1005,9 @@ impl RMain { let prompt_slice = unsafe { CStr::from_ptr(prompt_c) }; let prompt = prompt_slice.to_string_lossy().into_owned(); + // Sent to the frontend after each top-level command so users can + // customise their prompts let continuation_prompt: String = harp::get_option("continue").try_into().unwrap(); - let matches_continuation = prompt == continuation_prompt; // Detect browser prompt by matching the prompt string // https://github.com/posit-dev/positron/issues/4742. @@ -1023,28 +1015,16 @@ impl RMain { // `options(prompt =, continue = ` to something that looks like // a browser prompt, or doing the same with `readline()`. We have // chosen to not support these edge cases. - // Additionally, we send code to R one line at a time, so even if we are debugging - // it can look like we are in a continuation state. To try and detect that, we - // detect if we matched the continuation prompt while the DAP is active. - let browser = - RE_DEBUG_PROMPT.is_match(&prompt) || (self.dap.is_debugging() && matches_continuation); + let browser = RE_DEBUG_PROMPT.is_match(&prompt); // If there are frames on the stack and we're not in a browser prompt, // this means some user code is requesting input, e.g. 
via `readline()` let user_request = !browser && n_frame > 0; - // The request is incomplete if we see the continue prompt, except if - // we're in a user request, e.g. `readline("+ ")`. To guard against - // this, we check that we are at top-level (call stack is empty or we - // have a debug prompt). - let top_level = n_frame == 0 || browser; - let incomplete = matches_continuation && top_level; - return PromptInfo { input_prompt: prompt, continuation_prompt, browser, - incomplete, input_request: user_request, }; } From 03aa0b545a29f50a4a8394226ea66a91f9629daf Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 31 Oct 2025 14:13:18 +0100 Subject: [PATCH 16/63] Extract ReadConsole event loop into method --- crates/ark/src/interface.rs | 93 ++++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 32 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index c24890baf..ec031a7b6 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -351,6 +351,11 @@ enum ConsoleValue { Error(Exception), } +enum WaitFor { + InputReply, + ExecuteRequest, +} + /// Represents the currently active execution request from the frontend. It /// resolves at the next invocation of the `ReadConsole()` frontend method. struct ActiveReadConsoleRequest { @@ -810,11 +815,16 @@ impl RMain { /// * `prompt` - The prompt shown to the user /// * `buf` - Pointer to buffer to receive the user's input (type `CONSOLE_BUFFER_CHAR`) /// * `buflen` - Size of the buffer to receiver user's input - /// * `hist` - Whether to add the input to the history (1) or not (0) + /// * `_hist` - Whether to add the input to the history (1) or not (0) /// - /// Returns a tuple. First value is to be passed on to `ReadConsole()` and - /// indicates whether new input is available. Second value indicates whether - /// we need to call `Rf_onintr()` to process an interrupt. 
+ /// This does two things: + /// - Move the Console state machine to the next state: + /// - Wait for input + /// - Set an active execute request and a list of pending expressions + /// - Set `self.dap.is_debugging()` depending on presence or absence of debugger prompt + /// - Evaluate next pending expression + /// - Close active execute request if pending list is empty + /// - Run an event loop while waiting for input fn read_console( &mut self, prompt: *const c_char, @@ -824,6 +834,8 @@ impl RMain { ) -> ConsoleResult { self.dap.handle_read_console(); + // State machine part of ReadConsole + let info = self.prompt_info(prompt); log::trace!("R prompt: {}", info.input_prompt); @@ -850,14 +862,13 @@ impl RMain { // Reply to active request with error self.handle_active_request(&info, ConsoleValue::Error(exception)); } else if info.input_request { - if let Some(input) = self.handle_input_request(&info.input_prompt, buf, buflen) { - return input; - } + // Request input reply to the frontend and return it to R + return self.handle_input_request(&info, buf, buflen); } else if let Some(input) = self.pop_pending() { // Evaluate pending expression if there is any remaining return self.handle_pending_input(input, buf, buflen); } else { - // Otherwise close active request + // Otherwise reply to active request with accumulated result let result = self.take_result(); self.handle_active_request(&info, ConsoleValue::Success(result)); } @@ -877,8 +888,23 @@ impl RMain { // Signal prompt EVENTS.console_prompt.emit(()); - // --- Event loop part of ReadConsole + self.run_event_loop(&info, buf, buflen, WaitFor::ExecuteRequest) + } + /// Runs the ReadConsole event loop. 
+ /// This handles events for: + /// - Reception of either input replies or execute requests (as determined + /// by `wait_for`) + /// - Idle-time and interrupt-time tasks + /// - Requests from the frontend (currently only used for establishing UI comm) + /// - R's polled events + fn run_event_loop( + &mut self, + info: &PromptInfo, + buf: *mut c_uchar, + buflen: c_int, + wait_for: WaitFor, + ) -> ConsoleResult { let mut select = crossbeam::channel::Select::new(); // Cloning is necessary to avoid a double mutable borrow error @@ -894,8 +920,14 @@ impl RMain { // package. 50ms seems to be more in line with RStudio (posit-dev/positron#7235). let polled_events_rx = crossbeam::channel::tick(Duration::from_millis(50)); - let r_request_index = select.recv(&r_request_rx); - let stdin_reply_index = select.recv(&stdin_reply_rx); + // This is the main kind of message from the frontend that we are expecting. + // We either wait for `input_reply` messages on StdIn, or for + // `execute_request` on Shell. + let (r_request_index, stdin_reply_index) = match wait_for { + WaitFor::ExecuteRequest => (Some(select.recv(&r_request_rx)), None), + WaitFor::InputReply => (None, Some(select.recv(&stdin_reply_rx))), + }; + let kernel_request_index = select.recv(&kernel_request_rx); let tasks_interrupt_index = select.recv(&tasks_interrupt_rx); let polled_events_index = select.recv(&polled_events_rx); @@ -930,17 +962,13 @@ impl RMain { // reset the flag set_interrupts_pending(false); - // FIXME: Race between interrupt and new code request. To fix - // this, we could manage the Shell and Control sockets on the - // common message event thread. The Control messages would need - // to be handled in a blocking way to ensure subscribers are - // notified before the next incoming message is processed. - // First handle execute requests outside of `select` to ensure they // have priority. `select` chooses at random. 
- if let Ok(req) = r_request_rx.try_recv() { - if let Some(input) = self.handle_execute_request(req, &info, buf, buflen) { - return input; + if let WaitFor::ExecuteRequest = wait_for { + if let Ok(req) = r_request_rx.try_recv() { + if let Some(input) = self.handle_execute_request(req, &info, buf, buflen) { + return input; + } } } @@ -948,7 +976,7 @@ impl RMain { match oper.index() { // We've got an execute request from the frontend - i if i == r_request_index => { + i if Some(i) == r_request_index => { let req = oper.recv(&r_request_rx); let Ok(req) = req else { // The channel is disconnected and empty @@ -961,7 +989,7 @@ impl RMain { }, // We've got a reply for readline - i if i == stdin_reply_index => { + i if Some(i) == stdin_reply_index => { let reply = oper.recv(&stdin_reply_rx).unwrap(); return self.handle_input_reply(reply, buf, buflen); }, @@ -1213,20 +1241,19 @@ impl RMain { } /// Handles user input requests (e.g., readline, menu) and special prompts. - /// Returns `Some()` if this handler needs to return to the base R REPL, or - /// `None` if it needs to run Ark's `ReadConsole` event loop. + /// Runs the ReadConsole event loop until a reply comes in. fn handle_input_request( &mut self, - input_prompt: &str, + info: &PromptInfo, buf: *mut c_uchar, buflen: c_int, - ) -> Option { + ) -> ConsoleResult { // If the prompt begins with "Save workspace", respond with (n) // and allow R to immediately exit. - if input_prompt.starts_with("Save workspace") { + if info.input_prompt.starts_with("Save workspace") { match Self::on_console_input(buf, buflen, String::from("n")) { - Ok(()) => return Some(ConsoleResult::NewInput), - Err(err) => return Some(ConsoleResult::Error(err)), + Ok(()) => return ConsoleResult::NewInput, + Err(err) => return ConsoleResult::Error(err), } } @@ -1234,11 +1261,13 @@ impl RMain { // Send request to frontend. We'll wait for an `input_reply` // from the frontend in the event loop in `read_console()`. // The active request remains active. 
- self.request_input(req.originator.clone(), String::from(input_prompt)); - None + self.request_input(req.originator.clone(), String::from(&info.input_prompt)); + + // Run the event loop, waiting for stdin replies but not execute requests + self.run_event_loop(info, buf, buflen, WaitFor::InputReply) } else { // Invalid input request, propagate error to R - Some(self.handle_invalid_input_request(buf, buflen)) + self.handle_invalid_input_request(buf, buflen) } } From 38c20909c9853ef271ef1a21f3cc1d4b3bb7dce5 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 31 Oct 2025 16:25:59 +0100 Subject: [PATCH 17/63] Return to base REPL in case of error I couldn't produce any issues with `R_EvalDepth` but it seems more sound to let R reset state before evaluation --- crates/ark/src/interface.rs | 32 ++++++++ crates/ark/src/modules/positron/errors.R | 8 ++ crates/ark/tests/kernel.rs | 94 ++++++++++++++++++++++++ 3 files changed, 134 insertions(+) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index ec031a7b6..0da832284 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -219,6 +219,8 @@ pub struct RMain { /// of the REPL. 
pub(crate) last_error: Option, + console_need_reset: bool, + /// Channel to communicate with the Help thread help_event_tx: Option>, /// R help port @@ -340,6 +342,7 @@ impl PendingInputs { } } +#[derive(Debug)] pub(crate) struct PendingInput { expr: RObject, srcref: RObject, @@ -407,6 +410,7 @@ pub enum ConsoleInput { Input(String), } +#[derive(Debug)] pub(crate) enum ConsoleResult { NewInput, NewPendingInput(PendingInput), @@ -561,6 +565,9 @@ impl RMain { if let Err(err) = apply_default_repos(default_repos) { log::error!("Error setting default repositories: {err:?}"); } + + // Initialise Ark's last value + libr::SETCDR(r_symbol!(".ark_last_value"), harp::r_null()); } // Now that R has started (emitting any startup messages that we capture in the @@ -708,6 +715,7 @@ impl RMain { debug_env: None, debug_session_index: 1, pending_inputs: None, + console_need_reset: false, } } @@ -2350,6 +2358,29 @@ pub extern "C-unwind" fn r_read_console( hist: c_int, ) -> i32 { let main = RMain::get_mut(); + + // In case of error, we haven't had a chance to evaluate ".ark_last_value". + // So we return to the R REPL to give R a chance to run the state + // restoration that occurs between `R_ReadConsole()` and `eval()`: + // - R_PPStackTop: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L227 + // - R_EvalDepth: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L260 + // + // Technically this also resets time limits (see `base::setTimeLimit()`) but + // these aren't supported in Ark because they cause errors when we poll R + // events. 
+ if main.last_error.is_some() && main.console_need_reset { + main.console_need_reset = false; + + // Evaluate last value so that `base::.Last.value` remains the same + RMain::on_console_input( + buf, + buflen, + String::from("base::invisible(base::.ark_last_value)"), + ) + .unwrap(); + return 1; + } + let result = r_sandbox(|| main.read_console(prompt, buf, buflen, hist)); main.read_console_cleanup(); @@ -2371,6 +2402,7 @@ pub extern "C-unwind" fn r_read_console( let expr = libr::Rf_protect(expr.into()); let srcref = libr::Rf_protect(srcref.into()); + main.console_need_reset = true; main.eval_pending(expr, srcref, buf, buflen); libr::Rf_unprotect(2); diff --git a/crates/ark/src/modules/positron/errors.R b/crates/ark/src/modules/positron/errors.R index 11e082cb5..12263fa76 100644 --- a/crates/ark/src/modules/positron/errors.R +++ b/crates/ark/src/modules/positron/errors.R @@ -33,6 +33,14 @@ #' @export .ps.errors.globalErrorHandler <- function(cnd) { + # Unlike C stack overflow errors, expressions nested too deeply errors allow + # calling handlers. But since we run R code, we need to temporarily bump the + # threshold to give a little room while we handle the error. 
+ if (inherits(cnd, "expressionStackOverflowError")) { + old <- options(expressions = getOption("expressions") + 500) + defer(options(old)) + } + # This reproduces the behaviour of R's default error handler: # - Invoke `getOption("error")` # - Save backtrace for `traceback()` diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index b25aa3d31..db92da3bf 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -423,6 +423,100 @@ fn test_execute_request_error() { ); } +#[test] +fn test_execute_request_error_expressions_overflow() { + let frontend = DummyArkFrontend::lock(); + // Deterministically produce an "evaluation too deeply nested" error + let code = "options(expressions = 100); f <- function(x) if (x > 0 ) f(x - 1); f(100)"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend + .recv_iopub_execute_error() + .contains("evaluation nested too deeply")); + + frontend.recv_iopub_idle(); + + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); + + // Check we can still evaluate without causing another too deeply nested error + let code = "f(10)"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + +#[test] +fn test_execute_request_error_expressions_overflow_last_value() { + let frontend = DummyArkFrontend::lock(); + + // Set state and last value + let code = + "options(expressions = 100); f <- function(x) if (x > 0 ) f(x - 1); invisible('hello')"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = 
frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Check last value is set + let code = ".Last.value"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert_eq!(frontend.recv_iopub_execute_result(), "[1] \"hello\""); + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Deterministically produce an "evaluation too deeply nested" error + let code = "f(100)"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend + .recv_iopub_execute_error() + .contains("evaluation nested too deeply")); + + frontend.recv_iopub_idle(); + + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); + + // Check last value is still set + let code = ".Last.value"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert_eq!(frontend.recv_iopub_execute_result(), "[1] \"hello\""); + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + #[test] fn test_execute_request_error_multiple_expressions() { let frontend = DummyArkFrontend::lock(); From 5699d2f254e97dcd7d02f7547954bd5263711f70 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 31 Oct 2025 17:33:05 +0100 Subject: [PATCH 18/63] Rename `eval_pending()` to `eval()` --- crates/ark/src/interface.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 
0da832284..a4ef9c3a7 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1346,7 +1346,7 @@ impl RMain { } // SAFETY: Call this from a POD frame. Inputs must be protected. - unsafe fn eval_pending( + unsafe fn eval( &mut self, expr: libr::SEXP, srcref: libr::SEXP, @@ -2403,7 +2403,7 @@ pub extern "C-unwind" fn r_read_console( let srcref = libr::Rf_protect(srcref.into()); main.console_need_reset = true; - main.eval_pending(expr, srcref, buf, buflen); + main.eval(expr, srcref, buf, buflen); libr::Rf_unprotect(2); return 1; From 115240b6c150d29e987c7784da208aabb35eed9d Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 31 Oct 2025 18:00:28 +0100 Subject: [PATCH 19/63] Don't clear pending expressions in browser sessions Would be hard to get right in the case of nested browser sessions --- crates/ark/src/interface.rs | 6 ------ crates/ark/tests/kernel.rs | 39 +++++++++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index a4ef9c3a7..e0865d232 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -853,12 +853,6 @@ impl RMain { // Start or continue debugging with the `debug_preserve_focus` hint // from the last expression we evaluated self.start_debug(self.debug_preserve_focus); - - // Clear any pending inputs, if any. Ideally we'd preserve them and - // run them once the debugging session is over, but that'd require - // keeping a stack of pending expressions and accurately tracking - // the lifetime of nested debug sessions. 
- self.pending_inputs = None; } else if self.dap.is_debugging() { self.stop_debug(); } diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index db92da3bf..547be5637 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -375,23 +375,54 @@ fn test_execute_request_browser_stdin() { } #[test] -fn test_execute_request_browser_pending_cancelled() { +fn test_execute_request_browser_multiple_expressions() { let frontend = DummyArkFrontend::lock(); - // The `print()` call should be cancelled when we get in the debugger - let code = "browser()\nprint('hello')"; + // Ideally the evaluation of `1` would be cancelled + let code = "browser()\n1"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - // We don't get any output for "hello" frontend.recv_iopub_stream_stdout("Called from: top level \n"); + + assert_eq!(frontend.recv_iopub_execute_result(), "[1] 1"); + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Even if we could cancel pending expressions, it would still be possible + // to run multiple expressions in a debugger prompt + let code = "1\n2"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("[1] 1\n"); + + assert_eq!(frontend.recv_iopub_execute_result(), "[1] 2"); frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // But getting in a nested browser session with a pending expression would + // cancel it (not the case currently) + let code = "browser()\n1"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + let input = frontend.recv_iopub_execute_input(); + 
assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("Called from: top level \n"); + + assert_eq!(frontend.recv_iopub_execute_result(), "[1] 1"); + frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + // Quit session let code = "Q"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); From f584454b52e5af3ffdc9270ed7e8eff47245dc3f Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 4 Nov 2025 10:41:35 +0100 Subject: [PATCH 20/63] Tweak docs --- crates/ark/src/interface.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index e0865d232..f3760851e 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -388,7 +388,7 @@ pub struct PromptInfo { input_prompt: String, /// The continuation prompt string when user supplies incomplete - /// inputs. This always corresponds to `getOption("continue"). We send + /// inputs. This always corresponds to `getOption("continue")`. We send /// it to frontends along with `prompt` because some frontends such as /// Positron do not send incomplete inputs to Ark and take charge of /// continuation prompts themselves. For frontends that can send @@ -396,8 +396,8 @@ pub struct PromptInfo { /// error on them rather than requesting that this be shown. continuation_prompt: String, - /// Whether this is a `browser()` prompt. A browser prompt can be - /// incomplete but is never a user request. + /// Whether this is a `browser()` prompt. A browser prompt is never a user + /// request. 
browser: bool, /// Whether this is a prompt from a fresh REPL iteration (browser or From 730109d18e11ebb15517ca63cb2320d78d7abc12 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 4 Nov 2025 10:42:26 +0100 Subject: [PATCH 21/63] Add failing test --- crates/ark/tests/kernel.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 547be5637..d6dc903e2 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -184,6 +184,37 @@ fn test_execute_request_browser() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +#[test] +fn test_execute_request_browser_continue() { + let frontend = DummyArkFrontend::lock(); + + let code = "browser()"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend + .recv_iopub_execute_result() + .contains("Called from: top level")); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "n"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + #[test] fn test_execute_request_browser_error() { // The behaviour for errors is different in browsers than at top-level From fa1af5429f17a16e580491e6500113bb47686da3 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 4 Nov 2025 13:02:28 +0100 Subject: [PATCH 22/63] `RMain::eval()` doesn't need self ref --- crates/ark/src/interface.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index f3760851e..035325f6f 100644 
--- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1340,13 +1340,7 @@ impl RMain { } // SAFETY: Call this from a POD frame. Inputs must be protected. - unsafe fn eval( - &mut self, - expr: libr::SEXP, - srcref: libr::SEXP, - buf: *mut c_uchar, - buflen: c_int, - ) { + unsafe fn eval(expr: libr::SEXP, srcref: libr::SEXP, buf: *mut c_uchar, buflen: c_int) { // SAFETY: This may jump in case of error, keep this POD unsafe { // The global source reference is stored in this global variable by @@ -2397,7 +2391,7 @@ pub extern "C-unwind" fn r_read_console( let srcref = libr::Rf_protect(srcref.into()); main.console_need_reset = true; - main.eval(expr, srcref, buf, buflen); + RMain::eval(expr, srcref, buf, buflen); libr::Rf_unprotect(2); return 1; From 898e9dacb36befb7b6bcceea3675724f3e3ad5f7 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 4 Nov 2025 13:55:57 +0100 Subject: [PATCH 23/63] Add `harp::exec_with_cleanup()` --- crates/harp/src/exec.rs | 145 ++++++++++++++++++++++++++++++++++++++++ crates/libr/src/r.rs | 7 ++ 2 files changed, 152 insertions(+) diff --git a/crates/harp/src/exec.rs b/crates/harp/src/exec.rs index 808a954da..672fcaec8 100644 --- a/crates/harp/src/exec.rs +++ b/crates/harp/src/exec.rs @@ -367,6 +367,101 @@ where } } +/// Execute a function with a cleanup handler using R's cleanup mechanism. +/// +/// This wraps `R_ExecWithCleanup` to provide execution with guaranteed cleanup, +/// even in case of an R longjump. +/// +/// In case of longjump, `cleanup()` runs but `exec_with_cleanup()` does not +/// return, the longjump propagates. +/// +/// Note that `fun` and `cleanup` must be longjump-safe: +/// - Only POD types without Drop destructors on the stack +/// - Or protects itself from longjumps via e.g.
`try_catch()` +/// ``` +pub fn exec_with_cleanup<'env, F, C, T>(fun: F, cleanup: C) -> T +where + F: FnOnce() -> T, + F: 'env, + C: FnOnce(), + C: 'env, + T: 'env, +{ + struct CleanupData<'a, F, C, T> + where + F: FnOnce() -> T + 'a, + C: FnOnce() + 'a, + { + // slot for the result of the closure + result: &'a mut Option, + closure: Option, + cleanup: Option, + } + + // Allocate stack memory for the result + let mut result: Option = None; + + // Move closures to the payload + let mut callback_data = CleanupData { + result: &mut result, + closure: Some(fun), + cleanup: Some(cleanup), + }; + let payload = &mut callback_data as *mut _ as *mut c_void; + + extern "C-unwind" fn exec_callback<'env, F, C, T>(data: *mut c_void) -> SEXP + where + F: FnOnce() -> T, + F: 'env, + C: FnOnce(), + C: 'env, + T: 'env, + { + // SAFETY: `data` points to a `CleanupData` allocated on the caller's stack. + let data: &mut CleanupData = unsafe { &mut *(data as *mut CleanupData) }; + + // Move closure here so it can be called. Required since that's an `FnOnce`. + let closure = take(&mut data.closure).unwrap(); + + // Call closure and store the result in the payload + let result = closure(); + *(data.result) = Some(result); + + // Always return R_NilValue to R_ExecWithCleanup; the real result is in `payload`. + unsafe { R_NilValue } + } + + extern "C-unwind" fn cleanup_callback<'env, F, C, T>(data: *mut c_void) + where + F: FnOnce() -> T, + F: 'env, + C: FnOnce(), + C: 'env, + T: 'env, + { + // SAFETY: `data` points to a `CleanupData` allocated on the caller's stack. + let data: &mut CleanupData = unsafe { &mut *(data as *mut CleanupData) }; + + // Move cleanup closure here so it can be called + if let Some(cleanup) = take(&mut data.cleanup) { + cleanup(); + } + } + + // Call into R; the callbacks will populate `res` and always return R_NilValue. 
+ unsafe { + R_ExecWithCleanup( + Some(exec_callback::), + payload, + Some(cleanup_callback::), + payload, + ) + }; + + // Unwrap Safety: If we get here, we're in the happy path and the result is Some + result.unwrap() +} + pub fn r_peek_error_buffer() -> String { // SAFETY: Returns pointer to static memory buffer owned by R. let buffer = unsafe { R_curErrorBuf() }; @@ -495,6 +590,8 @@ pub fn r_check_stack(size: Option) -> Result<()> { #[cfg(test)] mod tests { use std::ffi::CString; + use std::sync::Arc; + use std::sync::Mutex; use stdext::assert_match; @@ -644,4 +741,52 @@ mod tests { }); }) } + + #[test] + fn test_exec_with_cleanup() { + crate::r_task(|| { + let cleanup_called = Arc::new(Mutex::new(false)); + let cleanup_called_clone = cleanup_called.clone(); + + let result = exec_with_cleanup( + || { + // Create a simple R object and return it directly (T = RObject) + let obj = RObject::from(unsafe { Rf_ScalarInteger(42) }); + obj + }, + || { + *cleanup_called_clone.lock().unwrap() = true; + }, + ); + + assert_eq!(unsafe { Rf_asInteger(*result) }, 42); + assert!( + *cleanup_called.lock().unwrap(), + "Cleanup should have been called" + ); + + // Test error case - cleanup should still be called. + let cleanup_called_error = Arc::new(Mutex::new(false)); + let cleanup_called_error_clone = cleanup_called_error.clone(); + + let result = try_catch(|| { + exec_with_cleanup( + || -> RObject { + let msg = CString::new("ouch").unwrap(); // This leaks + unsafe { Rf_error(msg.as_ptr()) }; + }, + || { + *cleanup_called_error_clone.lock().unwrap() = true; + }, + ) + }); + + assert!(result.is_err()); + + assert!( + *cleanup_called_error.lock().unwrap(), + "Cleanup should have been called on error" + ); + }) + } } diff --git a/crates/libr/src/r.rs b/crates/libr/src/r.rs index 1c1b15663..8fb8b4e1f 100644 --- a/crates/libr/src/r.rs +++ b/crates/libr/src/r.rs @@ -98,6 +98,13 @@ functions::generate! 
{ data: *mut std::ffi::c_void ) -> Rboolean; + pub fn R_ExecWithCleanup( + fun: Option SEXP>, + data: *mut std::ffi::c_void, + cleanfun: Option, + cleandata: *mut std::ffi::c_void + ) -> SEXP; + pub fn R_withCallingErrorHandler( body: Option SEXP>, bdata: *mut std::ffi::c_void, From 63c5bdb56129ab6673503738924820c547e2868f Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 4 Nov 2025 22:31:08 +0100 Subject: [PATCH 24/63] Keep track of nested REPLs and clean up R's state when needed --- crates/ark/src/interface.rs | 161 ++++++++++++++++++++++++++++++------ crates/ark/tests/kernel.rs | 155 ++++++++++++++++++++++++++++++++++ 2 files changed, 289 insertions(+), 27 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 035325f6f..051da0c68 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -10,6 +10,7 @@ // The frontend methods called by R are forwarded to the corresponding // `RMain` methods via `R_MAIN`. +use std::cell::Cell; use std::cell::RefCell; use std::cell::UnsafeCell; use std::collections::HashMap; @@ -59,6 +60,7 @@ use harp::command::r_home_setup; use harp::environment::r_ns_env; use harp::environment::Environment; use harp::environment::R_ENVS; +use harp::exec::exec_with_cleanup; use harp::exec::r_check_stack; use harp::exec::r_peek_error_buffer; use harp::exec::r_sandbox; @@ -219,8 +221,6 @@ pub struct RMain { /// of the REPL. pub(crate) last_error: Option, - console_need_reset: bool, - /// Channel to communicate with the Help thread help_event_tx: Option>, /// R help port @@ -269,6 +269,24 @@ pub struct RMain { /// Ever increasing debug session index. Used to create URIs that are only /// valid for a single session. debug_session_index: u32, + + /// Tracks how many nested `r_read_console()` calls are on the stack. + /// Incremented when entering `r_read_console()`, decremented on exit. + read_console_depth: Cell, + + /// Set to true when `r_read_console()` exits.
Reset to false at the start + /// of each `r_read_console()` call. Used to detect if `eval()` returned + /// from a nested REPL (the flag will be true when the evaluation returns). + nested_read_console_returned: Cell, + + /// Set to true when `r_read_console()` exits via an error longjump. Used to + /// detect if we need to return from `r_read_console()` with a dummy + /// evaluation to reset things like `R_EvalDepth`. + read_console_threw_error: Cell, + + /// Used to track an input to evaluate upon returning to `r_read_console()`, + /// after having returned a dummy input to reset `R_ConsoleIob` in R's REPL. + next_read_console_input: Cell>, } /// Stack of pending inputs @@ -715,7 +733,10 @@ impl RMain { debug_env: None, debug_session_index: 1, pending_inputs: None, - console_need_reset: false, + read_console_depth: Cell::new(0), + nested_read_console_returned: Cell::new(false), + read_console_threw_error: Cell::new(false), + next_read_console_input: Cell::new(None), } } @@ -1816,6 +1837,13 @@ impl RMain { Ok(()) } + fn console_input(buf: *mut c_uchar, _buflen: c_int) -> String { + unsafe { + let cstr = CStr::from_ptr(buf as *const c_char); + cstr.to_string_lossy().into_owned() + } + } + // Hitting this means a SINGLE line from the user was longer than the buffer size (>4000 characters) fn buffer_overflow_error() -> amalthea::Error { Error::InvalidConsoleInput(String::from( @@ -2345,30 +2373,90 @@ pub extern "C-unwind" fn r_read_console( buflen: c_int, hist: c_int, ) -> i32 { - let main = RMain::get_mut(); - - // In case of error, we haven't had a chance to evaluate ".ark_last_value".
- // So we return to the R REPL to give R a chance to run the state - // restoration that occurs between `R_ReadConsole()` and `eval()`: - // - R_PPStackTop: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L227 - // - R_EvalDepth: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L260 + // In this entry point we handle two kinds of state: + // - The number of nested REPLs `read_console_depth` + // - A bunch of flags that help us reset the calling R REPL // - // Technically this also resets time limits (see `base::setTimeLimit()`) but - // these aren't supported in Ark because they cause errors when we poll R - // events. - if main.last_error.is_some() && main.console_need_reset { - main.console_need_reset = false; - - // Evaluate last value so that `base::.Last.value` remains the same - RMain::on_console_input( - buf, - buflen, - String::from("base::invisible(base::.ark_last_value)"), - ) - .unwrap(); - return 1; + // The second kind is unfortunate and due to us taking charge of parsing and + // evaluation. Ideally R would extend their frontend API so that this would + // only be necessary for backward compatibility with old versions of R. + + { + let main = RMain::get_mut(); + + // We've finished evaluating a dummy value to reset state in R's REPL, + // and are now ready to evaluate the actual input + if let Some(next_input) = main.next_read_console_input.take() { + RMain::on_console_input(buf, buflen, next_input).unwrap(); + return 1; + } + + // In case of error, we haven't had a chance to evaluate ".ark_last_value". 
+ // So we return to the R REPL to give R a chance to run the state + // restoration that occurs between `R_ReadConsole()` and `eval()`: + // - R_PPStackTop: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L227 + // - R_EvalDepth: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L260 + // + // Technically this also resets time limits (see `base::setTimeLimit()`) but + // these aren't supported in Ark because they cause errors when we poll R + // events. + if main.last_error.is_some() && main.read_console_threw_error.get() { + main.read_console_threw_error.set(false); + + // Evaluate last value so that `base::.Last.value` remains the same + RMain::on_console_input( + buf, + buflen, + String::from("base::invisible(base::.Last.value)"), + ) + .unwrap(); + return 1; + } + + // Track nesting depth of ReadConsole REPLs + main.read_console_depth + .set(main.read_console_depth.get() + 1); + + // Reset flag that helps us figure out when a nested REPL returns + main.nested_read_console_returned.set(false); + + // Reset flag that helps us figure out when an error occurred and needs a + // reset of `R_EvalDepth` and friends + main.read_console_threw_error.set(true); } + exec_with_cleanup( + || { + let main = RMain::get_mut(); + let result = r_read_console_impl(main, prompt, buf, buflen, hist); + + // If we get here, there was no error + main.read_console_threw_error.set(false); + + result + }, + || { + let main = RMain::get_mut(); + + // We're exiting, decrease depth of nested consoles + main.read_console_depth + .set(main.read_console_depth.get() - 1); + + // Set flag so that parent read console, if any, can detect that a + // nested console returned (if it indeed returns instead of looping + // for another iteration) + main.nested_read_console_returned.set(true); + }, + ) +} + +fn r_read_console_impl( + main: &mut RMain, + prompt: *const c_char, + buf: *mut c_uchar, + buflen: c_int, + hist: c_int, +) -> i32 { let result = r_sandbox(|| 
main.read_console(prompt, buf, buflen, hist)); main.read_console_cleanup(); @@ -2390,16 +2478,35 @@ pub extern "C-unwind" fn r_read_console( let expr = libr::Rf_protect(expr.into()); let srcref = libr::Rf_protect(srcref.into()); - main.console_need_reset = true; RMain::eval(expr, srcref, buf, buflen); + // Check if a nested read_console() just returned. If that's the + // case, we need to reset the `R_ConsoleIob` by first returning + // a dummy value causing a `PARSE_NULL` event. + if main.nested_read_console_returned.get() { + let next_input = RMain::console_input(buf, buflen); + main.next_read_console_input.set(Some(next_input)); + + // Evaluating a space causes a `PARSE_NULL` event. Don't + // evaluate a newline, that would cause a parent debug REPL + // to interpret it as `n`, causing it to exit instead of + // being a no-op. + RMain::on_console_input(buf, buflen, String::from(" ")).unwrap(); + main.nested_read_console_returned.set(false); + } + libr::Rf_unprotect(2); return 1; } }, - ConsoleResult::NewInput => return 1, - ConsoleResult::Disconnected => return 0, + ConsoleResult::NewInput => { + return 1; + }, + + ConsoleResult::Disconnected => { + return 0; + }, ConsoleResult::Interrupt => { log::trace!("Interrupting `ReadConsole()`"); diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index d6dc903e2..7cff6e7e8 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -215,6 +215,161 @@ fn test_execute_request_browser_continue() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +#[test] +fn test_execute_request_browser_nested() { + // Test nested browser() calls - entering a browser within a browser + let frontend = DummyArkFrontend::lock(); + + // Start first browser + let code = "browser()"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend 
+ .recv_iopub_execute_result() + .contains("Called from: top level")); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Evaluate a value in the outer browser + let code = "42"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend.recv_iopub_execute_result().contains("[1] 42")); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Start nested browser from within the first browser + let code = "browser()"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // Nested browser() produces execute_result output + frontend.recv_iopub_execute_result(); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Evaluate a command in the nested browser + let code = "1"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend.recv_iopub_execute_result().contains("[1] 1")); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Evaluate another value in the nested browser + let code = "\"hello\""; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend.recv_iopub_execute_result().contains("hello")); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Throw an error in the nested 
browser + let code = "stop('error in nested')"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stderr("Error: error in nested\n"); + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Continue to exit the nested browser and return to parent + let code = "c"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Back in the parent browser, evaluate another value + let code = "3.14"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend.recv_iopub_execute_result().contains("[1] 3.14")); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Throw an error in the outer browser + let code = "stop('error in parent')"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stderr("Error: error in parent\n"); + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "NA"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend.recv_iopub_execute_result().contains("[1] NA")); + + frontend.recv_iopub_idle(); + + 
assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + // Quit the outer browser + let code = "Q"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + #[test] fn test_execute_request_browser_error() { // The behaviour for errors is different in browsers than at top-level From d462ce5591e0dcf79a20277a46ee9c6c5b5adeb3 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 5 Nov 2025 08:59:13 +0100 Subject: [PATCH 25/63] Flush autoprint buffer in case of error --- crates/ark/src/interface.rs | 11 +++++++++++ crates/ark/tests/kernel.rs | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 051da0c68..5ca4b7aad 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1143,6 +1143,17 @@ impl RMain { return None; }; + // Flush any accumulated output to StdOut. This can happen if + // the last input errors out during autoprint. + let autoprint = std::mem::take(&mut self.autoprint_output); + if !autoprint.is_empty() { + let message = IOPubMessage::Stream(StreamOutput { + name: Stream::Stdout, + text: autoprint, + }); + self.iopub_tx.send(message).unwrap(); + } + // Jupyter clients typically discard the `evalue` when a `traceback` is // present. Jupyter-Console even disregards `evalue` in all cases. So // include it here if we are in Notebook mode. 
But should Positron diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 7cff6e7e8..6e9f38780 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -640,6 +640,39 @@ fn test_execute_request_error() { ); } +#[test] +fn test_execute_request_error_with_accumulated_output() { + // Test that when the very last input produces output and then throws an error, + // the accumulated output is flushed before the error is reported. + // This tests the autoprint buffer flush logic in error handling. + let frontend = DummyArkFrontend::lock(); + + let code = "{ + print.foo <- function(x) { + print(unclass(x)) + stop(\"foo\") + } + structure(42, class = \"foo\") + }"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // The output from print(unclass(x)) should be flushed to stdout + frontend.recv_iopub_stream_stdout("[1] 42\n"); + + // Then the error should be reported on stderr + assert!(frontend.recv_iopub_execute_error().contains("foo")); + frontend.recv_iopub_idle(); + + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); +} + #[test] fn test_execute_request_error_expressions_overflow() { let frontend = DummyArkFrontend::lock(); From a911e297e7377ff9ba1b75351a95f46122efa858 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 5 Nov 2025 09:28:43 +0100 Subject: [PATCH 26/63] Use a `PromptKind` enum to discriminate prompts --- crates/ark/src/interface.rs | 58 +++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 5ca4b7aad..01879c010 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -395,6 +395,19 @@ pub struct KernelInfo { pub continuation_prompt: Option, } +/// The kind of prompt we're handling in the REPL.
+#[derive(Clone, Debug, PartialEq)] +pub enum PromptKind { + /// A top-level REPL prompt + TopLevel, + + /// A `browser()` debugging prompt + Browser, + + /// A user input request from code, e.g., via `readline()` + InputRequest, +} + /// This struct represents the data that we wish R would pass to /// `ReadConsole()` methods. We need this information to determine what kind /// of prompt we are dealing with. @@ -414,13 +427,8 @@ pub struct PromptInfo { /// error on them rather than requesting that this be shown. continuation_prompt: String, - /// Whether this is a `browser()` prompt. A browser prompt is never a user - /// request. - browser: bool, - - /// Whether this is a prompt from a fresh REPL iteration (browser or - /// top level) or a prompt from some user code, e.g. via `readline()` - input_request: bool, + /// The kind of prompt we're handling. + kind: PromptKind, } pub enum ConsoleInput { @@ -870,7 +878,7 @@ impl RMain { // Invariant: If we detect a browser prompt, `self.dap.is_debugging()` // is true. Otherwise it is false. - if info.browser { + if matches!(info.kind, PromptKind::Browser) { // Start or continue debugging with the `debug_preserve_focus` hint // from the last expression we evaluated self.start_debug(self.debug_preserve_focus); @@ -884,7 +892,7 @@ impl RMain { // Reply to active request with error self.handle_active_request(&info, ConsoleValue::Error(exception)); - } else if info.input_request { + } else if matches!(info.kind, PromptKind::InputRequest) { // Request input reply to the frontend and return it to R return self.handle_input_request(&info, buf, buflen); } else if let Some(input) = self.pop_pending() { @@ -904,7 +912,7 @@ impl RMain { // often. We'd still push a `DidChangeConsoleInputs` notification from // here, but only containing high-level information such as `search()` // contents and `ls(rho)`. 
- if !self.dap.is_debugging() && !info.input_request { + if !self.dap.is_debugging() && !matches!(info.kind, PromptKind::InputRequest) { self.refresh_lsp(); } @@ -955,16 +963,16 @@ impl RMain { let tasks_interrupt_index = select.recv(&tasks_interrupt_rx); let polled_events_index = select.recv(&polled_events_rx); - // Don't process idle tasks in browser prompts. We currently don't want + // Don't process idle tasks unless at top level. We currently don't want // idle tasks (e.g. for srcref generation) to run when the call stack is - // empty. We could make this configurable though if needed, i.e. some + // not empty. We could make this configurable though if needed, i.e. some // idle tasks would be able to run in the browser. Those should be sent // to a dedicated channel that would always be included in the set of // recv channels. - let tasks_idle_index = if info.browser { - None - } else { + let tasks_idle_index = if matches!(info.kind, PromptKind::TopLevel) { Some(select.recv(&tasks_idle_rx)) + } else { + None }; loop { @@ -976,7 +984,7 @@ impl RMain { // `UserBreak`, but won't actually fire the interrupt b/c // we have them disabled, so it would end up swallowing the // user interrupt request. - if info.input_request && interrupts_pending() { + if matches!(info.kind, PromptKind::InputRequest) && interrupts_pending() { return ConsoleResult::Interrupt; } @@ -1068,15 +1076,21 @@ impl RMain { // chosen to not support these edge cases. let browser = RE_DEBUG_PROMPT.is_match(&prompt); - // If there are frames on the stack and we're not in a browser prompt, - // this means some user code is requesting input, e.g. via `readline()` - let user_request = !browser && n_frame > 0; + // Determine the prompt kind based on context + let kind = if browser { + PromptKind::Browser + } else if n_frame > 0 { + // If there are frames on the stack and we're not in a browser prompt, + // this means some user code is requesting input, e.g. 
via `readline()` + PromptKind::InputRequest + } else { + PromptKind::TopLevel + }; return PromptInfo { input_prompt: prompt, continuation_prompt, - browser, - input_request: user_request, + kind, }; } @@ -1213,7 +1227,7 @@ impl RMain { buf: *mut c_uchar, buflen: c_int, ) -> Option { - if info.input_request { + if matches!(info.kind, PromptKind::InputRequest) { panic!("Unexpected `execute_request` while waiting for `input_reply`."); } From 3012ee05a087c840b13d11bc684c96e81349c846 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 5 Nov 2025 11:12:21 +0100 Subject: [PATCH 27/63] Evaluate in current environment --- crates/ark/src/interface.rs | 13 ++++++++--- crates/ark/tests/kernel.rs | 44 +++++++++++++++++++++++++++++++++++++ crates/harp/src/lib.rs | 1 + crates/harp/src/session.rs | 17 ++++++++++++++ 4 files changed, 72 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 01879c010..bb69188d4 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1387,14 +1387,18 @@ impl RMain { // SAFETY: Call this from a POD frame. Inputs must be protected. unsafe fn eval(expr: libr::SEXP, srcref: libr::SEXP, buf: *mut c_uchar, buflen: c_int) { + let frame = harp::r_current_frame(); + // SAFETY: This may jump in case of error, keep this POD unsafe { + let frame = libr::Rf_protect(frame.into()); + // The global source reference is stored in this global variable by // the R REPL before evaluation. We do the same here. libr::set(libr::R_Srcref, srcref); // Evaluate the expression. Beware: this may throw an R longjump. - let value = libr::Rf_eval(expr, R_ENVS.global); + let value = libr::Rf_eval(expr, frame); libr::Rf_protect(value); // Store in the base environment for robust access from (almost) any @@ -1404,7 +1408,7 @@ impl RMain { // is stored in the `value` field of symbols, i.e. their "CDR". 
libr::SETCDR(r_symbol!(".ark_last_value"), value); - libr::Rf_unprotect(1); + libr::Rf_unprotect(2); value }; @@ -2643,7 +2647,10 @@ fn do_resource_namespaces() -> bool { fn is_auto_printing() -> bool { let n_frame = harp::session::r_n_frame().unwrap(); - // The call-stack is empty so this must be R auto-printing an unclassed object + // The call-stack is empty so this must be R auto-printing an unclassed + // object. Note that this might wrongly return true in debug REPLs. Ideally + // we'd take note of the number of frames on the stack when we enter + // `r_read_console()`, and compare against that. if n_frame == 0 { return true; } diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 6e9f38780..d53fcaa46 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -621,6 +621,50 @@ fn test_execute_request_browser_multiple_expressions() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +#[test] +fn test_execute_request_browser_local_variable() { + let frontend = DummyArkFrontend::lock(); + + let code = "local({\n local_foo <- 1\n browser()\n})"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout( + "Called from: eval(quote({\n local_foo <- 1\n browser()\n}), new.env())\n", + ); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "local_foo"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // Should ideally be `recv_iopub_execute_result()`, but auto-printing + // detection currently does not work reliably in debug REPLs + frontend.recv_iopub_stream_stdout("[1] 1\n"); + frontend.recv_iopub_idle(); + 
assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "Q"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + #[test] fn test_execute_request_error() { let frontend = DummyArkFrontend::lock(); diff --git a/crates/harp/src/lib.rs b/crates/harp/src/lib.rs index ea4d66e9f..2e5d17cc1 100644 --- a/crates/harp/src/lib.rs +++ b/crates/harp/src/lib.rs @@ -70,6 +70,7 @@ pub(crate) use harp::fixtures::r_task; pub use harp::object::list_get; pub use harp::object::list_poke; pub use harp::object::RObject; +pub use harp::session::*; pub use harp::symbol::RSymbol; pub use harp::utils::get_option; pub use harp::weak_ref::RWeakRef; diff --git a/crates/harp/src/session.rs b/crates/harp/src/session.rs index 6d75e53b0..261ab0164 100644 --- a/crates/harp/src/session.rs +++ b/crates/harp/src/session.rs @@ -27,6 +27,7 @@ static SESSION_INIT: Once = Once::new(); static mut NFRAME_CALL: Option = None; static mut SYS_CALLS_CALL: Option = None; static mut SYS_FRAMES_CALL: Option = None; +static mut CURRENT_ENV_CALL: Option = None; pub fn r_n_frame() -> crate::Result { SESSION_INIT.call_once(init_interface); @@ -60,6 +61,11 @@ pub fn r_sys_frames() -> crate::Result { } } +pub fn r_current_frame() -> RObject { + SESSION_INIT.call_once(init_interface); + unsafe { libr::Rf_eval(CURRENT_ENV_CALL.unwrap_unchecked(), R_BaseEnv) }.into() +} + pub fn r_sys_functions() -> crate::Result { unsafe { let mut protect = RProtect::new(); @@ -150,5 +156,16 @@ fn init_interface() { let sys_frames_call = r_lang!(r_symbol!("sys.frames")); R_PreserveObject(sys_frames_call); SYS_FRAMES_CALL = Some(sys_frames_call); + + // Create a closure that calls `sys.frame(-1)` to get the current + // evaluation environment. 
We use `sys.frame(-1)` from within a closure + // because `sys.nframe()` returns the frame number where evaluation + // occurs, not the number of frames on the stack. By calling from a + // closure, we push a new frame and use negative indexing to get the + // previous frame (the actual current environment). + let closure = harp::parse_eval_base("function() sys.frame(-1)").unwrap(); + let current_env_call = r_lang!(closure.sexp); + R_PreserveObject(current_env_call); + CURRENT_ENV_CALL = Some(current_env_call); } } From c4cf3edc8c8e00b69d4b848095fdbcb71280b2eb Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 5 Nov 2025 11:33:04 +0100 Subject: [PATCH 28/63] Keep track of console frame via `r_read_console()` --- crates/ark/src/interface.rs | 105 ++++++++---------- .../sources/composite/search_path.rs | 18 +-- 2 files changed, 52 insertions(+), 71 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index bb69188d4..993b1c65f 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -261,11 +261,6 @@ pub struct RMain { /// reliable indication of whether we moved since last time. debug_last_stack: Vec, - /// Current topmost environment on the stack while waiting for input in the - /// debugger. This is `Some()` only when R is idle and in a `browser()` - /// prompt. - debug_env: Option, - /// Ever increasing debug session index. Used to create URIs that are only /// valid for a single session. debug_session_index: u32, @@ -287,6 +282,9 @@ pub struct RMain { /// Used to track an input to evaluate upon returning to `r_read_console()`, /// after having returned a dummy input to reset `R_ConsoleIob` in R's REPL. 
next_read_console_input: Cell>, + + /// Current topmost environment on the stack while waiting for input in ReadConsole + read_console_frame: RefCell, } /// Stack of pending inputs @@ -738,13 +736,13 @@ impl RMain { captured_output: String::new(), debug_preserve_focus: false, debug_last_stack: vec![], - debug_env: None, debug_session_index: 1, pending_inputs: None, read_console_depth: Cell::new(0), nested_read_console_returned: Cell::new(false), read_console_threw_error: Cell::new(false), next_read_console_input: Cell::new(None), + read_console_frame: RefCell::new(RObject::new(unsafe { libr::R_GlobalEnv })), } } @@ -1181,11 +1179,6 @@ impl RMain { Some(exception) } - fn read_console_cleanup(&mut self) { - // The debug environment is only valid while R is idle - self.debug_env = None; - } - /// Returns: /// - `None` if we should fall through to the event loop to wait for more user input /// - `Some(ConsoleResult)` if we should immediately exit `read_console()` @@ -1468,13 +1461,6 @@ impl RMain { fn start_debug(&mut self, debug_preserve_focus: bool) { match self.dap.stack_info() { Ok(stack) => { - if let Some(frame) = stack.first() { - if let Some(ref env) = frame.environment { - // This is reset on exit in the cleanup phase, see `r_read_console()` - self.debug_env = Some(env.get().clone()); - } - } - // Figure out whether we changed location since last time, // e.g. because the user evaluated an expression that hit // another breakpoint. In that case we do want to move @@ -2356,8 +2342,8 @@ impl RMain { } #[cfg(not(test))] // Avoid warnings in unit test - pub(crate) fn debug_env(&self) -> Option { - self.debug_env.clone() + pub(crate) fn read_console_frame(&self) -> RObject { + self.read_console_frame.borrow().clone() } } @@ -2410,49 +2396,50 @@ pub extern "C-unwind" fn r_read_console( // evaluation. Ideally R would extend their frontend API so that this would // only be necessary for backward compatibility with old versions of R. 
- { - let main = RMain::get_mut(); + let main = RMain::get_mut(); - // We've finished evaluating a dummy value to reset state in R's REPL, - // and are now ready to evaluate the actual input - if let Some(next_input) = main.next_read_console_input.take() { - RMain::on_console_input(buf, buflen, next_input).unwrap(); - return 1; - } + // We've finished evaluating a dummy value to reset state in R's REPL, + // and are now ready to evaluate the actual input + if let Some(next_input) = main.next_read_console_input.take() { + RMain::on_console_input(buf, buflen, next_input).unwrap(); + return 1; + } - // In case of error, we haven't had a chance to evaluate ".ark_last_value". - // So we return to the R REPL to give R a chance to run the state - // restoration that occurs between `R_ReadConsole()` and `eval()`: - // - R_PPStackTop: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L227 - // - R_EvalDepth: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L260 - // - // Technically this also resets time limits (see `base::setTimeLimit()`) but - // these aren't supported in Ark because they cause errors when we poll R - // events. - if main.last_error.is_some() && main.read_console_threw_error.get() { - main.read_console_threw_error.set(false); + // In case of error, we haven't had a chance to evaluate ".ark_last_value". + // So we return to the R REPL to give R a chance to run the state + // restoration that occurs between `R_ReadConsole()` and `eval()`: + // - R_PPStackTop: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L227 + // - R_EvalDepth: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L260 + // + // Technically this also resets time limits (see `base::setTimeLimit()`) but + // these aren't supported in Ark because they cause errors when we poll R + // events. 
+ if main.last_error.is_some() && main.read_console_threw_error.get() { + main.read_console_threw_error.set(false); + + // Evaluate last value so that `base::.Last.value` remains the same + RMain::on_console_input( + buf, + buflen, + String::from("base::invisible(base::.Last.value)"), + ) + .unwrap(); + return 1; + } - // Evaluate last value so that `base::.Last.value` remains the same - RMain::on_console_input( - buf, - buflen, - String::from("base::invisible(base::.Last.value)"), - ) - .unwrap(); - return 1; - } + // Track nesting depth of ReadConsole REPLs + main.read_console_depth + .set(main.read_console_depth.get() + 1); - // Track nesting depth of ReadConsole REPLs - main.read_console_depth - .set(main.read_console_depth.get() + 1); + // Reset flag that helps us figure out when a nested REPL returns + main.nested_read_console_returned.set(false); - // Reset flag that helps us figure out when a nested REPL returns - main.nested_read_console_returned.set(false); + // Reset flag that helps us figure out when an error occurred and needs a + // reset of `R_EvalDepth` and friends + main.read_console_threw_error.set(true); - // Reset flag that helps us figure out when an error occurred and needs a - // reset of `R_EvalDepth` and friends - main.read_console_threw_error.set(true); - } + // Set current frame environment + let current_frame = main.read_console_frame.replace(harp::r_current_frame()); exec_with_cleanup( || { @@ -2475,6 +2462,9 @@ pub extern "C-unwind" fn r_read_console( // nested console returned (if it indeed returns instead of looping // for another iteration) main.nested_read_console_returned.set(true); + + // Restore current frame + main.read_console_frame.replace(current_frame); }, ) } @@ -2487,7 +2477,6 @@ fn r_read_console_impl( hist: c_int, ) -> i32 { let result = r_sandbox(|| main.read_console(prompt, buf, buflen, hist)); - main.read_console_cleanup(); let result = unwrap!(result, Err(err) => { panic!("Unexpected longjump while reading from 
console: {err:?}"); diff --git a/crates/ark/src/lsp/completions/sources/composite/search_path.rs b/crates/ark/src/lsp/completions/sources/composite/search_path.rs index 0adfd8004..273cd7951 100644 --- a/crates/ark/src/lsp/completions/sources/composite/search_path.rs +++ b/crates/ark/src/lsp/completions/sources/composite/search_path.rs @@ -13,7 +13,6 @@ use harp::vector::CharacterVector; use harp::vector::Vector; use harp::RObject; use libr::R_EmptyEnv; -use libr::R_GlobalEnv; use libr::R_lsInternal; use libr::ENCLOS; use tower_lsp::lsp_types::CompletionItem; @@ -51,19 +50,12 @@ fn completions_from_search_path( ]; unsafe { - // Iterate through environments starting from the global environment. - let mut env = R_GlobalEnv; - - // If we're waiting for input in `read_console()` with a debugger - // prompt, start from current environment + // Iterate through environments starting from the current frame environment. #[cfg(not(test))] // Unit tests do not have an `RMain` - { - use crate::interface::RMain; - if let Some(debug_env) = &RMain::get().debug_env() { - // Mem-Safety: Object protected by `RMain` for the duration of the `r_task()` - env = debug_env.sexp; - } - } + // Mem-Safety: Object protected by `RMain` for the duration of the `r_task()` + let mut env = crate::interface::RMain::get().read_console_frame().sexp; + #[cfg(test)] + let mut env = libr::R_GlobalEnv; while env != R_EmptyEnv { let is_pkg_env = r_env_is_pkg_env(env); From ab17063b24230586dbca588ac9d019c944914035 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 5 Nov 2025 18:37:57 +0100 Subject: [PATCH 29/63] Clearer documentation and variable name --- crates/ark/src/interface.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 993b1c65f..2bf0b464e 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -2427,20 +2427,25 @@ pub extern "C-unwind" fn r_read_console( return 1; } 
- // Track nesting depth of ReadConsole REPLs + // Keep track of state that we care about + + // - Track nesting depth of ReadConsole REPLs main.read_console_depth .set(main.read_console_depth.get() + 1); - // Reset flag that helps us figure out when a nested REPL returns + // - Set current frame environment + let old_current_frame = main.read_console_frame.replace(harp::r_current_frame()); + + // Keep track of state that we use for workarounds while interacting + // with the R REPL and force it to reset state + + // - Reset flag that helps us figure out when a nested REPL returns main.nested_read_console_returned.set(false); - // Reset flag that helps us figure out when an error occurred and needs a - // reset of `R_EvalDepth` and friends + // - Reset flag that helps us figure out when an error occurred and needs a + // reset of `R_EvalDepth` and friends main.read_console_threw_error.set(true); - // Set current frame environment - let current_frame = main.read_console_frame.replace(harp::r_current_frame()); - exec_with_cleanup( || { let main = RMain::get_mut(); @@ -2464,7 +2469,7 @@ pub extern "C-unwind" fn r_read_console( main.nested_read_console_returned.set(true); // Restore current frame - main.read_console_frame.replace(current_frame); + main.read_console_frame.replace(old_current_frame); }, ) } From 42676d1c4869da84ecf4c621b3103abd516dcea6 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 5 Nov 2025 13:12:41 +0100 Subject: [PATCH 30/63] Add shutdown handling to dummy frontend --- .../amalthea/src/fixtures/dummy_frontend.rs | 33 +++++++++++++++++-- crates/amalthea/src/wire/jupyter_message.rs | 6 ++++ crates/amalthea/tests/client.rs | 29 ++++++++++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/crates/amalthea/src/fixtures/dummy_frontend.rs b/crates/amalthea/src/fixtures/dummy_frontend.rs index b6520ab7b..768523601 100644 --- a/crates/amalthea/src/fixtures/dummy_frontend.rs +++ b/crates/amalthea/src/fixtures/dummy_frontend.rs @@ 
-21,6 +21,8 @@ use crate::wire::jupyter_message::JupyterMessage; use crate::wire::jupyter_message::Message; use crate::wire::jupyter_message::ProtocolMessage; use crate::wire::jupyter_message::Status; +use crate::wire::shutdown_reply::ShutdownReply; +use crate::wire::shutdown_request::ShutdownRequest; use crate::wire::status::ExecutionState; use crate::wire::stream::Stream; use crate::wire::wire_message::WireMessage; @@ -36,7 +38,7 @@ pub struct DummyConnection { } pub struct DummyFrontend { - pub _control_socket: Socket, + pub control_socket: Socket, pub shell_socket: Socket, pub iopub_socket: Socket, pub stdin_socket: Socket, @@ -132,7 +134,7 @@ impl DummyFrontend { // the Jupyter specification, these must share a ZeroMQ identity. let shell_id = rand::thread_rng().gen::<[u8; 16]>(); - let _control_socket = Socket::new( + let control_socket = Socket::new( connection.session.clone(), connection.ctx.clone(), String::from("Control"), @@ -198,7 +200,7 @@ impl DummyFrontend { }); Self { - _control_socket, + control_socket, shell_socket, iopub_socket, stdin_socket, @@ -207,12 +209,22 @@ impl DummyFrontend { } } + /// Sends a Jupyter message on the Control socket; returns the ID of the newly + /// created message + pub fn send_control(&self, msg: T) -> String { + Self::send(&self.control_socket, &self.session, msg) + } + /// Sends a Jupyter message on the Shell socket; returns the ID of the newly /// created message pub fn send_shell(&self, msg: T) -> String { Self::send(&self.shell_socket, &self.session, msg) } + pub fn send_shutdown_request(&self, restart: bool) -> String { + self.send_control(ShutdownRequest { restart }) + } + pub fn send_execute_request(&self, code: &str, options: ExecuteRequestOptions) -> String { self.send_shell(ExecuteRequest { code: String::from(code), @@ -256,6 +268,12 @@ impl DummyFrontend { panic!("Timeout while expecting message on socket {}", socket.name); } + /// Receives a Jupyter message from the Control socket + #[track_caller] + pub fn 
recv_control(&self) -> Message { + Self::recv(&self.control_socket) + } + /// Receives a Jupyter message from the Shell socket #[track_caller] pub fn recv_shell(&self) -> Message { @@ -274,6 +292,15 @@ impl DummyFrontend { Self::recv(&self.stdin_socket) } + /// Receive from Control and assert `ShutdownReply` message. + #[track_caller] + pub fn recv_control_shutdown_reply(&self) -> ShutdownReply { + let message = self.recv_control(); + assert_matches!(message, Message::ShutdownReply(message) => { + message.content + }) + } + /// Receive from Shell and assert `ExecuteReply` message. /// Returns `execution_count`. #[track_caller] diff --git a/crates/amalthea/src/wire/jupyter_message.rs b/crates/amalthea/src/wire/jupyter_message.rs index 76d835605..b60ae002f 100644 --- a/crates/amalthea/src/wire/jupyter_message.rs +++ b/crates/amalthea/src/wire/jupyter_message.rs @@ -46,6 +46,7 @@ use crate::wire::is_complete_reply::IsCompleteReply; use crate::wire::is_complete_request::IsCompleteRequest; use crate::wire::kernel_info_request::KernelInfoRequest; use crate::wire::originator::Originator; +use crate::wire::shutdown_reply::ShutdownReply; use crate::wire::shutdown_request::ShutdownRequest; use crate::wire::status::KernelStatus; use crate::wire::wire_message::WireMessage; @@ -101,6 +102,7 @@ pub enum Message { // Control InterruptReply(JupyterMessage), InterruptRequest(JupyterMessage), + ShutdownReply(JupyterMessage), ShutdownRequest(JupyterMessage), // Registration HandshakeRequest(JupyterMessage), @@ -163,6 +165,7 @@ impl TryFrom<&Message> for WireMessage { Message::IsCompleteRequest(msg) => WireMessage::try_from(msg), Message::KernelInfoReply(msg) => WireMessage::try_from(msg), Message::KernelInfoRequest(msg) => WireMessage::try_from(msg), + Message::ShutdownReply(msg) => WireMessage::try_from(msg), Message::ShutdownRequest(msg) => WireMessage::try_from(msg), Message::Status(msg) => WireMessage::try_from(msg), Message::CommInfoReply(msg) => WireMessage::try_from(msg), @@ 
-245,6 +248,9 @@ impl TryFrom<&WireMessage> for Message { if kind == UpdateDisplayData::message_type() { return Ok(Message::UpdateDisplayData(JupyterMessage::try_from(msg)?)); } + if kind == ShutdownReply::message_type() { + return Ok(Message::ShutdownReply(JupyterMessage::try_from(msg)?)); + } if kind == ShutdownRequest::message_type() { return Ok(Message::ShutdownRequest(JupyterMessage::try_from(msg)?)); } diff --git a/crates/amalthea/tests/client.rs b/crates/amalthea/tests/client.rs index e20f27fdb..26f904093 100644 --- a/crates/amalthea/tests/client.rs +++ b/crates/amalthea/tests/client.rs @@ -19,6 +19,7 @@ use amalthea::wire::comm_info_request::CommInfoRequest; use amalthea::wire::comm_msg::CommWireMsg; use amalthea::wire::comm_open::CommOpen; use amalthea::wire::jupyter_message::Message; +use amalthea::wire::jupyter_message::Status; use amalthea::wire::kernel_info_request::KernelInfoRequest; use amalthea::wire::status::ExecutionState; use assert_matches::assert_matches; @@ -63,6 +64,34 @@ fn test_amalthea_execute_request() { frontend.recv_iopub_idle(); } +#[test] +fn test_amalthea_shutdown_request() { + let frontend = DummyAmaltheaFrontend::lock(); + + // Send a shutdown request with restart = false + frontend.send_shutdown_request(false); + + // Shutdown requests generate busy/idle status messages on IOPub + frontend.recv_iopub_busy(); + + // Receive the shutdown reply + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, false); + + frontend.recv_iopub_idle(); + + // Test with restart = true + frontend.send_shutdown_request(true); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, true); + + frontend.recv_iopub_idle(); +} + #[test] fn test_amalthea_input_request() { let frontend = DummyAmaltheaFrontend::lock(); From 64510f8921cbe5322d09f257c3af2f6f7a13ca3c Mon Sep 17 00:00:00 2001 From: 
Lionel Henry Date: Wed, 5 Nov 2025 19:41:15 +0100 Subject: [PATCH 31/63] Add integration tests for shutdown --- crates/amalthea/src/socket/control.rs | 5 ++++ crates/ark/src/fixtures/dummy_frontend.rs | 18 ++++++++++++ crates/ark/src/interface.rs | 22 ++++++++++++++ crates/ark/src/sys/unix/interface.rs | 8 ++++++ crates/ark/tests/kernel.rs | 35 +++++++++++++++++++++++ crates/libr/src/r.rs | 3 ++ 6 files changed, 91 insertions(+) diff --git a/crates/amalthea/src/socket/control.rs b/crates/amalthea/src/socket/control.rs index 8e7d05716..829732da8 100644 --- a/crates/amalthea/src/socket/control.rs +++ b/crates/amalthea/src/socket/control.rs @@ -102,6 +102,11 @@ impl Control { H: FnOnce(JupyterMessage) -> Result<(), Error>, { // Enter the kernel-busy state in preparation for handling the message. + // The protocol specification does not mandate status messages for + // Control, but we emit them for compatibility with ipykernel: + // https://github.com/ipython/ipykernel/pull/585. These status messages + // can be discriminated from those on Shell by examining the parent + // header. if let Err(err) = self.send_state(req.clone(), ExecutionState::Busy) { warn!("Failed to change kernel status to busy: {err}"); } diff --git a/crates/ark/src/fixtures/dummy_frontend.rs b/crates/ark/src/fixtures/dummy_frontend.rs index 5823dd16c..74a951b7d 100644 --- a/crates/ark/src/fixtures/dummy_frontend.rs +++ b/crates/ark/src/fixtures/dummy_frontend.rs @@ -4,11 +4,13 @@ use std::sync::Arc; use std::sync::Mutex; use std::sync::MutexGuard; use std::sync::OnceLock; +use std::time::Duration; use amalthea::fixtures::dummy_frontend::DummyConnection; use amalthea::fixtures::dummy_frontend::DummyFrontend; use crate::interface::SessionMode; +use crate::interface::CLEANUP_SIGNAL; use crate::repos::DefaultRepos; // There can be only one frontend per process. 
Needs to be in a mutex because @@ -62,6 +64,22 @@ impl DummyArkFrontend { } } + /// Wait for R cleanup to start (indicating shutdown has been initiated). + /// Panics if cleanup doesn't start within the timeout. + #[track_caller] + pub fn wait_for_cleanup() { + let (lock, cvar) = &CLEANUP_SIGNAL; + let result = cvar + .wait_timeout_while(lock.lock().unwrap(), Duration::from_secs(3), |started| { + !*started + }) + .unwrap(); + + if !*result.0 { + panic!("Cleanup did not start within timeout"); + } + } + fn get_frontend() -> &'static Arc> { // These are the hard-coded defaults. Call `init()` explicitly to // override. diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 2bf0b464e..dc6b8592e 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -2585,6 +2585,28 @@ pub unsafe extern "C-unwind" fn r_polled_events() { }; } +// For integration tests +use std::sync::Condvar; +pub static CLEANUP_SIGNAL: (Mutex, Condvar) = (Mutex::new(false), Condvar::new()); + +#[no_mangle] +pub extern "C-unwind" fn r_cleanup_for_tests(_save_act: i32, _status: i32, _run_last: i32) { + // Signal that cleanup has started + let (lock, cvar) = &CLEANUP_SIGNAL; + + let mut started = lock.lock().unwrap(); + *started = true; + + cvar.notify_all(); + drop(started); + + // Sleep to give tests time to complete before R calls `exit()` + std::thread::sleep(std::time::Duration::from_secs(5)); + + // Fallthrough to R which will call `exit()`. Note that panicking from here + // would be UB, we can't panic over a C stack.
+} + // This hook is called like a user onLoad hook but for every package to be // loaded in the session #[harp::register] diff --git a/crates/ark/src/sys/unix/interface.rs b/crates/ark/src/sys/unix/interface.rs index db5bc60ce..0748ea367 100644 --- a/crates/ark/src/sys/unix/interface.rs +++ b/crates/ark/src/sys/unix/interface.rs @@ -73,6 +73,14 @@ pub fn setup_r(args: &Vec) { libr::set(ptr_R_Busy, Some(r_busy)); libr::set(ptr_R_Suicide, Some(r_suicide)); + if stdext::IS_TESTING { + use libr::ptr_R_CleanUp; + + use crate::interface::r_cleanup_for_tests; + + libr::set(ptr_R_CleanUp, Some(r_cleanup_for_tests)); + } + // In tests R may be run from various threads. This confuses R's stack // overflow checks so we disable those. This should not make it in // production builds as it causes stack overflows to crash R instead of diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index d53fcaa46..73cde8eec 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -1,5 +1,6 @@ use amalthea::fixtures::dummy_frontend::ExecuteRequestOptions; use amalthea::wire::jupyter_message::Message; +use amalthea::wire::jupyter_message::Status; use amalthea::wire::kernel_info_request::KernelInfoRequest; use ark::fixtures::DummyArkFrontend; use stdext::assert_match; @@ -1143,3 +1144,37 @@ fn test_env_vars() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } + +// Note that because of these shutdown tests you _have_ to use `cargo nextest` +// instead of `cargo test`, so that each test has its own process and R thread. 
+#[test] +fn test_shutdown_request() { + let frontend = DummyArkFrontend::lock(); + + frontend.send_shutdown_request(false); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, false); + + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} + +#[test] +fn test_shutdown_request_with_restart() { + let frontend = DummyArkFrontend::lock(); + + frontend.send_shutdown_request(true); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, true); + + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} diff --git a/crates/libr/src/r.rs b/crates/libr/src/r.rs index 8fb8b4e1f..510a57dad 100644 --- a/crates/libr/src/r.rs +++ b/crates/libr/src/r.rs @@ -760,6 +760,9 @@ mutable_globals::generate! { #[cfg(target_family = "unix")] pub static mut ptr_R_Suicide: Option; + #[cfg(target_family = "unix")] + pub static mut ptr_R_CleanUp: Option; + // ----------------------------------------------------------------------------------- // Windows From eadbc10733f9ac1488673f3ad3ecd38cdf37daf6 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 5 Nov 2025 19:53:58 +0100 Subject: [PATCH 32/63] Prevent R from asking about saving workspace We've never supported it, so better be explicit about it. This allows us to simplify some special-casing for that question. Also set `--no-restore-data` by default for consistency. 
--- crates/ark/src/interface.rs | 9 --------- crates/ark/src/main.rs | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index dc6b8592e..93cc057ce 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1289,15 +1289,6 @@ impl RMain { buf: *mut c_uchar, buflen: c_int, ) -> ConsoleResult { - // If the prompt begins with "Save workspace", respond with (n) - // and allow R to immediately exit. - if info.input_prompt.starts_with("Save workspace") { - match Self::on_console_input(buf, buflen, String::from("n")) { - Ok(()) => return ConsoleResult::NewInput, - Err(err) => return ConsoleResult::Error(err), - } - } - if let Some(req) = &self.active_request { // Send request to frontend. We'll wait for an `input_reply` // from the frontend in the event loop in `read_console()`. diff --git a/crates/ark/src/main.rs b/crates/ark/src/main.rs index b5e4d202d..746e43058 100644 --- a/crates/ark/src/main.rs +++ b/crates/ark/src/main.rs @@ -83,6 +83,9 @@ fn main() -> anyhow::Result<()> { let mut capture_streams = true; let mut default_repos = DefaultRepos::Auto; + // We don't support asking the user whether to save the workspace data + // on exit because calling readline during shutdown puts us in a precarious + // position. So effectively we're implementing "no-save" by default. // Process remaining arguments. TODO: Need an argument that can passthrough args to R while let Some(arg) = argv.next() { match arg.as_str() { @@ -325,6 +328,20 @@ fn main() -> anyhow::Result<()> { r_args.push(String::from("--interactive")); } + // Prepend the vector of arguments with our default. These can be overridden + // by user arguments (last one wins). + r_args.splice(0..0, [ + // We don't support asking the user whether to save the workspace + // data on exit because calling readline during shutdown puts us in a + // precarious position.
So effectively we're implementing "no-save" by + // default. Note that there is no argument to opt into the "ask" + // behaviour, so it can't be reenabled by the user. + String::from("--no-save"), + // Since we don't save by default, we also don't restore by default for + // consistency + String::from("--no-restore-data"), + ]); + // This causes panics on background threads to propagate on the main // thread. If we don't propagate a background thread panic, the program // keeps running in an unstable state as all communications with this From c031575db03280006cde9a900a483f4174e64d28 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 5 Nov 2025 20:01:10 +0100 Subject: [PATCH 33/63] Shutdown all nested consoles in case of Shutdown request --- crates/ark/src/interface.rs | 21 +++++++++++++++++---- crates/ark/tests/kernel.rs | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 93cc057ce..ae90e1010 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -281,7 +281,11 @@ pub struct RMain { /// Used to track an input to evaluate upon returning to `r_read_console()`, /// after having returned a dummy input to reset `R_ConsoleIob` in R's REPL. 
- next_read_console_input: Cell>, + read_console_next_input: Cell>, + + /// We've received a Shutdown signal and need to return EOF from all nested + /// consoles to get R to shut down + read_console_shutdown: Cell, /// Current topmost environment on the stack while waiting for input in ReadConsole read_console_frame: RefCell, @@ -741,8 +745,9 @@ impl RMain { read_console_depth: Cell::new(0), nested_read_console_returned: Cell::new(false), read_console_threw_error: Cell::new(false), - next_read_console_input: Cell::new(None), + read_console_next_input: Cell::new(None), read_console_frame: RefCell::new(RObject::new(unsafe { libr::R_GlobalEnv })), + read_console_shutdown: Cell::new(false), } } @@ -2389,9 +2394,15 @@ pub extern "C-unwind" fn r_read_console( let main = RMain::get_mut(); + // Propagate an EOF event (e.g. from a Shutdown request). We need to exit + // from all consoles on the stack to let R shut down with an `exit()`. + if main.read_console_shutdown.get() { + return 0; + } + // We've finished evaluating a dummy value to reset state in R's REPL, // and are now ready to evaluate the actual input - if let Some(next_input) = main.next_read_console_input.take() { + if let Some(next_input) = main.read_console_next_input.take() { RMain::on_console_input(buf, buflen, next_input).unwrap(); return 1; } @@ -2499,7 +2510,7 @@ fn r_read_console_impl( // a dummy value causing a `PARSE_NULL` event. if main.nested_read_console_returned.get() { let next_input = RMain::console_input(buf, buflen); - main.next_read_console_input.set(Some(next_input)); + main.read_console_next_input.set(Some(next_input)); // Evaluating a space causes a `PARSE_NULL` event. 
Don't // evaluate a newline, that would cause a parent debug REPL @@ -2519,6 +2530,8 @@ fn r_read_console_impl( }, ConsoleResult::Disconnected => { + // Cause parent consoles to shutdown too + main.read_console_shutdown.set(true); return 0; }, diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 73cde8eec..2c3fad113 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -1178,3 +1178,36 @@ fn test_shutdown_request_with_restart() { DummyArkFrontend::wait_for_cleanup(); } + +// Can shut down Ark when running a nested debug console +// https://github.com/posit-dev/positron/issues/6553 +#[test] +fn test_shutdown_request_browser() { + let frontend = DummyArkFrontend::lock(); + + let code = "browser()"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend + .recv_iopub_execute_result() + .contains("Called from: top level")); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + frontend.send_shutdown_request(true); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, true); + + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} From 4ef84e7614343faa4f849dcf0595d91b78bff7cd Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 6 Nov 2025 12:06:01 +0100 Subject: [PATCH 34/63] Send interrupt before shutting down --- crates/ark/src/control.rs | 6 ++++++ crates/ark/tests/kernel.rs | 44 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/crates/ark/src/control.rs b/crates/ark/src/control.rs index 93fcfbc9b..b005e8485 100644 --- a/crates/ark/src/control.rs +++ b/crates/ark/src/control.rs @@ -36,6 +36,12 @@ impl ControlHandler for Control { ) -> Result { 
log::info!("Received shutdown request: {msg:?}"); + // Interrupt any ongoing computation. We shut down from ReadConsole when + // R has become idle again. Note that Positron will have interrupted us + // beforehand, but another frontend might not have, and it's good to + // have this as a defensive measure in any case. + crate::sys::control::handle_interrupt_request(); + // According to the Jupyter protocol we should block here until the // shutdown is complete. However AFAICS ipykernel doesn't wait // until complete shutdown before replying and instead just signals diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 2c3fad113..e4eb4c156 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -3,6 +3,9 @@ use amalthea::wire::jupyter_message::Message; use amalthea::wire::jupyter_message::Status; use amalthea::wire::kernel_info_request::KernelInfoRequest; use ark::fixtures::DummyArkFrontend; +use nix::sys::signal::signal; +use nix::sys::signal::SigHandler; +use nix::sys::signal::Signal; use stdext::assert_match; #[test] @@ -1145,10 +1148,21 @@ fn test_env_vars() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +/// Install a SIGINT handler for shutdown tests. This overrides the test runner +/// handler so it doesn't cancel our test. +fn install_sigint_handler() { + extern "C" fn sigint_handler(_: libc::c_int) {} + #[cfg(unix)] + unsafe { + signal(Signal::SIGINT, SigHandler::Handler(sigint_handler)).unwrap(); + } +} + // Note that because of these shutdown tests you _have_ to use `cargo nextest` // instead of `cargo test`, so that each test has its own process and R thread. 
#[test] fn test_shutdown_request() { + install_sigint_handler(); let frontend = DummyArkFrontend::lock(); frontend.send_shutdown_request(false); @@ -1165,6 +1179,7 @@ fn test_shutdown_request() { #[test] fn test_shutdown_request_with_restart() { + install_sigint_handler(); let frontend = DummyArkFrontend::lock(); frontend.send_shutdown_request(true); @@ -1183,6 +1198,7 @@ fn test_shutdown_request_with_restart() { // https://github.com/posit-dev/positron/issues/6553 #[test] fn test_shutdown_request_browser() { + install_sigint_handler(); let frontend = DummyArkFrontend::lock(); let code = "browser()"; @@ -1211,3 +1227,31 @@ fn test_shutdown_request_browser() { DummyArkFrontend::wait_for_cleanup(); } + +#[test] +fn test_shutdown_request_while_busy() { + install_sigint_handler(); + let frontend = DummyArkFrontend::lock(); + + let code = "Sys.sleep(10)"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.send_shutdown_request(false); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, false); + + frontend.recv_iopub_stream_stderr("\n"); + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} From 9414930207181819a05dc507af77414dbbe0470a Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 6 Nov 2025 15:11:59 +0100 Subject: [PATCH 35/63] Move signal declaration inside of Unix context --- crates/ark/tests/kernel.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index e4eb4c156..68952df3a 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -3,9 +3,6 @@ use 
amalthea::wire::jupyter_message::Message; use amalthea::wire::jupyter_message::Status; use amalthea::wire::kernel_info_request::KernelInfoRequest; use ark::fixtures::DummyArkFrontend; -use nix::sys::signal::signal; -use nix::sys::signal::SigHandler; -use nix::sys::signal::Signal; use stdext::assert_match; #[test] @@ -1154,6 +1151,10 @@ fn install_sigint_handler() { extern "C" fn sigint_handler(_: libc::c_int) {} #[cfg(unix)] unsafe { + use nix::sys::signal::signal; + use nix::sys::signal::SigHandler; + use nix::sys::signal::Signal; + signal(Signal::SIGINT, SigHandler::Handler(sigint_handler)).unwrap(); } } From 1af35e6e86a7b3b0c0046df067337123507d7337 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 6 Nov 2025 15:15:16 +0100 Subject: [PATCH 36/63] Make the error message test less brittle --- crates/ark/tests/kernel.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 68952df3a..11a123cef 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -141,10 +141,7 @@ fn test_execute_request_invalid() { let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - assert_eq!( - frontend.recv_iopub_execute_error(), - "Error:\nSyntax error: unexpected ')'" - ); + assert!(frontend.recv_iopub_execute_error().contains("Syntax error")); frontend.recv_iopub_idle(); From 6396371137a5a5db5bb50e6558e0f88ace5a3dc7 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 6 Nov 2025 15:52:11 +0100 Subject: [PATCH 37/63] Disable brittle tests --- crates/ark/tests/kernel.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 11a123cef..8abfa4047 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -1192,10 +1192,16 @@ fn test_shutdown_request_with_restart() { DummyArkFrontend::wait_for_cleanup(); } +static SHUTDOWN_TESTS_ENABLED: bool = false; + // Can shut down Ark 
when running a nested debug console // https://github.com/posit-dev/positron/issues/6553 #[test] fn test_shutdown_request_browser() { + if !SHUTDOWN_TESTS_ENABLED { + return; + } + install_sigint_handler(); let frontend = DummyArkFrontend::lock(); @@ -1217,6 +1223,9 @@ fn test_shutdown_request_browser() { frontend.send_shutdown_request(true); frontend.recv_iopub_busy(); + // There is a race condition between the Control thread and the Shell + // threads. Ideally we'd wait for both the Shutdown reply and the IOPub Idle + // messages concurrently instead of sequentially. let reply = frontend.recv_control_shutdown_reply(); assert_eq!(reply.status, Status::Ok); assert_eq!(reply.restart, true); @@ -1228,6 +1237,10 @@ fn test_shutdown_request_browser() { #[test] fn test_shutdown_request_while_busy() { + if !SHUTDOWN_TESTS_ENABLED { + return; + } + install_sigint_handler(); let frontend = DummyArkFrontend::lock(); @@ -1245,6 +1258,7 @@ fn test_shutdown_request_while_busy() { assert_eq!(reply.status, Status::Ok); assert_eq!(reply.restart, false); + // It seems this isn't emitted on older R versions frontend.recv_iopub_stream_stderr("\n"); frontend.recv_iopub_idle(); From ca9dbbccbe805882bbd4d991521949f68d2c506d Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 6 Nov 2025 16:09:31 +0100 Subject: [PATCH 38/63] Opt out of Shutdown tests on Windows --- crates/ark/tests/kernel.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 8abfa4047..4d1e851be 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -1159,6 +1159,7 @@ fn install_sigint_handler() { // Note that because of these shutdown tests you _have_ to use `cargo nextest` // instead of `cargo test`, so that each test has its own process and R thread. 
#[test] +#[cfg(unix)] fn test_shutdown_request() { install_sigint_handler(); let frontend = DummyArkFrontend::lock(); @@ -1176,6 +1177,7 @@ fn test_shutdown_request() { } #[test] +#[cfg(unix)] fn test_shutdown_request_with_restart() { install_sigint_handler(); let frontend = DummyArkFrontend::lock(); @@ -1197,6 +1199,7 @@ static SHUTDOWN_TESTS_ENABLED: bool = false; // Can shut down Ark when running a nested debug console // https://github.com/posit-dev/positron/issues/6553 #[test] +#[cfg(unix)] fn test_shutdown_request_browser() { if !SHUTDOWN_TESTS_ENABLED { return; @@ -1236,6 +1239,7 @@ fn test_shutdown_request_browser() { } #[test] +#[cfg(unix)] fn test_shutdown_request_while_busy() { if !SHUTDOWN_TESTS_ENABLED { return; From ce0a7e6d4c0e932b1d2065999f5d83f3fd657a39 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 6 Nov 2025 16:15:26 +0100 Subject: [PATCH 39/63] Improve naming a bit --- crates/ark/src/interface.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index ae90e1010..95f426f46 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -272,7 +272,7 @@ pub struct RMain { /// Set to true when `r_read_console()` exits. Reset to false at the start /// of each `r_read_console()` call. Used to detect if `eval()` returned /// from a nested REPL (the flag will be true when the evaluation returns). - nested_read_console_returned: Cell, + read_console_nested_return: Cell, /// Set to true `r_read_console()` exits via an error longjump. 
Used to /// detect if we need to go return from `r_read_console()` with a dummy @@ -743,7 +743,7 @@ impl RMain { debug_session_index: 1, pending_inputs: None, read_console_depth: Cell::new(0), - nested_read_console_returned: Cell::new(false), + read_console_nested_return: Cell::new(false), read_console_threw_error: Cell::new(false), read_console_next_input: Cell::new(None), read_console_frame: RefCell::new(RObject::new(unsafe { libr::R_GlobalEnv })), @@ -2442,7 +2442,7 @@ pub extern "C-unwind" fn r_read_console( // with the R REPL and force it to reset state // - Reset flag that helps us figure out when a nested REPL returns - main.nested_read_console_returned.set(false); + main.read_console_nested_return.set(false); // - Reset flag that helps us figure out when an error occurred and needs a // reset of `R_EvalDepth` and friends @@ -2468,7 +2468,7 @@ pub extern "C-unwind" fn r_read_console( // Set flag so that parent read console, if any, can detect that a // nested console returned (if it indeed returns instead of looping // for another iteration) - main.nested_read_console_returned.set(true); + main.read_console_nested_return.set(true); // Restore current frame main.read_console_frame.replace(old_current_frame); @@ -2508,7 +2508,7 @@ fn r_read_console_impl( // Check if a nested read_console() just returned. If that's the // case, we need to reset the `R_ConsoleIob` by first returning // a dummy value causing a `PARSE_NULL` event. - if main.nested_read_console_returned.get() { + if main.read_console_nested_return.get() { let next_input = RMain::console_input(buf, buflen); main.read_console_next_input.set(Some(next_input)); @@ -2517,7 +2517,7 @@ fn r_read_console_impl( // to interpret it as `n`, causing it to exit instead of // being a no-op. 
RMain::on_console_input(buf, buflen, String::from(" ")).unwrap(); - main.nested_read_console_returned.set(false); + main.read_console_nested_return.set(false); } libr::Rf_unprotect(2); From e21ce2c06ed9996b61c58d19e43fe224cfb6bf0f Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 6 Nov 2025 22:41:28 +0100 Subject: [PATCH 40/63] Don't include backtrace in syntax errors --- crates/amalthea/src/error.rs | 4 --- crates/ark/src/interface.rs | 59 +++++++++++++++++++++--------------- crates/ark/tests/kernel.rs | 30 +++++++++++++++--- 3 files changed, 61 insertions(+), 32 deletions(-) diff --git a/crates/amalthea/src/error.rs b/crates/amalthea/src/error.rs index c71a9aa59..119a39949 100644 --- a/crates/amalthea/src/error.rs +++ b/crates/amalthea/src/error.rs @@ -43,7 +43,6 @@ pub enum Error { UnknownCommName(String), UnknownCommId(String), InvalidCommMessage(String, String, String), - InvalidInputRequest(String), InvalidConsoleInput(String), Anyhow(anyhow::Error), ShellErrorReply(Exception), @@ -196,9 +195,6 @@ impl fmt::Display for Error { msg, id, err ) }, - Error::InvalidInputRequest(message) => { - write!(f, "{message}") - }, Error::InvalidConsoleInput(message) => { write!(f, "{message}") }, diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 95f426f46..ae33e6413 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -303,13 +303,18 @@ struct PendingInputs { index: isize, } +enum ParseResult { + Success(Option), + SyntaxError(String), +} + impl PendingInputs { - pub(crate) fn read(input: &str) -> anyhow::Result> { + pub(crate) fn read(input: &str) -> anyhow::Result> { let status = match harp::parse_status(&harp::ParseInput::Text(input)) { Err(err) => { // Failed to even attempt to parse the input, something is seriously wrong // FIXME: There are some valid syntax errors going through here, e.g. `identity |> _(1)`. 
- return Err(anyhow!("Failed to parse input: {err:?}")); + return Ok(ParseResult::SyntaxError(format!("{err}"))); }, Ok(status) => status, }; @@ -321,10 +326,12 @@ impl PendingInputs { let exprs = match status { harp::ParseResult::Complete(exprs) => exprs, harp::ParseResult::Incomplete => { - return Err(anyhow!("Can't execute incomplete input:\n{input}")); + return Ok(ParseResult::SyntaxError(format!( + "Can't execute incomplete input:\n{input}" + ))); }, harp::ParseResult::SyntaxError { message, .. } => { - return Err(anyhow!("Syntax error: {message}")); + return Ok(ParseResult::SyntaxError(format!("Syntax error: {message}"))); }, }; @@ -334,15 +341,15 @@ impl PendingInputs { let index = 0; if len == 0 { - return Ok(None); + return Ok(ParseResult::Success(None)); } - Ok(Some(Self { + Ok(ParseResult::Success(Some(Self { exprs, srcrefs, len, index, - })) + }))) } pub(crate) fn is_empty(&self) -> bool { @@ -444,7 +451,7 @@ pub(crate) enum ConsoleResult { NewPendingInput(PendingInput), Interrupt, Disconnected, - Error(amalthea::Error), + Error(String), } impl RMain { @@ -1263,10 +1270,17 @@ impl RMain { ConsoleInput::Input(code) => { // Parse input into pending expressions match PendingInputs::read(&code) { - Ok(inputs) => { + Ok(ParseResult::Success(inputs)) => { self.pending_inputs = inputs; }, - Err(err) => return Some(ConsoleResult::Error(amalthea::anyhow!("{err:?}"))), + Ok(ParseResult::SyntaxError(message)) => { + return Some(ConsoleResult::Error(message)) + }, + Err(err) => { + return Some(ConsoleResult::Error(format!( + "Error while parsing input: {err:?}" + ))) + }, } // Evaluate first expression if there is one @@ -1440,7 +1454,7 @@ impl RMain { log::info!("Detected `readline()` call in renv autoloader. 
Returning `'{input}'`."); match Self::on_console_input(buf, buflen, input) { Ok(()) => return ConsoleResult::NewInput, - Err(err) => return ConsoleResult::Error(err), + Err(err) => return ConsoleResult::Error(format!("{err}")), } } @@ -1451,7 +1465,7 @@ impl RMain { "Are you calling `readline()` or `menu()` from an `.Rprofile` or `.Rprofile.site` file? If so, that is the issue and you should remove that code." ].join("\n"); - return ConsoleResult::Error(Error::InvalidInputRequest(message)); + return ConsoleResult::Error(message); } fn start_debug(&mut self, debug_preserve_focus: bool) { @@ -1608,10 +1622,10 @@ impl RMain { let input = convert_line_endings(&input.value, LineEnding::Posix); match Self::on_console_input(buf, buflen, input) { Ok(()) => ConsoleResult::NewInput, - Err(err) => ConsoleResult::Error(err), + Err(err) => ConsoleResult::Error(format!("{err:?}")), } }, - Err(err) => ConsoleResult::Error(err), + Err(err) => ConsoleResult::Error(format!("{err:?}")), } } @@ -2329,14 +2343,6 @@ impl RMain { } } - fn propagate_error(&mut self, message: String) -> ! { - // Save error message to `RMain`'s buffer to avoid leaking memory when `Rf_error()` jumps. - // Some gymnastics are required to deal with the possibility of `CString` conversion failure - // since the error message comes from the frontend and might be corrupted. - self.r_error_buffer = Some(new_cstring(message)); - unsafe { Rf_error(self.r_error_buffer.as_ref().unwrap().as_ptr()) } - } - #[cfg(not(test))] // Avoid warnings in unit test pub(crate) fn read_console_frame(&self) -> RObject { self.read_console_frame.borrow().clone() @@ -2546,8 +2552,13 @@ fn r_read_console_impl( return 0; }, - ConsoleResult::Error(err) => { - main.propagate_error(format!("{err}")); + ConsoleResult::Error(message) => { + // Save error message in `RMain` to avoid leaking memory when + // `Rf_error()` jumps. 
Some gymnastics are required to deal with the + // possibility of `CString` conversion failure since the error + // message comes from the frontend and might be corrupted. + main.r_error_buffer = Some(new_cstring(message)); + unsafe { Rf_error(main.r_error_buffer.as_ref().unwrap().as_ptr()) } }, }; } diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 4d1e851be..63c99ca61 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -86,6 +86,11 @@ fn test_execute_request_multiple_lines() { #[test] fn test_execute_request_incomplete() { + // Set RUST_BACKTRACE to ensure backtraces are captured. We used to leak + // backtraces in syntax error messages, and this shouldn't happen even when + // `RUST_BACKTRACE` is set. + std::env::set_var("RUST_BACKTRACE", "1"); + let frontend = DummyArkFrontend::lock(); let code = "1 +"; @@ -95,9 +100,10 @@ fn test_execute_request_incomplete() { let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - assert!(frontend - .recv_iopub_execute_error() - .contains("Can't execute incomplete input")); + assert_eq!( + frontend.recv_iopub_execute_error(), + "Error:\nCan't execute incomplete input:\n1 +" + ); frontend.recv_iopub_idle(); @@ -132,6 +138,11 @@ fn test_execute_request_incomplete_multiple_lines() { #[test] fn test_execute_request_invalid() { + // Set RUST_BACKTRACE to ensure backtraces are captured. We used to leak + // backtraces in syntax error messages, and this shouldn't happen even when + // `RUST_BACKTRACE` is set. 
+ std::env::set_var("RUST_BACKTRACE", "1"); + let frontend = DummyArkFrontend::lock(); let code = "1 + )"; @@ -141,7 +152,13 @@ fn test_execute_request_invalid() { let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - assert!(frontend.recv_iopub_execute_error().contains("Syntax error")); + let error_msg = frontend.recv_iopub_execute_error(); + + // Expected error + assert!(error_msg.contains("Syntax error")); + + // Check that no Rust backtrace is injected in the error message + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); frontend.recv_iopub_idle(); @@ -417,6 +434,11 @@ fn test_execute_request_browser_error() { #[test] fn test_execute_request_browser_incomplete() { + // Set RUST_BACKTRACE to ensure backtraces are captured. We used to leak + // backtraces in syntax error messages, and this shouldn't happen even when + // `RUST_BACKTRACE` is set. + std::env::set_var("RUST_BACKTRACE", "1"); + let frontend = DummyArkFrontend::lock(); let code = "browser()"; From 96f22aceeb8d9b42f33fe3e9b9e226cbafedbc2b Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 7 Nov 2025 09:33:27 +0100 Subject: [PATCH 41/63] Fix backtraces in special syntax errors --- crates/ark/tests/kernel.rs | 49 +++++++++++++++++++++++++++++++++++++- crates/harp/src/error.rs | 5 ++-- crates/harp/src/parse.rs | 38 ++++++++++++++++++++--------- 3 files changed, 77 insertions(+), 15 deletions(-) diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 63c99ca61..2ec81c773 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -165,7 +165,54 @@ fn test_execute_request_invalid() { assert_eq!( frontend.recv_shell_execute_reply_exception(), input.execution_count - ) + ); + + // https://github.com/posit-dev/ark/issues/598 + let code = "``"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + 
assert_eq!(input.code, code); + + let error_msg = frontend.recv_iopub_execute_error(); + + // Expected error + assert!(error_msg.contains("Syntax error")); + + // Check that no Rust backtrace is injected in the error message + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + + frontend.recv_iopub_idle(); + + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); + + // https://github.com/posit-dev/ark/issues/722 + + let code = "_ + _()"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + let error_msg = frontend.recv_iopub_execute_error(); + + // Expected error + assert!(error_msg.contains("Syntax error")); + + // Check that no Rust backtrace is injected in the error message + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + + frontend.recv_iopub_idle(); + + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); } #[test] diff --git a/crates/harp/src/error.rs b/crates/harp/src/error.rs index 5b4f2a5f5..5989986f0 100644 --- a/crates/harp/src/error.rs +++ b/crates/harp/src/error.rs @@ -46,7 +46,6 @@ pub enum Error { InvalidUtf8(Utf8Error), ParseSyntaxError { message: String, - line: i32, }, MissingValueError, MissingColumnError { @@ -200,8 +199,8 @@ impl fmt::Display for Error { write!(f, "Invalid UTF-8 in string: {}", error) }, - Error::ParseSyntaxError { message, line } => { - write!(f, "Syntax error on line {} when parsing: {}", line, message) + Error::ParseSyntaxError { message } => { + write!(f, "Syntax error: {}", message) }, Error::MissingValueError => { diff --git a/crates/harp/src/parse.rs b/crates/harp/src/parse.rs index 7e819a8ca..d2426876c 100644 --- a/crates/harp/src/parse.rs +++ b/crates/harp/src/parse.rs @@ -28,7 +28,7 @@ pub struct RParseOptions { pub enum 
ParseResult { Complete(RObject), Incomplete, - SyntaxError { message: String, line: i32 }, + SyntaxError { message: String }, } pub enum ParseInput<'a> { @@ -79,9 +79,7 @@ pub fn parse_exprs_ext<'a>(input: &ParseInput<'a>) -> crate::Result { code: parse_input_as_string(input).unwrap_or(String::from("Conversion error")), message: String::from("Incomplete code"), }), - ParseResult::SyntaxError { message, line } => { - Err(crate::Error::ParseSyntaxError { message, line }) - }, + ParseResult::SyntaxError { message } => Err(crate::Error::ParseSyntaxError { message }), } } @@ -109,17 +107,34 @@ pub fn parse_status<'a>(input: &ParseInput<'a>) -> crate::Result { ParseInput::SrcFile(srcfile) => (srcfile.lines()?, srcfile.inner.clone()), }; - let result: RObject = - try_catch(|| libr::R_ParseVector(text.sexp, -1, &mut status, srcfile.sexp).into())?; + let result = + try_catch(|| libr::R_ParseVector(text.sexp, -1, &mut status, srcfile.sexp).into()); + + let value = match result { + Ok(value) => value, + Err(err) => match err { + // The parser sometimes throws errors instead of returning an + // error flag. Convert these errors to proper syntax errors so + // we don't leak a backtrace making it seem like an internal + // error. + // https://github.com/posit-dev/ark/issues/598 + // https://github.com/posit-dev/ark/issues/722 + crate::Error::TryCatchError { message, .. 
} => { + return Ok(ParseResult::SyntaxError { message }); + }, + _ => { + return Err(err); + }, + }, + }; match status { - libr::ParseStatus_PARSE_OK => Ok(ParseResult::Complete(result)), + libr::ParseStatus_PARSE_OK => Ok(ParseResult::Complete(value)), libr::ParseStatus_PARSE_INCOMPLETE => Ok(ParseResult::Incomplete), libr::ParseStatus_PARSE_ERROR => Ok(ParseResult::SyntaxError { message: CStr::from_ptr(libr::get(libr::R_ParseErrorMsg).as_ptr()) .to_string_lossy() .to_string(), - line: libr::get(libr::R_ParseError) as i32, }), _ => { // Should not get here @@ -207,15 +222,16 @@ mod tests { // Error assert_match!( parse_status(&ParseInput::Text("42 + _")), - Err(_) => {} + Ok(ParseResult::SyntaxError { message }) => { + assert!(message.contains("invalid use of pipe placeholder")); + } ); // "normal" syntax error assert_match!( parse_status(&ParseInput::Text("1+1\n*42")), - Ok(ParseResult::SyntaxError {message, line}) => { + Ok(ParseResult::SyntaxError { message }) => { assert!(message.contains("unexpected")); - assert_eq!(line, 2); } ); From c6699ebf155979b0d0be499bbe3c55a659f1807b Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 7 Nov 2025 09:50:35 +0100 Subject: [PATCH 42/63] Disable error entracing in sensitive tests --- crates/ark/tests/kernel.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 2ec81c773..bb7d5741c 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -5,6 +5,9 @@ use amalthea::wire::kernel_info_request::KernelInfoRequest; use ark::fixtures::DummyArkFrontend; use stdext::assert_match; +// Avoids our global calling handler from rlangifying errors. +// This causes some test instability across configs. 
+ #[test] fn test_kernel_info() { let frontend = DummyArkFrontend::lock(); @@ -93,6 +96,18 @@ fn test_execute_request_incomplete() { let frontend = DummyArkFrontend::lock(); + let code = "options(positron.error_entrace = FALSE)"; + + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + let code = "1 +"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); From 99fa836d7a0ced8d200aca287fbfa94e7bafb4df Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 7 Nov 2025 10:08:16 +0100 Subject: [PATCH 43/63] Extract `FrontendDummy::execute_request()` and variants --- .../amalthea/src/fixtures/dummy_frontend.rs | 71 +++ crates/ark/tests/kernel.rs | 579 +++--------------- 2 files changed, 152 insertions(+), 498 deletions(-) diff --git a/crates/amalthea/src/fixtures/dummy_frontend.rs b/crates/amalthea/src/fixtures/dummy_frontend.rs index 768523601..09b0bfe18 100644 --- a/crates/amalthea/src/fixtures/dummy_frontend.rs +++ b/crates/amalthea/src/fixtures/dummy_frontend.rs @@ -236,6 +236,77 @@ impl DummyFrontend { }) } + /// Sends an execute request and handles the standard message flow: + /// busy -> execute_input -> idle -> execute_reply. + /// Asserts that the input code matches and returns the execution count. 
+ #[track_caller] + pub fn execute_request_invisibly(&self, code: &str) -> u32 { + self.send_execute_request(code, ExecuteRequestOptions::default()); + self.recv_iopub_busy(); + + let input = self.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + self.recv_iopub_idle(); + + let execution_count = self.recv_shell_execute_reply(); + assert_eq!(execution_count, input.execution_count); + + execution_count + } + + /// Sends an execute request and handles the standard message flow with a result: + /// busy -> execute_input -> execute_result -> idle -> execute_reply. + /// Asserts that the input code matches and passes the result to the callback. + /// Returns the execution count. + #[track_caller] + pub fn execute_request(&self, code: &str, result_check: F) -> u32 + where + F: FnOnce(String), + { + self.send_execute_request(code, ExecuteRequestOptions::default()); + self.recv_iopub_busy(); + + let input = self.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + let result = self.recv_iopub_execute_result(); + result_check(result); + + self.recv_iopub_idle(); + + let execution_count = self.recv_shell_execute_reply(); + assert_eq!(execution_count, input.execution_count); + + execution_count + } + + /// Sends an execute request that produces an error and handles the standard message flow: + /// busy -> execute_input -> execute_error -> idle -> execute_reply_exception. + /// Passes the error message to the callback for custom assertions. + /// Returns the execution count. 
+ #[track_caller] + pub fn execute_request_error(&self, code: &str, error_check: F) -> u32 + where + F: FnOnce(String), + { + self.send_execute_request(code, ExecuteRequestOptions::default()); + self.recv_iopub_busy(); + + let input = self.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + let error_msg = self.recv_iopub_execute_error(); + error_check(error_msg); + + self.recv_iopub_idle(); + + let execution_count = self.recv_shell_execute_reply_exception(); + assert_eq!(execution_count, input.execution_count); + + execution_count + } + /// Sends a Jupyter message on the Stdin socket pub fn send_stdin(&self, msg: T) { Self::send(&self.stdin_socket, &self.session, msg); diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index bb7d5741c..3d657ea7b 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -28,63 +28,24 @@ fn test_kernel_info() { #[test] fn test_execute_request() { let frontend = DummyArkFrontend::lock(); - - let code = "42"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - assert_eq!(frontend.recv_iopub_execute_result(), "[1] 42"); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("42", |result| assert_eq!(result, "[1] 42")); } #[test] fn test_execute_request_empty() { let frontend = DummyArkFrontend::lock(); - let code = ""; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly(""); // Equivalent to invisible output - let code = "invisible(1)"; - frontend.send_execute_request(code, 
ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("invisible(1)"); } #[test] fn test_execute_request_multiple_lines() { let frontend = DummyArkFrontend::lock(); - let code = "1 +\n 2+\n 3"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - assert_eq!(frontend.recv_iopub_execute_result(), "[1] 6"); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count) + frontend.execute_request("1 +\n 2+\n 3", |result| assert_eq!(result, "[1] 6")); } #[test] @@ -96,59 +57,20 @@ fn test_execute_request_incomplete() { let frontend = DummyArkFrontend::lock(); - let code = "options(positron.error_entrace = FALSE)"; - - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); + frontend.execute_request_invisibly("options(positron.error_entrace = FALSE)"); - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - - let code = "1 +"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert_eq!( - frontend.recv_iopub_execute_error(), - "Error:\nCan't execute incomplete input:\n1 +" - ); - - frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ) + frontend.execute_request_error("1 +", |error_msg| { + assert_eq!(error_msg, "Error:\nCan't execute incomplete input:\n1 +"); + }); } 
#[test] fn test_execute_request_incomplete_multiple_lines() { let frontend = DummyArkFrontend::lock(); - let code = "1 +\n2 +"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_error() - .contains("Can't execute incomplete input")); - - frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ) + frontend.execute_request_error("1 +\n2 +", |error_msg| { + assert!(error_msg.contains("Can't execute incomplete input")); + }); } #[test] @@ -160,136 +82,44 @@ fn test_execute_request_invalid() { let frontend = DummyArkFrontend::lock(); - let code = "1 + )"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - let error_msg = frontend.recv_iopub_execute_error(); - - // Expected error - assert!(error_msg.contains("Syntax error")); - - // Check that no Rust backtrace is injected in the error message - assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); - - frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ); + frontend.execute_request_error("1 + )", |error_msg| { + assert!(error_msg.contains("Syntax error")); + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + }); // https://github.com/posit-dev/ark/issues/598 - let code = "``"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - let error_msg = frontend.recv_iopub_execute_error(); - - // Expected error - assert!(error_msg.contains("Syntax error")); - - // Check 
that no Rust backtrace is injected in the error message - assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); - - frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ); + frontend.execute_request_error("``", |error_msg| { + assert!(error_msg.contains("Syntax error")); + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + }); // https://github.com/posit-dev/ark/issues/722 - - let code = "_ + _()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - let error_msg = frontend.recv_iopub_execute_error(); - - // Expected error - assert!(error_msg.contains("Syntax error")); - - // Check that no Rust backtrace is injected in the error message - assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); - - frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ); + frontend.execute_request_error("_ + _()", |error_msg| { + assert!(error_msg.contains("Syntax error")); + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + }); } #[test] fn test_execute_request_browser() { let frontend = DummyArkFrontend::lock(); - let code = "browser()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - - let code = "Q"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = 
frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("Q"); } #[test] fn test_execute_request_browser_continue() { let frontend = DummyArkFrontend::lock(); - let code = "browser()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - - let code = "n"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("n"); } #[test] @@ -298,77 +128,22 @@ fn test_execute_request_browser_nested() { let frontend = DummyArkFrontend::lock(); // Start first browser - let code = "browser()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); // Evaluate a value in the 
outer browser - let code = "42"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend.recv_iopub_execute_result().contains("[1] 42")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("42", |result| assert!(result.contains("[1] 42"))); // Start nested browser from within the first browser - let code = "browser()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - // Nested browser() produces execute_result output - frontend.recv_iopub_execute_result(); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("browser()", |_result| {}); // Evaluate a command in the nested browser - let code = "1"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend.recv_iopub_execute_result().contains("[1] 1")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("1", |result| assert!(result.contains("[1] 1"))); // Evaluate another value in the nested browser - let code = "\"hello\""; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend.recv_iopub_execute_result().contains("hello")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("\"hello\"", |result| 
assert!(result.contains("hello"))); // Throw an error in the nested browser let code = "stop('error in nested')"; @@ -384,30 +159,10 @@ fn test_execute_request_browser_nested() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); // Continue to exit the nested browser and return to parent - let code = "c"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("c"); // Back in the parent browser, evaluate another value - let code = "3.14"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend.recv_iopub_execute_result().contains("[1] 3.14")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("3.14", |result| assert!(result.contains("[1] 3.14"))); // Throw an error in the outer browser let code = "stop('error in parent')"; @@ -422,29 +177,9 @@ fn test_execute_request_browser_nested() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "NA"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend.recv_iopub_execute_result().contains("[1] NA")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("NA", |result| assert!(result.contains("[1] NA"))); // Quit the outer browser - let code = "Q"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - 
frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("Q"); } #[test] @@ -456,20 +191,9 @@ fn test_execute_request_browser_error() { let frontend = DummyArkFrontend::lock(); - let code = "browser()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); frontend.send_execute_request("stop('foobar')", ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); @@ -482,16 +206,7 @@ fn test_execute_request_browser_error() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "Q"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("Q"); } #[test] @@ -503,20 +218,9 @@ fn test_execute_request_browser_incomplete() { let frontend = DummyArkFrontend::lock(); - let code = "browser()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); - - frontend.recv_iopub_idle(); - - 
assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); let code = "1 +"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); @@ -582,36 +286,16 @@ fn()"; assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "Q"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("Q"); } #[test] fn test_execute_request_browser_stdin() { let frontend = DummyArkFrontend::lock(); - let code = "browser()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); let options = ExecuteRequestOptions { allow_stdin: true }; let code = "readline('prompt>')"; @@ -630,16 +314,7 @@ fn test_execute_request_browser_stdin() { frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "Q"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("Q"); } #[test] @@ -735,35 +410,16 @@ fn 
test_execute_request_browser_local_variable() { frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "Q"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("Q"); } #[test] fn test_execute_request_error() { let frontend = DummyArkFrontend::lock(); - frontend.send_execute_request("stop('foobar')", ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, "stop('foobar')"); - assert!(frontend.recv_iopub_execute_error().contains("foobar")); - - frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ); + frontend.execute_request_error("stop('foobar')", |error_msg| { + assert!(error_msg.contains("foobar")); + }); } #[test] @@ -802,35 +458,17 @@ fn test_execute_request_error_with_accumulated_output() { #[test] fn test_execute_request_error_expressions_overflow() { let frontend = DummyArkFrontend::lock(); - // Deterministically produce an "evaluation too deeply nested" error - let code = "options(expressions = 100); f <- function(x) if (x > 0 ) f(x - 1); f(100)"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_error() - .contains("evaluation nested too deeply")); - - frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count + // Deterministically produce an "evaluation too deeply nested" error + frontend.execute_request_error( + "options(expressions = 
100); f <- function(x) if (x > 0 ) f(x - 1); f(100)", + |error_msg| { + assert!(error_msg.contains("evaluation nested too deeply")); + }, ); // Check we can still evaluate without causing another too deeply nested error - let code = "f(10)"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("f(10)"); } #[test] @@ -838,59 +476,24 @@ fn test_execute_request_error_expressions_overflow_last_value() { let frontend = DummyArkFrontend::lock(); // Set state and last value - let code = - "options(expressions = 100); f <- function(x) if (x > 0 ) f(x - 1); invisible('hello')"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly( + "options(expressions = 100); f <- function(x) if (x > 0 ) f(x - 1); invisible('hello')", + ); // Check last value is set - let code = ".Last.value"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert_eq!(frontend.recv_iopub_execute_result(), "[1] \"hello\""); - frontend.recv_iopub_idle(); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request(".Last.value", |result| { + assert_eq!(result, "[1] \"hello\""); + }); // Deterministically produce an "evaluation too deeply nested" error - let code = "f(100)"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - 
let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_error() - .contains("evaluation nested too deeply")); - - frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ); + frontend.execute_request_error("f(100)", |error_msg| { + assert!(error_msg.contains("evaluation nested too deeply")); + }); // Check last value is still set - let code = ".Last.value"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert_eq!(frontend.recv_iopub_execute_result(), "[1] \"hello\""); - frontend.recv_iopub_idle(); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request(".Last.value", |result| { + assert_eq!(result, "[1] \"hello\""); + }); } #[test] @@ -953,16 +556,7 @@ fn test_execute_request_single_line_buffer_overflow() { // not in text written to the R buffer that calls `stop()`. 
let aaa = "a".repeat(4096); let code = format!("quote(\n{aaa}\n)"); - frontend.send_execute_request(code.as_str(), ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend.recv_iopub_execute_result().contains(&aaa)); - - frontend.recv_iopub_idle(); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request(code.as_str(), |result| assert!(result.contains(&aaa))); } #[test] @@ -1292,20 +886,9 @@ fn test_shutdown_request_browser() { install_sigint_handler(); let frontend = DummyArkFrontend::lock(); - let code = "browser()"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_result() - .contains("Called from: top level")); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("browser()", |result| { + assert!(result.contains("Called from: top level")); + }); frontend.send_shutdown_request(true); frontend.recv_iopub_busy(); From 726923144682f87aaed9df999d08efa33406526d Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Tue, 18 Nov 2025 14:18:58 +0100 Subject: [PATCH 44/63] Respect `getOption("keep.source")` in ReadConsole parser wip --- crates/ark/src/interface.rs | 14 ++++++-- crates/ark/src/variables/variable.rs | 16 ++++----- crates/ark/tests/kernel-notebook.rs | 4 +-- crates/ark/tests/kernel.rs | 40 ++++++++++++++++++++-- crates/harp/.zed/settings.json | 1 + crates/harp/src/environment.rs | 10 +++--- crates/harp/src/environment_iter.rs | 2 +- crates/harp/src/lib.rs | 7 ++-- crates/harp/src/options.rs | 16 +++++++++ crates/harp/src/parse.rs | 5 +-- crates/harp/src/parser/srcref.rs | 40 +++++++++++++++------- crates/harp/src/utils.rs | 4 --- 
crates/harp/src/vector/character_vector.rs | 6 ++++ crates/libr/src/r.rs | 2 ++ 14 files changed, 127 insertions(+), 40 deletions(-) create mode 120000 crates/harp/.zed/settings.json create mode 100644 crates/harp/src/options.rs diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index ae33e6413..cb2377205 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -76,6 +76,7 @@ use harp::routines::r_register_routines; use harp::session::r_traceback; use harp::srcref::get_block_srcrefs; use harp::srcref::get_srcref; +use harp::srcref::SrcFile; use harp::utils::r_is_data_frame; use harp::utils::r_typeof; use harp::R_MAIN_THREAD_ID; @@ -310,7 +311,16 @@ enum ParseResult { impl PendingInputs { pub(crate) fn read(input: &str) -> anyhow::Result> { - let status = match harp::parse_status(&harp::ParseInput::Text(input)) { + let mut _srcfile = None; + + let input = if harp::get_option_bool("keep.source") { + _srcfile = Some(SrcFile::new_virtual_empty_filename(input.into())); + harp::ParseInput::SrcFile(&_srcfile.unwrap()) + } else { + harp::ParseInput::Text(input) + }; + + let status = match harp::parse_status(&input) { Err(err) => { // Failed to even attempt to parse the input, something is seriously wrong // FIXME: There are some valid syntax errors going through here, e.g. `identity |> _(1)`. @@ -327,7 +337,7 @@ impl PendingInputs { harp::ParseResult::Complete(exprs) => exprs, harp::ParseResult::Incomplete => { return Ok(ParseResult::SyntaxError(format!( - "Can't execute incomplete input:\n{input}" + "Can't parse incomplete input" ))); }, harp::ParseResult::SyntaxError { message, .. 
} => { diff --git a/crates/ark/src/variables/variable.rs b/crates/ark/src/variables/variable.rs index 4653fabb2..6aa8c9ceb 100644 --- a/crates/ark/src/variables/variable.rs +++ b/crates/ark/src/variables/variable.rs @@ -2112,7 +2112,7 @@ mod tests { #[test] fn test_truncation_on_matrices() { r_task(|| { - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base("matrix(0, nrow = 10000, ncol = 10000)").unwrap(); env.bind("x".into(), &value); @@ -2132,7 +2132,7 @@ mod tests { #[test] fn test_string_truncation() { r_task(|| { - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base("paste(1:5e6, collapse = ' - ')").unwrap(); env.bind("x".into(), &value); @@ -2143,7 +2143,7 @@ mod tests { assert_eq!(vars[0].is_truncated, true); // Test for the empty string - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base("''").unwrap(); env.bind("x".into(), &value); @@ -2157,7 +2157,7 @@ mod tests { #[test] fn test_s4_with_different_length() { r_task(|| { - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); // Matrix::Matrix objects have length != 1, but their format() method returns a length 1 character // describing their class. 
let value = harp::parse_eval_base("Matrix::Matrix(0, nrow= 10, ncol = 10)").unwrap(); @@ -2181,7 +2181,7 @@ mod tests { return; } - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base(r#"rlang:::chr_get("foo", 0L)"#).unwrap(); env.bind("x".into(), &value); @@ -2196,7 +2196,7 @@ mod tests { fn test_matrix_display() { r_task(|| { // Test 10x10 matrix - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base("matrix(paste(1:90, collapse = ' - '), nrow = 9, ncol = 10)") .unwrap(); @@ -2220,7 +2220,7 @@ mod tests { assert_eq!(display_value_matrix, display_value_df); // Test plurals - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base("matrix(paste(1:100, collapse = ' - '), nrow = 1, ncol = 1)") .unwrap(); @@ -2231,7 +2231,7 @@ mod tests { assert_eq!(vars[0].display_value, "[1 row x 1 column] "); // Test class - let env = Environment::new_empty().unwrap(); + let env = Environment::new_empty(); let value = harp::parse_eval_base( "structure(matrix(paste(1:100, collapse = ' - '), nrow = 1, ncol = 1), class='foo')", ) diff --git a/crates/ark/tests/kernel-notebook.rs b/crates/ark/tests/kernel-notebook.rs index 9051111f2..965003805 100644 --- a/crates/ark/tests/kernel-notebook.rs +++ b/crates/ark/tests/kernel-notebook.rs @@ -72,7 +72,7 @@ fn test_notebook_execute_request_incomplete() { assert!(frontend .recv_iopub_execute_error() - .contains("Can't execute incomplete input")); + .contains("Can't parse incomplete input")); frontend.recv_iopub_idle(); @@ -95,7 +95,7 @@ fn test_notebook_execute_request_incomplete_multiple_lines() { assert!(frontend .recv_iopub_execute_error() - .contains("Can't execute incomplete input")); + .contains("Can't parse incomplete input")); frontend.recv_iopub_idle(); diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 3d657ea7b..75df19345 
100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -60,7 +60,7 @@ fn test_execute_request_incomplete() { frontend.execute_request_invisibly("options(positron.error_entrace = FALSE)"); frontend.execute_request_error("1 +", |error_msg| { - assert_eq!(error_msg, "Error:\nCan't execute incomplete input:\n1 +"); + assert_eq!(error_msg, "Error:\nCan't parse incomplete input"); }); } @@ -69,7 +69,7 @@ fn test_execute_request_incomplete_multiple_lines() { let frontend = DummyArkFrontend::lock(); frontend.execute_request_error("1 +\n2 +", |error_msg| { - assert!(error_msg.contains("Can't execute incomplete input")); + assert!(error_msg.contains("Can't parse incomplete input")); }); } @@ -229,7 +229,7 @@ fn test_execute_request_browser_incomplete() { let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - frontend.recv_iopub_stream_stderr("Error: Can't execute incomplete input:\n1 +\n"); + frontend.recv_iopub_stream_stderr("Error: Can't parse incomplete input\n"); frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); @@ -938,3 +938,37 @@ fn test_shutdown_request_while_busy() { DummyArkFrontend::wait_for_cleanup(); } + +#[test] +fn test_execute_request_source_references() { + let frontend = DummyArkFrontend::lock(); + + // Test that our parser attaches source references when global option is set + frontend.execute_request_invisibly("options(keep.source = TRUE)"); + frontend.execute_request_invisibly("f <- function() {}"); + + frontend.execute_request( + "srcref <- attr(f, 'srcref'); inherits(srcref, 'srcref')", + |result| { + assert_eq!(result, "[1] TRUE"); + }, + ); + + frontend.execute_request( + "srcfile <- attr(srcref, 'srcfile'); inherits(srcfile, 'srcfile')", + |result| { + assert_eq!(result, "[1] TRUE"); + }, + ); + + // When global option is unset, we don't attach source references + frontend.execute_request_invisibly("options(keep.source = FALSE)"); + 
frontend.execute_request_invisibly("g <- function() {}"); + + frontend.execute_request( + "srcref <- attr(g, 'srcref'); identical(srcref, NULL)", + |result| { + assert_eq!(result, "[1] TRUE"); + }, + ); +} diff --git a/crates/harp/.zed/settings.json b/crates/harp/.zed/settings.json new file mode 120000 index 000000000..5de98924d --- /dev/null +++ b/crates/harp/.zed/settings.json @@ -0,0 +1 @@ +../../../.zed/settings.json \ No newline at end of file diff --git a/crates/harp/src/environment.rs b/crates/harp/src/environment.rs index 8666369d6..e65705d62 100644 --- a/crates/harp/src/environment.rs +++ b/crates/harp/src/environment.rs @@ -56,10 +56,12 @@ impl Environment { Self::new_filtered(env, EnvironmentFilter::default()) } - pub fn new_empty() -> anyhow::Result { - Ok(Self::new(harp::parse_eval_base( - "new.env(parent = emptyenv())", - )?)) + /// Creates hashed environment of default size inheriting from the empty + /// environment + pub fn new_empty() -> Self { + // Passing `size = 0` causes default size to be picked up + let env = unsafe { libr::R_NewEnv(R_ENVS.empty, 1, 0) }; + Self::new(RObject::new(env)) } pub fn new_filtered(env: RObject, filter: EnvironmentFilter) -> Self { diff --git a/crates/harp/src/environment_iter.rs b/crates/harp/src/environment_iter.rs index a1fc8f04b..41c2b3e01 100644 --- a/crates/harp/src/environment_iter.rs +++ b/crates/harp/src/environment_iter.rs @@ -189,7 +189,7 @@ mod tests { #[allow(non_snake_case)] fn test_binding_eq() { r_task(|| { - let env: Environment = Environment::new_empty().unwrap(); + let env: Environment = Environment::new_empty(); let obj = harp::parse_eval_base("1").unwrap(); env.bind(RSymbol::from("a"), &obj); diff --git a/crates/harp/src/lib.rs b/crates/harp/src/lib.rs index 2e5d17cc1..a1409c3f1 100644 --- a/crates/harp/src/lib.rs +++ b/crates/harp/src/lib.rs @@ -1,9 +1,10 @@ // // lib.rs // -// Copyright (C) 2023 Posit Software, PBC. All rights reserved. +// Copyright (C) 2025 Posit Software, PBC. 
All rights reserved. // // + pub mod attrib; pub mod call; mod column_names; @@ -24,6 +25,7 @@ pub mod line_ending; mod matrix; pub mod modules; pub mod object; +pub mod options; pub mod parse; pub mod parser; pub mod polled_events; @@ -61,6 +63,7 @@ pub use vector::list::*; // resolve to the correct symbols extern crate self as harp; +pub use harp::environment::*; pub use harp::error::as_result; pub use harp::exec::top_level_exec; pub use harp::exec::try_catch; @@ -72,7 +75,7 @@ pub use harp::object::list_poke; pub use harp::object::RObject; pub use harp::session::*; pub use harp::symbol::RSymbol; -pub use harp::utils::get_option; +pub use harp::options::*; pub use harp::weak_ref::RWeakRef; pub use harp_macros::register; diff --git a/crates/harp/src/options.rs b/crates/harp/src/options.rs new file mode 100644 index 000000000..f2eb83003 --- /dev/null +++ b/crates/harp/src/options.rs @@ -0,0 +1,16 @@ +// +// options.rs +// +// Copyright (C) 2025 Posit Software, PBC. All rights reserved. +// +// + +use crate::{r_symbol, RObject}; + +pub fn get_option(name: &str) -> RObject { + unsafe { libr::Rf_GetOption1(r_symbol!(name)).into() } +} + +pub fn get_option_bool(name: &str) -> bool { + harp::get_option(name).try_into().unwrap_or(false) +} diff --git a/crates/harp/src/parse.rs b/crates/harp/src/parse.rs index d2426876c..d298198f2 100644 --- a/crates/harp/src/parse.rs +++ b/crates/harp/src/parse.rs @@ -31,6 +31,7 @@ pub enum ParseResult { SyntaxError { message: String }, } +#[derive(Clone, Debug)] pub enum ParseInput<'a> { Text(&'a str), SrcFile(&'a srcref::SrcFile), @@ -67,7 +68,7 @@ pub fn parse_exprs(text: &str) -> crate::Result { /// Same but creates srcrefs pub fn parse_exprs_with_srcrefs(text: &str) -> crate::Result { - let srcfile = srcref::SrcFile::try_from(text)?; + let srcfile = srcref::SrcFile::from(text); parse_exprs_ext(&ParseInput::SrcFile(&srcfile)) } @@ -84,7 +85,7 @@ pub fn parse_exprs_ext<'a>(input: &ParseInput<'a>) -> crate::Result { } pub fn 
parse_with_parse_data(text: &str) -> crate::Result<(ParseResult, ParseData)> { - let srcfile = srcref::SrcFile::try_from(text)?; + let srcfile = srcref::SrcFile::from(text); // Fill parse data in `srcfile` by side effect let status = parse_status(&ParseInput::SrcFile(&srcfile))?; diff --git a/crates/harp/src/parser/srcref.rs b/crates/harp/src/parser/srcref.rs index 65e7f58c0..e00e1b972 100644 --- a/crates/harp/src/parser/srcref.rs +++ b/crates/harp/src/parser/srcref.rs @@ -31,7 +31,7 @@ pub struct SrcRef { pub column_byte: std::ops::Range, } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct SrcFile { pub inner: RObject, } @@ -118,13 +118,33 @@ impl TryFrom for SrcRef { /// Creates the same sort of srcfile object as with `parse(text = )`. /// Takes code as an R string containing newlines, or as a R vector of lines. impl SrcFile { - fn new_virtual(text: RObject) -> harp::Result { + // Created by the R function `parse()` + pub fn new_virtual(text: RObject) -> Self { let inner = RFunction::new("base", "srcfilecopy") .param("filename", "") .param("lines", text) - .call()?; + .call(); + + // Unwrap safety: Should never fail, unless something is seriously wrong + let inner = inner.unwrap(); - Ok(Self { inner }) + Self { inner } + } + + // Created by the C-level parser + pub fn new_virtual_empty_filename(text: RObject) -> Self { + let inner = harp::Environment::new_empty(); + inner.bind("filename".into(), &RObject::from("")); + inner.bind("lines".into(), &text); + + let inner: RObject = inner.into(); + + thread_local! 
{ + static CLASS: RObject = crate::CharacterVector::create(vec!["srcfile", "srcfilecopy"]).into(); + } + CLASS.with(|c| inner.set_attribute("class", c.sexp)); + + Self { inner } } pub fn lines(&self) -> harp::Result { @@ -136,19 +156,15 @@ impl SrcFile { } } -impl TryFrom<&str> for SrcFile { - type Error = harp::Error; - - fn try_from(value: &str) -> harp::Result { +impl From<&str> for SrcFile { + fn from(value: &str) -> Self { let input = crate::as_parse_text(value); SrcFile::new_virtual(input) } } -impl TryFrom<&harp::CharacterVector> for SrcFile { - type Error = harp::Error; - - fn try_from(value: &harp::CharacterVector) -> harp::Result { +impl From<&harp::CharacterVector> for SrcFile { + fn from(value: &harp::CharacterVector) -> Self { SrcFile::new_virtual(value.object.clone()) } } diff --git a/crates/harp/src/utils.rs b/crates/harp/src/utils.rs index e9b40796f..951752f20 100644 --- a/crates/harp/src/utils.rs +++ b/crates/harp/src/utils.rs @@ -310,10 +310,6 @@ pub fn r_type2char>(kind: T) -> String { } } -pub fn get_option(name: &str) -> RObject { - unsafe { Rf_GetOption1(r_symbol!(name)).into() } -} - pub fn r_inherits(object: SEXP, class: &str) -> bool { let class = CString::new(class).unwrap(); unsafe { libr::Rf_inherits(object, class.as_ptr()) != 0 } diff --git a/crates/harp/src/vector/character_vector.rs b/crates/harp/src/vector/character_vector.rs index a6fea148a..35dc1c0c6 100644 --- a/crates/harp/src/vector/character_vector.rs +++ b/crates/harp/src/vector/character_vector.rs @@ -134,6 +134,12 @@ impl TryFrom<&CharacterVector> for Vec { } } +impl From for RObject { + fn from(value: CharacterVector) -> Self { + value.object + } +} + #[cfg(test)] mod test { use libr::STRSXP; diff --git a/crates/libr/src/r.rs b/crates/libr/src/r.rs index 510a57dad..54a9c0f1f 100644 --- a/crates/libr/src/r.rs +++ b/crates/libr/src/r.rs @@ -19,6 +19,8 @@ use crate::types::*; // Functions and globals functions::generate! 
{ + pub fn R_NewEnv(enclos: SEXP, hash: std::ffi::c_int, size: std::ffi::c_int) -> SEXP; + pub fn Rf_initialize_R(ac: std::ffi::c_int, av: *mut *mut std::ffi::c_char) -> std::ffi::c_int; pub fn run_Rmainloop(); From 8294fc384818201b49dd639259bcb764e9fdafa9 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 19 Nov 2025 09:11:48 +0100 Subject: [PATCH 45/63] Add `harp::once!` --- crates/harp/src/lib.rs | 24 +++++++++++++++++++++++- crates/harp/src/parser/srcref.rs | 2 +- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/crates/harp/src/lib.rs b/crates/harp/src/lib.rs index a1409c3f1..c505e48ec 100644 --- a/crates/harp/src/lib.rs +++ b/crates/harp/src/lib.rs @@ -73,9 +73,9 @@ pub(crate) use harp::fixtures::r_task; pub use harp::object::list_get; pub use harp::object::list_poke; pub use harp::object::RObject; +pub use harp::options::*; pub use harp::session::*; pub use harp::symbol::RSymbol; -pub use harp::options::*; pub use harp::weak_ref::RWeakRef; pub use harp_macros::register; @@ -258,6 +258,28 @@ macro_rules! push_rds { }; } +/// Allocate global variable for the R thread with lazy init +/// +/// Uses thread_local storage to avoid issues with SEXP being non-Sync. +/// Usage: +/// +/// ``` +/// harp::once! { +/// static NAME: Type = initialization_expression; +/// } +/// NAME.with(|x| foo(x)); +/// ``` +/// +/// Expands to a thread-local static initialized on first access in the thread. +#[macro_export] +macro_rules! once { + ( $( static $name:ident : $ty:ty = $init:expr );* $(;)? ) => { + thread_local! { + $( static $name: $ty = $init; )* + } + }; +} + #[cfg(test)] mod tests { use libr::*; diff --git a/crates/harp/src/parser/srcref.rs b/crates/harp/src/parser/srcref.rs index e00e1b972..1a020d3b9 100644 --- a/crates/harp/src/parser/srcref.rs +++ b/crates/harp/src/parser/srcref.rs @@ -139,7 +139,7 @@ impl SrcFile { let inner: RObject = inner.into(); - thread_local! { + harp::once! 
{ static CLASS: RObject = crate::CharacterVector::create(vec!["srcfile", "srcfilecopy"]).into(); } CLASS.with(|c| inner.set_attribute("class", c.sexp)); From 11f3708c6977521331a5c05df4bd87132904c505 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 19 Nov 2025 14:05:10 +0100 Subject: [PATCH 46/63] Restore `R_Srcref` on exit to avoid changing the DAP's top frame --- crates/ark/src/interface.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index cb2377205..10be11e93 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1408,12 +1408,17 @@ impl RMain { // The global source reference is stored in this global variable by // the R REPL before evaluation. We do the same here. + let old_srcref = libr::Rf_protect(libr::get(libr::R_Srcref)); libr::set(libr::R_Srcref, srcref); // Evaluate the expression. Beware: this may throw an R longjump. let value = libr::Rf_eval(expr, frame); libr::Rf_protect(value); + // Restore `R_Srcref`, necessary at least to avoid messing with + // DAP's last frame info + libr::set(libr::R_Srcref, old_srcref); + // Store in the base environment for robust access from (almost) any // evaluation environment. We only require the presence of `::` so // we can reach into base. Note that unlike regular environments @@ -1421,7 +1426,7 @@ impl RMain { // is stored in the `value` field of symbols, i.e. their "CDR". 
libr::SETCDR(r_symbol!(".ark_last_value"), value); - libr::Rf_unprotect(2); + libr::Rf_unprotect(3); value }; From 4f3f2359cd6ce48145ed216671ad9003adbad28a Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 19 Nov 2025 14:49:12 +0100 Subject: [PATCH 47/63] Adjust for recent changes on main --- crates/ark/src/help/r_help.rs | 11 +++++------ crates/ark/src/interface.rs | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/crates/ark/src/help/r_help.rs b/crates/ark/src/help/r_help.rs index 3bff54d39..aa5977572 100644 --- a/crates/ark/src/help/r_help.rs +++ b/crates/ark/src/help/r_help.rs @@ -271,22 +271,21 @@ impl RHelp { let env = (|| { #[cfg(not(test))] if RMain::is_initialized() { - if let Some(debug_env) = &RMain::get().debug_env() { - // Mem-Safety: Object protected by `RMain` for the duration of the `r_task()` - return debug_env.sexp; + if let Ok(debug_env) = &RMain::get().read_console_frame.try_borrow() { + return (*debug_env).clone(); } } - R_GlobalEnv + RObject::from(R_GlobalEnv) })(); - let obj = harp::parse_eval0(topic.as_str(), env)?; + let obj = harp::parse_eval0(topic.as_str(), env.sexp)?; let handler: Option = ArkGenerics::HelpGetHandler.try_dispatch(obj.sexp, vec![])?; if let Some(handler) = handler { let mut fun = RFunction::new_inlined(handler); - match fun.call_in(env) { + match fun.call_in(env.sexp) { Err(err) => { log::error!("Error calling help handler: {:?}", err); return Err(anyhow!("Error calling help handler: {:?}", err)); diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 10be11e93..833909390 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -289,7 +289,7 @@ pub struct RMain { read_console_shutdown: Cell, /// Current topmost environment on the stack while waiting for input in ReadConsole - read_console_frame: RefCell, + pub(crate) read_console_frame: RefCell, } /// Stack of pending inputs From 086c4452b2821dcf3ca46dacb8721496a34e248b Mon Sep 17 00:00:00 2001 From: 
Lionel Henry Date: Thu, 20 Nov 2025 09:55:40 +0100 Subject: [PATCH 48/63] Add closure variant of IOPub Stream assertion --- .../amalthea/src/fixtures/dummy_frontend.rs | 78 +++++++++++++++---- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/crates/amalthea/src/fixtures/dummy_frontend.rs b/crates/amalthea/src/fixtures/dummy_frontend.rs index 09b0bfe18..9f10955c1 100644 --- a/crates/amalthea/src/fixtures/dummy_frontend.rs +++ b/crates/amalthea/src/fixtures/dummy_frontend.rs @@ -453,30 +453,63 @@ impl DummyFrontend { assert_matches!(msg, Message::UpdateDisplayData(_)) } + /// Receive from IOPub Stream + /// + /// Stdout and Stderr Stream messages are buffered, so to reliably test + /// against them we have to collect the messages in batches on the receiving + /// end and compare against an expected message. + /// + /// The comparison is done with an assertive closure: we'll wait for more + /// output as long as the closure panics. + /// + /// Because closures can't track callers yet, the `recv_iopub_stream()` + /// variant is more ergonomic and should be preferred. + /// See <https://github.com/rust-lang/rust/issues/87417> for tracking issue. 
#[track_caller] - pub fn recv_iopub_stream_stdout(&self, expect: &str) { - self.recv_iopub_stream(expect, Stream::Stdout) + fn recv_iopub_stream_with(&self, stream: Stream, mut f: F) + where + F: FnMut(&str), + { + let mut out = String::new(); + + loop { + let msg = self.recv_iopub(); + let piece = assert_matches!(msg, Message::Stream(data) => { + assert_eq!(data.content.name, stream); + data.content.text + }); + out.push_str(&piece); + + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + f(&out); + })) { + Ok(_) => break, + Err(_) => continue, + }; + } } #[track_caller] - pub fn recv_iopub_stream_stderr(&self, expect: &str) { - self.recv_iopub_stream(expect, Stream::Stderr) + pub fn recv_iopub_stream_stdout_with(&self, f: F) + where + F: FnMut(&str), + { + self.recv_iopub_stream_with(Stream::Stdout, f) } #[track_caller] - pub fn recv_iopub_comm_close(&self) -> String { - let msg = self.recv_iopub(); - - assert_matches!(msg, Message::CommClose(data) => { - data.content.comm_id - }) + pub fn recv_iopub_stream_stderr_with(&self, f: F) + where + F: FnMut(&str), + { + self.recv_iopub_stream_with(Stream::Stderr, f) } /// Receive from IOPub Stream /// - /// Stdout and Stderr Stream messages are buffered, so to reliably test against them - /// we have to collect the messages in batches on the receiving end and compare against - /// an expected message. + /// This variant compares the stream against its expected _last_ output. + /// We can't use `recv_iopub_stream_with()` here because closures + /// can't track callers. 
#[track_caller] fn recv_iopub_stream(&self, expect: &str, stream: Stream) { let mut out = String::new(); @@ -509,6 +542,25 @@ impl DummyFrontend { } } + #[track_caller] + pub fn recv_iopub_stream_stdout(&self, expect: &str) { + self.recv_iopub_stream(expect, Stream::Stdout) + } + + #[track_caller] + pub fn recv_iopub_stream_stderr(&self, expect: &str) { + self.recv_iopub_stream(expect, Stream::Stderr) + } + + #[track_caller] + pub fn recv_iopub_comm_close(&self) -> String { + let msg = self.recv_iopub(); + + assert_matches!(msg, Message::CommClose(data) => { + data.content.comm_id + }) + } + /// Receive from IOPub and assert ExecuteResult message. Returns compulsory /// `evalue` field. #[track_caller] From 036a924e30575c0903bdb9127b5ec964cd8644c4 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 20 Nov 2025 10:24:04 +0100 Subject: [PATCH 49/63] Collect IOPub streams until end matches --- crates/amalthea/src/fixtures/dummy_frontend.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/amalthea/src/fixtures/dummy_frontend.rs b/crates/amalthea/src/fixtures/dummy_frontend.rs index 9f10955c1..04255b5e6 100644 --- a/crates/amalthea/src/fixtures/dummy_frontend.rs +++ b/crates/amalthea/src/fixtures/dummy_frontend.rs @@ -518,23 +518,15 @@ impl DummyFrontend { // Receive a piece of stream output (with a timeout) let msg = self.recv_iopub(); - // Assert its type let piece = assert_matches!(msg, Message::Stream(data) => { assert_eq!(data.content.name, stream); data.content.text }); - // Add to what we've already collected out += piece.as_str(); - if out == expect { - // Done, found the entire `expect` string - return; - } - - if !expect.starts_with(out.as_str()) { - // Something is wrong, message doesn't match up - panic!("Expected IOPub stream of '{expect}'. Actual stream of '{out}'."); + if out.ends_with(expect) { + break; } // We have a prefix of `expect`, but not the whole message yet. 
@@ -542,11 +534,15 @@ impl DummyFrontend { } } + /// Receives stdout stream output until the collected output ends with + /// `expect`. Note: The comparison uses `ends_with`, not full equality. #[track_caller] pub fn recv_iopub_stream_stdout(&self, expect: &str) { self.recv_iopub_stream(expect, Stream::Stdout) } + /// Receives stderr stream output until the collected output ends with + /// `expect`. Note: The comparison uses `ends_with`, not full equality. #[track_caller] pub fn recv_iopub_stream_stderr(&self, expect: &str) { self.recv_iopub_stream(expect, Stream::Stderr) From 5b377e2b3fff760d50dd586a7ad036dc6da323eb Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 20 Nov 2025 11:39:56 +0100 Subject: [PATCH 50/63] Better handle errors in `options(error = )` --- crates/ark/src/interface.rs | 4 +-- crates/ark/src/modules/positron/errors.R | 28 ++++++++++++++++++- crates/ark/tests/kernel.rs | 35 ++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 833909390..fd895c963 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1466,14 +1466,14 @@ impl RMain { /// https://github.com/rstudio/renv/blob/5d0d52c395e569f7f24df4288d949cef95efca4e/inst/resources/activate.R#L85-L87 fn handle_invalid_input_request(&self, buf: *mut c_uchar, buflen: c_int) -> ConsoleResult { if let Some(input) = Self::renv_autoloader_reply() { - log::info!("Detected `readline()` call in renv autoloader. Returning `'{input}'`."); + log::warn!("Detected `readline()` call in renv autoloader. Returning `'{input}'`."); match Self::on_console_input(buf, buflen, input) { Ok(()) => return ConsoleResult::NewInput, Err(err) => return ConsoleResult::Error(format!("{err}")), } } - log::info!("Detected invalid `input_request` outside an `execute_request`. Preparing to throw an R error."); + log::warn!("Detected invalid `input_request` outside an `execute_request`. 
Preparing to throw an R error."); let message = vec![ "Can't request input from the user at this time.", diff --git a/crates/ark/src/modules/positron/errors.R b/crates/ark/src/modules/positron/errors.R index 12263fa76..0b027d51c 100644 --- a/crates/ark/src/modules/positron/errors.R +++ b/crates/ark/src/modules/positron/errors.R @@ -201,7 +201,33 @@ invoke_option_error_handler <- function() { } for (hnd in handler) { - eval(hnd, globalenv()) + err <- tryCatch( + { + eval(hnd, globalenv()) + NULL + }, + error = identity + ) + + if (!is.null(err)) { + # Disable error handler to avoid cascading errors + options(error = NULL) + + # We don't let the error propagate to avoid a confusing sequence of + # error messages from R, such as "Error during wrapup" + writeLines( + c( + "The `getOption(\"error\")` handler failed.", + "This option was unset to avoid cascading errors.", + "Caused by:", + conditionMessage(err) + ), + con = stderr() + ) + + # Bail early + return() + } } } diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 75df19345..d84c70c81 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -820,6 +820,41 @@ fn test_env_vars() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +#[test] +fn test_execute_request_error_handler_failure() { + let frontend = DummyArkFrontend::lock(); + + // Define the functions and set the error handler + let code = r#" +f <- function() g() +g <- function() h() +h <- function() stop("foo") +options(error = function() stop("ouch")) +"#; + frontend.execute_request_invisibly(code); + + frontend.send_execute_request("f()", ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, "f()"); + + frontend.recv_iopub_stream_stderr( + r#"The `getOption("error")` handler failed. +This option was unset to avoid cascading errors. 
+Caused by: +ouch +"#, + ); + + assert!(frontend.recv_iopub_execute_error().contains("foo")); + + frontend.recv_iopub_idle(); + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); +} + /// Install a SIGINT handler for shutdown tests. This overrides the test runner /// handler so it doesn't cancel our test. fn install_sigint_handler() { From beef016f139e14ae8b532eb623c31a1286551ccc Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 20 Nov 2025 12:41:17 +0100 Subject: [PATCH 51/63] Prevent calling `readline()` or `menu()` from error handler From R's perspective, the handler runs _after_ the error was emitted. That's why the user is able to see the error message before the recover prompt. From our perspective though, we have to run the handler _before_ emitting the error, because all executed code needs to be nested in an execution request so that we can properly match output to a prompt on the frontend side. The Jupyter protocol does not really support orphan side effects (streams, input requests). --- crates/ark/src/interface.rs | 26 +++++++++++++++++++++++++ crates/ark/tests/kernel.rs | 39 ++++++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index fd895c963..1298d9553 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -907,6 +907,20 @@ impl RMain { } if let Some(exception) = self.take_exception() { + // We might get an input request if `readline()` or `menu()` is + // called in `options(error = )`. We respond to this with an error + // as this is not supported by Ark. + if matches!(info.kind, PromptKind::InputRequest) { + // Reset error so we can handle it when we recurse here after + // the error aborts the readline. 
Note it's better to first emit + // the R invalid input request error, and then handle + // `exception` within the context of a new `ReadConsole` + // instance, so that we emit the proper execution prompts as + // part of the response, and not the readline prompt. + self.last_error = Some(exception); + return self.handle_invalid_input_request_after_error(); + } + // Clear any pending inputs, if any self.pending_inputs = None; @@ -1483,6 +1497,18 @@ impl RMain { return ConsoleResult::Error(message); } + fn handle_invalid_input_request_after_error(&self) -> ConsoleResult { + log::warn!("Detected invalid `input_request` after error (probably from `getOption('error')`). Preparing to throw an R error."); + + let message = vec![ + "Can't request input from the user at this time.", + "Are you calling `readline()` or `menu()` from `options(error = )`?", + ] + .join("\n"); + + return ConsoleResult::Error(message); + } + fn start_debug(&mut self, debug_preserve_focus: bool) { match self.dap.stack_info() { Ok(stack) => { diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index d84c70c81..67098b7a1 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -824,7 +824,6 @@ fn test_env_vars() { fn test_execute_request_error_handler_failure() { let frontend = DummyArkFrontend::lock(); - // Define the functions and set the error handler let code = r#" f <- function() g() g <- function() h() @@ -855,6 +854,44 @@ ouch ); } +#[test] +fn test_execute_request_error_handler_readline() { + let frontend = DummyArkFrontend::lock(); + + let code = r#" +f <- function() g() +g <- function() h() +h <- function() stop("foo") +options(error = function() menu("ouch")) +"#; + frontend.execute_request_invisibly(code); + + frontend.send_execute_request("f()", ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, "f()"); + + frontend.recv_iopub_stream_stdout("Enter an item from 
the menu, or 0 to exit\n"); + + frontend.recv_iopub_stream_stderr( + r#"The `getOption("error")` handler failed. +This option was unset to avoid cascading errors. +Caused by: +Can't request input from the user at this time. +Are you calling `readline()` or `menu()` from `options(error = )`? +"#, + ); + + assert!(frontend.recv_iopub_execute_error().contains("foo")); + frontend.recv_iopub_idle(); + + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); +} + +#[test] /// Install a SIGINT handler for shutdown tests. This overrides the test runner /// handler so it doesn't cancel our test. fn install_sigint_handler() { From 5ccb65c10b89d72f45a242449b9273765117e292 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 20 Nov 2025 13:28:29 +0100 Subject: [PATCH 52/63] Simplify call stack --- crates/ark/src/modules/positron/errors.R | 54 +++++++++++++----------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/crates/ark/src/modules/positron/errors.R b/crates/ark/src/modules/positron/errors.R index 0b027d51c..c5376d3df 100644 --- a/crates/ark/src/modules/positron/errors.R +++ b/crates/ark/src/modules/positron/errors.R @@ -200,34 +200,40 @@ invoke_option_error_handler <- function() { handler <- as.expression(list(handler)) } + delayedAssign("non_local_return", return()) + for (hnd in handler) { - err <- tryCatch( + # Use `withCallingHandlers()` instead of `tryCatch()` to avoid making + # the call stack too complex. We might be running `options(error = browser())` + withCallingHandlers( { - eval(hnd, globalenv()) - NULL + # Evaluate from a promise to keep a simple call stack. + # We do evaluate from a closure wrapped in `handler()` so that R + # can infer a named call, for instance in the "Called from:" + # output of `browser()`. 
+ error_handler <- eval(bquote(function() .(hnd))) + error_handler() }, - error = identity + error = function(err) { + # Disable error handler to avoid cascading errors + options(error = NULL) + + # We don't let the error propagate to avoid a confusing sequence of + # error messages from R, such as "Error during wrapup" + writeLines( + c( + "The `getOption(\"error\")` handler failed.", + "This option was unset to avoid cascading errors.", + "Caused by:", + conditionMessage(err) + ), + con = stderr() + ) + + # Bail early + non_local_return + } ) - - if (!is.null(err)) { - # Disable error handler to avoid cascading errors - options(error = NULL) - - # We don't let the error propagate to avoid a confusing sequence of - # error messages from R, such as "Error during wrapup" - writeLines( - c( - "The `getOption(\"error\")` handler failed.", - "This option was unset to avoid cascading errors.", - "Caused by:", - conditionMessage(err) - ), - con = stderr() - ) - - # Bail early - return() - } } } From 8adaf78485db46fa6885675e106b46449313df3b Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 20 Nov 2025 14:32:24 +0100 Subject: [PATCH 53/63] Hook `recover()` to call `browser()` The `recover()` functionality is provided by the call stack panes of frontends --- crates/ark/src/modules/positron/hooks.R | 16 ++++++++++++++ crates/ark/tests/kernel.rs | 28 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/crates/ark/src/modules/positron/hooks.R b/crates/ark/src/modules/positron/hooks.R index 227c12729..d25ce85ca 100644 --- a/crates/ark/src/modules/positron/hooks.R +++ b/crates/ark/src/modules/positron/hooks.R @@ -14,6 +14,14 @@ register_hooks <- function() { new_ark_debug(base::debugonce), namespace = TRUE ) + + rebind( + "utils", + "recover", + # Keep this wrapped up this way for a better "Called from:" call + function(...) 
ark_recover(), + namespace = TRUE + ) register_getHook_hook() } @@ -151,3 +159,11 @@ check_version <- function(pkg) { } ) } + +# We don't support `utils::recover()` in Ark, but the same functionality is +# provided via the call stack pane of IDEs. So replace it by `browser()` so that +# people can enter the debugger on error using the familiar `options(error = +# recover)` gesture. +ark_recover <- function(...) { + browser() +} diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 67098b7a1..45b638605 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -892,6 +892,34 @@ Are you calling `readline()` or `menu()` from `options(error = )`? } #[test] +fn test_execute_request_error_recover() { + let frontend = DummyArkFrontend::lock(); + + let code = r#" +f <- function() g() +g <- function() h() +h <- function() stop("foo") +options(error = recover) +"#; + frontend.execute_request_invisibly(code); + + frontend.send_execute_request("f()", ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, "f()"); + + // We set up the call stack to show a simple `error_handler()` + frontend.recv_iopub_stream_stdout("Called from: ark_recover()\n"); + + assert!(frontend.recv_iopub_execute_error().contains("foo")); + + frontend.recv_iopub_idle(); + assert_eq!( + frontend.recv_shell_execute_reply_exception(), + input.execution_count + ); +} + /// Install a SIGINT handler for shutdown tests. This overrides the test runner /// handler so it doesn't cancel our test. 
fn install_sigint_handler() { From e1585109a1fb2a1bf4857d608cc78048428c651d Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 27 Nov 2025 12:34:56 +0100 Subject: [PATCH 54/63] Tweak comments --- crates/amalthea/src/socket/control.rs | 4 ++-- crates/amalthea/tests/client.rs | 9 +++------ crates/ark/src/interface.rs | 4 +++- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/crates/amalthea/src/socket/control.rs b/crates/amalthea/src/socket/control.rs index 829732da8..d62c19433 100644 --- a/crates/amalthea/src/socket/control.rs +++ b/crates/amalthea/src/socket/control.rs @@ -102,8 +102,8 @@ impl Control { H: FnOnce(JupyterMessage) -> Result<(), Error>, { // Enter the kernel-busy state in preparation for handling the message. - // The protocol specification does not mandate status messages for - // Control, but we emit them for compatibility with ipykernel: + // The protocol specification is vague about status messages for + // Control, we mostly emit them for compatibility with ipykernel: // https://github.com/ipython/ipykernel/pull/585. These status messages // can be discriminated from those on Shell by examining the parent // header. diff --git a/crates/amalthea/tests/client.rs b/crates/amalthea/tests/client.rs index 26f904093..3c1570bbd 100644 --- a/crates/amalthea/tests/client.rs +++ b/crates/amalthea/tests/client.rs @@ -70,25 +70,22 @@ fn test_amalthea_shutdown_request() { // Send a shutdown request with restart = false frontend.send_shutdown_request(false); - - // Shutdown requests generate busy/idle status messages on IOPub frontend.recv_iopub_busy(); - // Receive the shutdown reply let reply = frontend.recv_control_shutdown_reply(); assert_eq!(reply.status, Status::Ok); assert_eq!(reply.restart, false); - frontend.recv_iopub_idle(); - // Test with restart = true + // Test again with restart = true. + // Although the R thread has shut down, the Amalthea thread keeps running + // and is able to reply. 
frontend.send_shutdown_request(true); frontend.recv_iopub_busy(); let reply = frontend.recv_control_shutdown_reply(); assert_eq!(reply.status, Status::Ok); assert_eq!(reply.restart, true); - frontend.recv_iopub_idle(); } diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 1298d9553..2c247530d 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -273,9 +273,11 @@ pub struct RMain { /// Set to true when `r_read_console()` exits. Reset to false at the start /// of each `r_read_console()` call. Used to detect if `eval()` returned /// from a nested REPL (the flag will be true when the evaluation returns). + /// In these cases, we need to return from `r_read_console()` with a dummy + /// evaluation to reset things like `R_ConsoleIob`. read_console_nested_return: Cell, - /// Set to true `r_read_console()` exits via an error longjump. Used to + /// Set to true when `r_read_console()` exits via an error longjump. Used to /// detect if we need to go return from `r_read_console()` with a dummy /// evaluation to reset things like `R_EvalDepth`. 
read_console_threw_error: Cell, From cd03255e3b30bc3c0a35d4279a382a670812f82f Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 27 Nov 2025 13:46:31 +0100 Subject: [PATCH 55/63] Consolidate RMain-related DAP state in RMain --- crates/ark/src/dap/dap.rs | 2 +- crates/ark/src/dap/dap_r_main.rs | 363 ------------------------------- crates/ark/src/dap/dap_server.rs | 4 +- crates/ark/src/dap/mod.rs | 1 - crates/ark/src/interface.rs | 126 ++++------- crates/ark/src/lib.rs | 1 + crates/ark/src/repl_debug.rs | 360 ++++++++++++++++++++++++++++++ 7 files changed, 410 insertions(+), 447 deletions(-) delete mode 100644 crates/ark/src/dap/dap_r_main.rs create mode 100644 crates/ark/src/repl_debug.rs diff --git a/crates/ark/src/dap/dap.rs b/crates/ark/src/dap/dap.rs index 10719f519..8a1c96f7c 100644 --- a/crates/ark/src/dap/dap.rs +++ b/crates/ark/src/dap/dap.rs @@ -18,8 +18,8 @@ use harp::object::RObject; use stdext::result::ResultExt; use stdext::spawn; -use crate::dap::dap_r_main::FrameInfo; use crate::dap::dap_server; +use crate::repl_debug::FrameInfo; use crate::request::RRequest; use crate::thread::RThreadSafe; diff --git a/crates/ark/src/dap/dap_r_main.rs b/crates/ark/src/dap/dap_r_main.rs deleted file mode 100644 index 00db8ed3f..000000000 --- a/crates/ark/src/dap/dap_r_main.rs +++ /dev/null @@ -1,363 +0,0 @@ -// -// dap_r_main.rs -// -// Copyright (C) 2024 Posit Software, PBC. All rights reserved. 
-// -// - -use std::collections::HashMap; -use std::sync::Arc; -use std::sync::Mutex; - -use anyhow::anyhow; -use harp::exec::RFunction; -use harp::exec::RFunctionExt; -use harp::object::RObject; -use harp::protect::RProtect; -use harp::r_string; -use harp::session::r_sys_calls; -use harp::session::r_sys_frames; -use harp::session::r_sys_functions; -use harp::utils::r_is_null; -use libr::R_NilValue; -use libr::R_Srcref; -use libr::Rf_allocVector; -use libr::Rf_xlength; -use libr::INTSXP; -use libr::SET_INTEGER_ELT; -use libr::SEXP; -use libr::VECTOR_ELT; -use stdext::result::ResultExt; - -use crate::dap::dap::DapBackendEvent; -use crate::dap::Dap; -use crate::modules::ARK_ENVS; -use crate::thread::RThreadSafe; - -pub struct RMainDap { - /// Underlying dap state - dap: Arc>, - - /// Whether or not we are currently in a debugging state. - debugging: bool, - - /// The current call emitted by R as `debug: `. - call_text: DebugCallText, - - /// The last known `start_line` for the active context frame. - last_start_line: Option, - - /// The current frame `id`. Unique across all frames within a single debug session. - /// Reset after `stop_debug()`, not between debug steps. If we reset between steps, - /// we could potentially have a race condition where `handle_scopes()` could request - /// a `variables_reference` for a `frame_id` that we've already overwritten the - /// `variables_reference` for, potentially sending back incorrect information. - current_frame_info_id: i64, -} - -#[derive(Clone, Debug)] -pub enum DebugCallText { - None, - Capturing(String), - Finalized(String), -} - -#[derive(Debug)] -pub struct FrameInfo { - pub id: i64, - /// The name shown in the editor tab bar when this frame is viewed. - pub source_name: String, - /// The name shown in the stack frame UI when this frame is visible. 
- pub frame_name: String, - pub source: FrameSource, - pub environment: Option>, - pub start_line: i64, - pub start_column: i64, - pub end_line: i64, - pub end_column: i64, -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum FrameSource { - File(String), - Text(String), -} - -/// Version of `FrameInfo` that identifies the frame by value and doesn't keep a -/// reference to the environment. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct FrameInfoId { - pub source: FrameSource, - pub start_line: i64, - pub start_column: i64, - pub end_line: i64, - pub end_column: i64, -} - -impl From<&FrameInfo> for FrameInfoId { - fn from(info: &FrameInfo) -> Self { - FrameInfoId { - source: info.source.clone(), - start_line: info.start_line, - start_column: info.start_column, - end_line: info.end_line, - end_column: info.end_column, - } - } -} - -impl RMainDap { - pub fn new(dap: Arc>) -> Self { - Self { - dap, - debugging: false, - call_text: DebugCallText::None, - last_start_line: None, - current_frame_info_id: 0, - } - } - - pub fn is_debugging(&self) -> bool { - self.debugging - } - - pub fn start_debug( - &mut self, - stack: Vec, - preserve_focus: bool, - fallback_sources: HashMap, - ) { - self.debugging = true; - let mut dap = self.dap.lock().unwrap(); - dap.start_debug(stack, preserve_focus, fallback_sources) - } - - pub fn stop_debug(&mut self) { - let mut dap = self.dap.lock().unwrap(); - dap.stop_debug(); - drop(dap); - - self.reset_frame_id(); - self.debugging = false; - } - - pub fn handle_write_console(&mut self, content: &str) { - if let DebugCallText::Capturing(ref mut call_text) = self.call_text { - // Append to current expression if we are currently capturing stdout - call_text.push_str(content); - return; - } - - // `debug: ` is emitted by R (if no srcrefs are available!) right before it emits - // the current expression we are debugging, so we use that as a signal to begin - // capturing. 
- if content == "debug: " { - self.call_text = DebugCallText::Capturing(String::new()); - return; - } - - // Entering or exiting a closure, reset the debug start line state and call text - if content == "debugging in: " || content == "exiting from: " { - self.last_start_line = None; - self.call_text = DebugCallText::None; - return; - } - } - - pub fn handle_read_console(&mut self) { - // Upon entering read-console, finalize any debug call text that we were capturing. - // At this point, the user can either advance the debugger, causing us to capture - // a new expression, or execute arbitrary code, where we will reuse a finalized - // debug call text to maintain the debug state. - match &self.call_text { - // If not debugging, nothing to do. - DebugCallText::None => (), - // If already finalized, keep what we have. - DebugCallText::Finalized(_) => (), - // If capturing, transition to finalized. - DebugCallText::Capturing(call_text) => { - self.call_text = DebugCallText::Finalized(call_text.clone()) - }, - } - } - - pub fn send_dap(&self, event: DapBackendEvent) { - let dap = self.dap.lock().unwrap(); - if let Some(tx) = &dap.backend_events_tx { - tx.send(event).log_err(); - } - } - - pub fn stack_info(&mut self) -> anyhow::Result> { - // We leave finalized `call_text` in place rather than setting it to `None` here - // in case the user executes an arbitrary expression in the debug R console, which - // loops us back here without updating the `call_text` in any way, allowing us to - // recreate the debugger state after their code execution. - let call_text = match self.call_text.clone() { - DebugCallText::None => None, - DebugCallText::Capturing(call_text) => { - log::error!( - "Call text is in `Capturing` state, but should be `Finalized`: '{call_text}'." 
- ); - None - }, - DebugCallText::Finalized(call_text) => Some(call_text), - }; - - let last_start_line = self.last_start_line; - - let frames = self.r_stack_info(call_text, last_start_line)?; - - // If we have `frames`, update the `last_start_line` with the context - // frame's start line - if let Some(frame) = frames.get(0) { - self.last_start_line = Some(frame.start_line); - } - - Ok(frames) - } - - fn r_stack_info( - &mut self, - context_call_text: Option, - context_last_start_line: Option, - ) -> anyhow::Result> { - unsafe { - let mut protect = RProtect::new(); - - let context_srcref = libr::get(R_Srcref); - protect.add(context_srcref); - - let context_call_text = match context_call_text { - Some(context_call_text) => r_string!(context_call_text, &mut protect), - None => R_NilValue, - }; - - let context_last_start_line = match context_last_start_line { - Some(context_last_start_line) => { - let x = Rf_allocVector(INTSXP, 1); - protect.add(x); - SET_INTEGER_ELT(x, 0, i32::try_from(context_last_start_line)?); - x - }, - None => R_NilValue, - }; - - let functions = r_sys_functions()?; - protect.add(functions); - - let environments = r_sys_frames()?; - protect.add(environments.sexp); - - let calls = r_sys_calls()?; - protect.add(calls.sexp); - - let info = RFunction::new("", "debugger_stack_info") - .add(context_call_text) - .add(context_last_start_line) - .add(context_srcref) - .add(functions) - .add(environments) - .add(calls) - .call_in(ARK_ENVS.positron_ns)?; - - let n: isize = Rf_xlength(info.sexp); - - let mut out = Vec::with_capacity(n as usize); - - // Reverse the order for DAP - for i in (0..n).rev() { - let frame = VECTOR_ELT(info.sexp, i); - out.push(self.as_frame_info(frame)?); - } - - Ok(out) - } - } - - fn as_frame_info(&mut self, info: SEXP) -> anyhow::Result { - unsafe { - let mut i = 0; - - let source_name = VECTOR_ELT(info, i); - let source_name: String = RObject::view(source_name).try_into()?; - - i += 1; - let frame_name = VECTOR_ELT(info, i); 
- let frame_name: String = RObject::view(frame_name).try_into()?; - - let mut source = None; - - i += 1; - let file = VECTOR_ELT(info, i); - if file != R_NilValue { - let file: String = RObject::view(file).try_into()?; - source = Some(FrameSource::File(file)); - } - - i += 1; - let text = VECTOR_ELT(info, i); - if text != R_NilValue { - let text: String = RObject::view(text).try_into()?; - source = Some(FrameSource::Text(text)); - } - - let Some(source) = source else { - return Err(anyhow!( - "Expected either `file` or `text` to be non-`NULL`." - )); - }; - - i += 1; - let environment = VECTOR_ELT(info, i); - let environment = if r_is_null(environment) { - None - } else { - Some(RThreadSafe::new(RObject::from(environment))) - }; - - i += 1; - let start_line = VECTOR_ELT(info, i); - let start_line: i32 = RObject::view(start_line).try_into()?; - - i += 1; - let start_column = VECTOR_ELT(info, i); - let start_column: i32 = RObject::view(start_column).try_into()?; - - i += 1; - let end_line = VECTOR_ELT(info, i); - let end_line: i32 = RObject::view(end_line).try_into()?; - - // For `end_column`, the column range provided by R is inclusive `[,]`, but the - // one used on the DAP / Positron side is exclusive `[,)` so we have to add 1. 
- i += 1; - let end_column = VECTOR_ELT(info, i); - let end_column: i32 = RObject::view(end_column).try_into()?; - let end_column = end_column + 1; - - let id = self.next_frame_id(); - - Ok(FrameInfo { - id, - source_name, - frame_name, - source, - environment, - start_line: start_line.try_into()?, - start_column: start_column.try_into()?, - end_line: end_line.try_into()?, - end_column: end_column.try_into()?, - }) - } - } - - fn next_frame_id(&mut self) -> i64 { - let out = self.current_frame_info_id; - self.current_frame_info_id += 1; - out - } - - fn reset_frame_id(&mut self) { - self.current_frame_info_id = 0; - } -} diff --git a/crates/ark/src/dap/dap_server.rs b/crates/ark/src/dap/dap_server.rs index 8b7a02afb..841c7280f 100644 --- a/crates/ark/src/dap/dap_server.rs +++ b/crates/ark/src/dap/dap_server.rs @@ -34,11 +34,11 @@ use stdext::spawn; use super::dap::Dap; use super::dap::DapBackendEvent; use crate::dap::dap::DapStoppedEvent; -use crate::dap::dap_r_main::FrameInfo; -use crate::dap::dap_r_main::FrameSource; use crate::dap::dap_variables::object_variables; use crate::dap::dap_variables::RVariable; use crate::r_task; +use crate::repl_debug::FrameInfo; +use crate::repl_debug::FrameSource; use crate::request::debug_request_command; use crate::request::DebugRequest; use crate::request::RRequest; diff --git a/crates/ark/src/dap/mod.rs b/crates/ark/src/dap/mod.rs index 1d56d1f30..2482fdc87 100644 --- a/crates/ark/src/dap/mod.rs +++ b/crates/ark/src/dap/mod.rs @@ -6,7 +6,6 @@ // pub mod dap; -pub mod dap_r_main; pub mod dap_server; pub mod dap_variables; diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 2c247530d..0ffc280a7 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -97,8 +97,6 @@ use tokio::sync::mpsc::UnboundedReceiver as AsyncUnboundedReceiver; use uuid::Uuid; use crate::dap::dap::DapBackendEvent; -use crate::dap::dap_r_main::FrameInfoId; -use crate::dap::dap_r_main::RMainDap; use crate::dap::Dap; 
use crate::errors; use crate::errors::stack_overflow_occurred; @@ -120,6 +118,7 @@ use crate::r_task::BoxFuture; use crate::r_task::RTask; use crate::r_task::RTaskStartInfo; use crate::r_task::RTaskStatus; +use crate::repl_debug::FrameInfoId; use crate::repos::apply_default_repos; use crate::repos::DefaultRepos; use crate::request::debug_request_command; @@ -128,7 +127,6 @@ use crate::request::RRequest; use crate::signals::initialize_signal_handlers; use crate::signals::interrupts_pending; use crate::signals::set_interrupts_pending; -use crate::srcref::ark_uri; use crate::srcref::ns_populate_srcref; use crate::srcref::resource_loaded_namespaces; use crate::startup; @@ -152,6 +150,13 @@ pub enum SessionMode { Background, } +#[derive(Clone, Debug)] +pub enum DebugCallText { + None, + Capturing(String), + Finalized(String), +} + // --- Globals --- // These values must be global in order for them to be accessible from R // callbacks, which do not have a facility for passing or returning context. @@ -234,8 +239,6 @@ pub struct RMain { /// initially connects and after an LSP restart. lsp_virtual_documents: HashMap, - dap: RMainDap, - pub positron_ns: Option, pending_inputs: Option, @@ -258,13 +261,29 @@ pub struct RMain { /// See https://github.com/posit-dev/positron/issues/3151. debug_preserve_focus: bool, + /// Underlying dap state. Shared with the DAP server thread. + pub(crate) debug_dap: Arc>, + + /// Whether or not we are currently in a debugging state. + pub(crate) debug_is_debugging: bool, + + /// The current call emitted by R as `debug: `. + pub(crate) debug_call_text: DebugCallText, + + /// The last known `start_line` for the active context frame. + pub(crate) debug_last_line: Option, + /// The stack of frames we saw the last time we stopped. Used as a mostly /// reliable indication of whether we moved since last time. - debug_last_stack: Vec, + pub(crate) debug_last_stack: Vec, /// Ever increasing debug session index. 
Used to create URIs that are only /// valid for a single session. - debug_session_index: u32, + pub(crate) debug_session_index: u32, + + /// The current frame `id`. Unique across all frames within a single debug session. + /// Reset after `stop_debug()`, not between debug steps. + pub(crate) debug_current_frame_id: i64, /// Tracks how many nested `r_read_console()` calls are on the stack. /// Incremented when entering `r_read_console(),` decremented on exit. @@ -748,7 +767,8 @@ impl RMain { help_port: None, lsp_events_tx: None, lsp_virtual_documents: HashMap::new(), - dap: RMainDap::new(dap), + debug_dap: dap, + debug_is_debugging: false, tasks_interrupt_rx, tasks_idle_rx, pending_futures: HashMap::new(), @@ -757,9 +777,12 @@ impl RMain { banner: None, r_error_buffer: None, captured_output: String::new(), + debug_call_text: DebugCallText::None, + debug_last_line: None, debug_preserve_focus: false, debug_last_stack: vec![], debug_session_index: 1, + debug_current_frame_id: 0, pending_inputs: None, read_console_depth: Cell::new(0), read_console_nested_return: Cell::new(false), @@ -880,7 +903,7 @@ impl RMain { /// - Move the Console state machine to the next state: /// - Wait for input /// - Set an active execute request and a list of pending expressions - /// - Set `self.dap.is_debugging()` depending on presence or absence of debugger prompt + /// - Set `self.debug_is_debugging` depending on presence or absence of debugger prompt /// - Evaluate next pending expression /// - Close active execute request if pending list is empty /// - Run an event loop while waiting for input @@ -891,21 +914,23 @@ impl RMain { buflen: c_int, _hist: c_int, ) -> ConsoleResult { - self.dap.handle_read_console(); + self.debug_handle_read_console(); // State machine part of ReadConsole let info = self.prompt_info(prompt); log::trace!("R prompt: {}", info.input_prompt); - // Invariant: If we detect a browser prompt, `self.dap.is_debugging()` + // Invariant: If we detect a browser prompt, 
`self.debug_is_debugging` // is true. Otherwise it is false. if matches!(info.kind, PromptKind::Browser) { // Start or continue debugging with the `debug_preserve_focus` hint // from the last expression we evaluated - self.start_debug(self.debug_preserve_focus); - } else if self.dap.is_debugging() { - self.stop_debug(); + self.debug_is_debugging = true; + self.debug_start(self.debug_preserve_focus); + } else if self.debug_is_debugging { + self.debug_is_debugging = false; + self.debug_stop(); } if let Some(exception) = self.take_exception() { @@ -948,7 +973,7 @@ impl RMain { // often. We'd still push a `DidChangeConsoleInputs` notification from // here, but only containing high-level information such as `search()` // contents and `ls(rho)`. - if !self.dap.is_debugging() && !matches!(info.kind, PromptKind::InputRequest) { + if !self.debug_is_debugging && !matches!(info.kind, PromptKind::InputRequest) { self.refresh_lsp(); } @@ -1282,7 +1307,7 @@ impl RMain { RRequest::DebugCommand(cmd) => { // Just ignore command in case we left the debugging state already - if !self.dap.is_debugging() { + if !self.debug_is_debugging { return None; } @@ -1359,7 +1384,7 @@ impl RMain { // https://github.com/posit-dev/positron/issues/3151 self.debug_preserve_focus = true; - if self.dap.is_debugging() { + if self.debug_is_debugging { // Try to interpret this pending input as a symbol (debug commands // are entered as symbols). Whether or not it parses as a symbol, // if we're currently debugging we must set `debug_preserve_focus`. 
@@ -1382,7 +1407,7 @@ impl RMain { self.debug_preserve_focus = false; // Let the DAP client know that execution is now continuing - self.dap.send_dap(DapBackendEvent::Continued); + self.debug_send_dap(DapBackendEvent::Continued); } // All debug commands are forwarded to the base REPL as @@ -1511,40 +1536,6 @@ impl RMain { return ConsoleResult::Error(message); } - fn start_debug(&mut self, debug_preserve_focus: bool) { - match self.dap.stack_info() { - Ok(stack) => { - // Figure out whether we changed location since last time, - // e.g. because the user evaluated an expression that hit - // another breakpoint. In that case we do want to move - // focus, even though the user didn't explicitly used a step - // gesture. Our indication that we changed location is - // whether the call stack looks the same as last time. This - // is not 100% reliable as this heuristic might have false - // negatives, e.g. if the control flow exited the current - // context via condition catching and jumped back in the - // debugged function. - let stack_id: Vec = stack.iter().map(|f| f.into()).collect(); - let same_stack = stack_id == self.debug_last_stack; - - // Initialize fallback sources for this stack - let fallback_sources = self.load_fallback_sources(&stack); - - self.debug_last_stack = stack_id; - self.dap - .start_debug(stack, same_stack && debug_preserve_focus, fallback_sources); - }, - Err(err) => log::error!("ReadConsole: Can't get stack info: {err}"), - }; - } - - fn stop_debug(&mut self) { - self.debug_last_stack = vec![]; - self.clear_fallback_sources(); - self.debug_session_index += 1; - self.dap.stop_debug(); - } - /// Load `fallback_sources` with this stack's text sources /// @returns Map of `source` -> `source_reference` used for frames that don't have /// associated files (i.e. no `srcref` attribute). The `source` is the key to @@ -1552,12 +1543,12 @@ impl RMain { /// in duplicate virtual editors being opened on the client side. 
pub fn load_fallback_sources( &mut self, - stack: &Vec, + stack: &Vec, ) -> HashMap { let mut sources = HashMap::new(); for frame in stack.iter() { - if let crate::dap::dap_r_main::FrameSource::Text(source) = &frame.source { + if let crate::repl_debug::FrameSource::Text(source) = &frame.source { let uri = Self::ark_debug_uri(self.debug_session_index, &frame.source_name, source); if self.has_virtual_document(&uri) { @@ -1588,31 +1579,6 @@ impl RMain { } } - fn ark_debug_uri(debug_session_index: u32, source_name: &str, source: &str) -> String { - // Hash the source to generate a unique identifier used in - // the URI. This is needed to disambiguate frames that have - // the same source name (used as file name in the URI) but - // different sources. - use std::collections::hash_map::DefaultHasher; - use std::hash::Hash; - use std::hash::Hasher; - let mut hasher = DefaultHasher::new(); - source.hash(&mut hasher); - let hash = format!("{:x}", hasher.finish()); - - ark_uri(&format!( - "debug/session{i}/{hash}/{source_name}.R", - i = debug_session_index, - )) - } - - // Doesn't expect `ark:` scheme, used for checking keys in our vdoc map - fn is_ark_debug_path(uri: &str) -> bool { - static RE_ARK_DEBUG_URI: std::sync::OnceLock = std::sync::OnceLock::new(); - let re = RE_ARK_DEBUG_URI.get_or_init(|| Regex::new(r"^ark-\d+/debug/").unwrap()); - re.is_match(uri) - } - fn renv_autoloader_reply() -> Option { let is_autoloader_running = harp::get_option("renv.autoloader.running") .try_into() @@ -2050,7 +2016,7 @@ impl RMain { // To capture the current `debug: ` output, for use in the debugger's // match based fallback - r_main.dap.handle_write_console(&content); + r_main.debug_handle_write_console(&content); let stream = if otype == 0 { Stream::Stdout diff --git a/crates/ark/src/lib.rs b/crates/ark/src/lib.rs index 0f88a0d5c..6b3d88790 100644 --- a/crates/ark/src/lib.rs +++ b/crates/ark/src/lib.rs @@ -29,6 +29,7 @@ pub mod modules_utils; pub mod plots; pub mod r_task; pub mod 
repos; +pub mod repl_debug; pub mod request; pub mod reticulate; pub mod shell; diff --git a/crates/ark/src/repl_debug.rs b/crates/ark/src/repl_debug.rs new file mode 100644 index 000000000..da5f8b46f --- /dev/null +++ b/crates/ark/src/repl_debug.rs @@ -0,0 +1,360 @@ +// +// repl_debug.rs +// +// Copyright (C) 2025 Posit Software, PBC. All rights reserved. +// + +use anyhow::anyhow; +use anyhow::Result; +use harp::exec::RFunction; +use harp::exec::RFunctionExt; +use harp::object::RObject; +use harp::protect::RProtect; +use harp::r_string; +use harp::session::r_sys_calls; +use harp::session::r_sys_frames; +use harp::session::r_sys_functions; +use harp::utils::r_is_null; +use regex::Regex; +use stdext::result::ResultExt; + +use crate::dap::dap::DapBackendEvent; +use crate::interface::DebugCallText; +use crate::interface::RMain; +use crate::modules::ARK_ENVS; +use crate::srcref::ark_uri; +use crate::thread::RThreadSafe; + +#[derive(Debug)] +pub struct FrameInfo { + pub id: i64, + /// The name shown in the editor tab bar when this frame is viewed. + pub source_name: String, + /// The name shown in the stack frame UI when this frame is visible. + pub frame_name: String, + pub source: FrameSource, + pub environment: Option>, + pub start_line: i64, + pub start_column: i64, + pub end_line: i64, + pub end_column: i64, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum FrameSource { + File(String), + Text(String), +} + +/// Version of `FrameInfo` that identifies the frame by value and doesn't keep a +/// reference to the environment. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct FrameInfoId { + pub source: FrameSource, + pub start_line: i64, + pub start_column: i64, + pub end_line: i64, + pub end_column: i64, +} + +impl From<&FrameInfo> for FrameInfoId { + fn from(info: &FrameInfo) -> Self { + FrameInfoId { + source: info.source.clone(), + start_line: info.start_line, + start_column: info.start_column, + end_line: info.end_line, + end_column: info.end_column, + } + } +} + +impl RMain { + pub(crate) fn debug_start(&mut self, debug_preserve_focus: bool) { + match self.debug_stack_info() { + Ok(stack) => { + // Figure out whether we changed location since last time, + // e.g. because the user evaluated an expression that hit + // another breakpoint. In that case we do want to move + // focus, even though the user didn't explicitly used a step + // gesture. Our indication that we changed location is + // whether the call stack looks the same as last time. This + // is not 100% reliable as this heuristic might have false + // negatives, e.g. if the control flow exited the current + // context via condition catching and jumped back in the + // debugged function. 
+ let stack_id: Vec = stack.iter().map(|f| f.into()).collect(); + let same_stack = stack_id == self.debug_last_stack; + + // Initialize fallback sources for this stack + let fallback_sources = self.load_fallback_sources(&stack); + + self.debug_last_stack = stack_id; + + let preserve_focus = same_stack && debug_preserve_focus; + + let mut dap = self.debug_dap.lock().unwrap(); + dap.start_debug(stack, preserve_focus, fallback_sources) + }, + Err(err) => log::error!("ReadConsole: Can't get stack info: {err}"), + }; + } + + pub(crate) fn debug_stop(&mut self) { + self.debug_last_stack = vec![]; + self.clear_fallback_sources(); + self.debug_reset_frame_id(); + self.debug_session_index += 1; + + let mut dap = self.debug_dap.lock().unwrap(); + dap.stop_debug(); + } + + pub(crate) fn debug_send_dap(&self, event: DapBackendEvent) { + let dap = self.debug_dap.lock().unwrap(); + if let Some(tx) = &dap.backend_events_tx { + tx.send(event).log_err(); + } + } + + pub(crate) fn debug_handle_read_console(&mut self) { + // Upon entering read-console, finalize any debug call text that we were capturing. + // At this point, the user can either advance the debugger, causing us to capture + // a new expression, or execute arbitrary code, where we will reuse a finalized + // debug call text to maintain the debug state. + match &self.debug_call_text { + // If not debugging, nothing to do. + DebugCallText::None => (), + // If already finalized, keep what we have. + DebugCallText::Finalized(_) => (), + // If capturing, transition to finalized. 
+ DebugCallText::Capturing(call_text) => { + self.debug_call_text = DebugCallText::Finalized(call_text.clone()) + }, + } + } + + pub(crate) fn debug_handle_write_console(&mut self, content: &str) { + if let DebugCallText::Capturing(ref mut call_text) = self.debug_call_text { + // Append to current expression if we are currently capturing stdout + call_text.push_str(content); + return; + } + + // `debug: ` is emitted by R (if no srcrefs are available!) right before it emits + // the current expression we are debugging, so we use that as a signal to begin + // capturing. + if content == "debug: " { + self.debug_call_text = DebugCallText::Capturing(String::new()); + return; + } + + // Entering or exiting a closure, reset the debug start line state and call text + if content == "debugging in: " || content == "exiting from: " { + self.debug_last_line = None; + self.debug_call_text = DebugCallText::None; + return; + } + } + + pub(crate) fn debug_stack_info(&mut self) -> Result> { + // We leave finalized `call_text` in place rather than setting it to `None` here + // in case the user executes an arbitrary expression in the debug R console, which + // loops us back here without updating the `call_text` in any way, allowing us to + // recreate the debugger state after their code execution. + let call_text = match self.debug_call_text.clone() { + DebugCallText::None => None, + DebugCallText::Capturing(call_text) => { + log::error!( + "Call text is in `Capturing` state, but should be `Finalized`: '{call_text}'." 
+ ); + None + }, + DebugCallText::Finalized(call_text) => Some(call_text), + }; + + let last_start_line = self.debug_last_line; + + let frames = self.debug_r_stack_info(call_text, last_start_line)?; + + // If we have `frames`, update the `last_start_line` with the context + // frame's start line + if let Some(frame) = frames.get(0) { + self.debug_last_line = Some(frame.start_line); + } + + Ok(frames) + } + + pub(crate) fn debug_r_stack_info( + &mut self, + context_call_text: Option, + context_last_start_line: Option, + ) -> Result> { + unsafe { + let mut protect = RProtect::new(); + + let context_srcref = libr::get(libr::R_Srcref); + protect.add(context_srcref); + + let context_call_text = match context_call_text { + Some(context_call_text) => r_string!(context_call_text, &mut protect), + None => libr::R_NilValue, + }; + + let context_last_start_line = match context_last_start_line { + Some(context_last_start_line) => { + let x = libr::Rf_allocVector(libr::INTSXP, 1); + protect.add(x); + libr::SET_INTEGER_ELT(x, 0, i32::try_from(context_last_start_line)?); + x + }, + None => libr::R_NilValue, + }; + + let functions = r_sys_functions()?; + protect.add(functions); + + let environments = r_sys_frames()?; + protect.add(environments.sexp); + + let calls = r_sys_calls()?; + protect.add(calls.sexp); + + let info = RFunction::new("", "debugger_stack_info") + .add(context_call_text) + .add(context_last_start_line) + .add(context_srcref) + .add(functions) + .add(environments) + .add(calls) + .call_in(ARK_ENVS.positron_ns)?; + + let n: isize = libr::Rf_xlength(info.sexp); + + let mut out = Vec::with_capacity(n as usize); + + // Reverse the order for DAP + for i in (0..n).rev() { + let frame = libr::VECTOR_ELT(info.sexp, i); + let id = self.debug_next_frame_id(); + out.push(as_frame_info(frame, id)?); + } + + Ok(out) + } + } + + fn debug_next_frame_id(&mut self) -> i64 { + let out = self.debug_current_frame_id; + self.debug_current_frame_id += 1; + out + } + + pub(crate) fn 
debug_reset_frame_id(&mut self) { + self.debug_current_frame_id = 0; + } + + pub(crate) fn ark_debug_uri( + debug_session_index: u32, + source_name: &str, + source: &str, + ) -> String { + // Hash the source to generate a unique identifier used in + // the URI. This is needed to disambiguate frames that have + // the same source name (used as file name in the URI) but + // different sources. + use std::collections::hash_map::DefaultHasher; + use std::hash::Hash; + use std::hash::Hasher; + let mut hasher = DefaultHasher::new(); + source.hash(&mut hasher); + let hash = format!("{:x}", hasher.finish()); + + ark_uri(&format!( + "debug/session{i}/{hash}/{source_name}.R", + i = debug_session_index, + )) + } + + // Doesn't expect `ark:` scheme, used for checking keys in our vdoc map + pub(crate) fn is_ark_debug_path(uri: &str) -> bool { + static RE_ARK_DEBUG_URI: std::sync::OnceLock = std::sync::OnceLock::new(); + let re = RE_ARK_DEBUG_URI.get_or_init(|| Regex::new(r"^ark-\d+/debug/").unwrap()); + re.is_match(uri) + } +} + +fn as_frame_info(info: libr::SEXP, id: i64) -> Result { + unsafe { + let mut i = 0; + + let source_name = libr::VECTOR_ELT(info, i); + let source_name: String = RObject::view(source_name).try_into()?; + + i += 1; + let frame_name = libr::VECTOR_ELT(info, i); + let frame_name: String = RObject::view(frame_name).try_into()?; + + let mut source = None; + + i += 1; + let file = libr::VECTOR_ELT(info, i); + if file != libr::R_NilValue { + let file: String = RObject::view(file).try_into()?; + source = Some(FrameSource::File(file)); + } + + i += 1; + let text = libr::VECTOR_ELT(info, i); + if text != libr::R_NilValue { + let text: String = RObject::view(text).try_into()?; + source = Some(FrameSource::Text(text)); + } + + let Some(source) = source else { + return Err(anyhow!( + "Expected either `file` or `text` to be non-`NULL`." 
+ )); + }; + + i += 1; + let environment = libr::VECTOR_ELT(info, i); + let environment = if r_is_null(environment) { + None + } else { + Some(RThreadSafe::new(RObject::from(environment))) + }; + + i += 1; + let start_line = libr::VECTOR_ELT(info, i); + let start_line: i32 = RObject::view(start_line).try_into()?; + + i += 1; + let start_column = libr::VECTOR_ELT(info, i); + let start_column: i32 = RObject::view(start_column).try_into()?; + + i += 1; + let end_line = libr::VECTOR_ELT(info, i); + let end_line: i32 = RObject::view(end_line).try_into()?; + + // For `end_column`, the column range provided by R is inclusive `[,]`, but the + // one used on the DAP / Positron side is exclusive `[,)` so we have to add 1. + i += 1; + let end_column = libr::VECTOR_ELT(info, i); + let end_column: i32 = RObject::view(end_column).try_into()?; + let end_column = end_column + 1; + + Ok(FrameInfo { + id, + source_name, + frame_name, + source, + environment, + start_line: start_line.try_into()?, + start_column: start_column.try_into()?, + end_line: end_line.try_into()?, + end_column: end_column.try_into()?, + }) + } +} From 44fa3558121fe78efd648bb804b049f6b5edb743 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 27 Nov 2025 15:38:49 +0100 Subject: [PATCH 56/63] Rename to `read_console_nested_return_next_input` --- crates/ark/src/interface.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 0ffc280a7..cd52f7d93 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -289,6 +289,11 @@ pub struct RMain { /// Incremented when entering `r_read_console(),` decremented on exit. read_console_depth: Cell, + /// Set to true when `r_read_console()` exits via an error longjump. Used to + /// detect if we need to go return from `r_read_console()` with a dummy + /// evaluation to reset things like `R_EvalDepth`. 
+ read_console_threw_error: Cell, + /// Set to true when `r_read_console()` exits. Reset to false at the start /// of each `r_read_console()` call. Used to detect if `eval()` returned /// from a nested REPL (the flag will be true when the evaluation returns). @@ -296,14 +301,9 @@ pub struct RMain { /// evaluation to reset things like `R_ConsoleIob`. read_console_nested_return: Cell, - /// Set to true when `r_read_console()` exits via an error longjump. Used to - /// detect if we need to go return from `r_read_console()` with a dummy - /// evaluation to reset things like `R_EvalDepth`. - read_console_threw_error: Cell, - /// Used to track an input to evaluate upon returning to `r_read_console()`, /// after having returned a dummy input to reset `R_ConsoleIob` in R's REPL. - read_console_next_input: Cell>, + read_console_nested_return_next_input: Cell>, /// We've received a Shutdown signal and need to return EOF from all nested /// consoles to get R to shut down @@ -787,7 +787,7 @@ impl RMain { read_console_depth: Cell::new(0), read_console_nested_return: Cell::new(false), read_console_threw_error: Cell::new(false), - read_console_next_input: Cell::new(None), + read_console_nested_return_next_input: Cell::new(None), read_console_frame: RefCell::new(RObject::new(unsafe { libr::R_GlobalEnv })), read_console_shutdown: Cell::new(false), } @@ -2417,7 +2417,7 @@ pub extern "C-unwind" fn r_read_console( // We've finished evaluating a dummy value to reset state in R's REPL, // and are now ready to evaluate the actual input - if let Some(next_input) = main.read_console_next_input.take() { + if let Some(next_input) = main.read_console_nested_return_next_input.take() { RMain::on_console_input(buf, buflen, next_input).unwrap(); return 1; } @@ -2525,7 +2525,8 @@ fn r_read_console_impl( // a dummy value causing a `PARSE_NULL` event. 
if main.read_console_nested_return.get() { let next_input = RMain::console_input(buf, buflen); - main.read_console_next_input.set(Some(next_input)); + main.read_console_nested_return_next_input + .set(Some(next_input)); // Evaluating a space causes a `PARSE_NULL` event. Don't // evaluate a newline, that would cause a parent debug REPL From 88e49558296f85a83428b45ae640fb5cbb8d50b0 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 27 Nov 2025 15:43:04 +0100 Subject: [PATCH 57/63] Use `self.is_empty()` --- crates/ark/src/interface.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index cd52f7d93..d1f43279a 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -388,7 +388,7 @@ impl PendingInputs { } pub(crate) fn pop(&mut self) -> Option { - if self.index >= self.len { + if self.is_empty() { return None; } From f95a17e9ee0439bc86388a549cbcfd1233752251 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 27 Nov 2025 16:21:57 +0100 Subject: [PATCH 58/63] Rework srcref getters --- crates/ark/src/interface.rs | 17 +++++++++++------ crates/harp/src/parser/srcref.rs | 11 +++++++---- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index d1f43279a..9e2023d3c 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -74,8 +74,8 @@ use harp::object::RObject; use harp::r_symbol; use harp::routines::r_register_routines; use harp::session::r_traceback; -use harp::srcref::get_block_srcrefs; -use harp::srcref::get_srcref; +use harp::srcref::get_srcref_list; +use harp::srcref::srcref_list_get; use harp::srcref::SrcFile; use harp::utils::r_is_data_frame; use harp::utils::r_typeof; @@ -317,8 +317,8 @@ pub struct RMain { struct PendingInputs { /// EXPRSXP vector of parsed expressions exprs: RObject, - /// List of srcrefs, the same length as `exprs` - srcrefs: RObject, + /// List of srcrefs 
if any, the same length as `exprs` + srcrefs: Option, /// Length of `exprs` and `srcrefs` len: isize, /// Index into the stack @@ -366,7 +366,7 @@ impl PendingInputs { }, }; - let srcrefs = get_block_srcrefs(exprs.sexp); + let srcrefs = get_srcref_list(exprs.sexp); let len = exprs.length(); let index = 0; @@ -392,9 +392,14 @@ impl PendingInputs { return None; } - let srcref = get_srcref(self.srcrefs.sexp, self.index); let expr = harp::r_list_get(self.exprs.sexp, self.index); + let srcref = self + .srcrefs + .as_ref() + .map(|xs| srcref_list_get(xs.sexp, self.index)) + .unwrap_or(RObject::null()); + self.index += 1; Some(PendingInput { expr, srcref }) } diff --git a/crates/harp/src/parser/srcref.rs b/crates/harp/src/parser/srcref.rs index 1a020d3b9..d7b76782d 100644 --- a/crates/harp/src/parser/srcref.rs +++ b/crates/harp/src/parser/srcref.rs @@ -169,7 +169,7 @@ impl From<&harp::CharacterVector> for SrcFile { } } -pub fn get_srcref(srcrefs: libr::SEXP, ind: isize) -> RObject { +pub fn srcref_list_get(srcrefs: libr::SEXP, ind: isize) -> RObject { if crate::r_is_null(srcrefs) { return RObject::null(); } @@ -195,14 +195,17 @@ pub fn get_srcref(srcrefs: libr::SEXP, ind: isize) -> RObject { RObject::new(result) } -pub fn get_block_srcrefs(call: libr::SEXP) -> RObject { +// Some objects, such as calls to `{` and expression vectors returned by +// `parse()`, have a list of `srcref` objects attached as `srcref` attribute. +// This helper retrieves them if they exist. 
+pub fn get_srcref_list(call: libr::SEXP) -> Option { let srcrefs = unsafe { libr::Rf_getAttrib(call, libr::R_SrcrefSymbol) }; if unsafe { libr::TYPEOF(srcrefs) as u32 } == libr::VECSXP { - return RObject::new(srcrefs); + return Some(RObject::new(srcrefs)); } - RObject::null() + None } #[cfg(test)] From c021bd44bda554ad05b36638469507168e841e7b Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 27 Nov 2025 16:31:09 +0100 Subject: [PATCH 59/63] Use existing list getter --- crates/ark/src/interface.rs | 2 +- crates/harp/src/object.rs | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 9e2023d3c..c8834c08b 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -392,7 +392,7 @@ impl PendingInputs { return None; } - let expr = harp::r_list_get(self.exprs.sexp, self.index); + let expr = RObject::new(harp::list_get(self.exprs.sexp, self.index)); let srcref = self .srcrefs diff --git a/crates/harp/src/object.rs b/crates/harp/src/object.rs index 8d4f3eb8e..3daa1e0f1 100644 --- a/crates/harp/src/object.rs +++ b/crates/harp/src/object.rs @@ -238,10 +238,6 @@ pub fn r_list_poke(x: SEXP, i: R_xlen_t, value: SEXP) { } } -pub fn r_list_get(x: SEXP, i: R_xlen_t) -> RObject { - unsafe { RObject::new(VECTOR_ELT(x, i)) } -} - pub fn r_lgl_begin(x: SEXP) -> *mut i32 { unsafe { LOGICAL(x) } } From ef63f57ab0bd9acdac1ddbb8fdf7885b2484a3b0 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 27 Nov 2025 16:49:49 +0100 Subject: [PATCH 60/63] Fix timing of error buffer peeking --- crates/ark/src/interface.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index c8834c08b..32f9d9cd2 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1211,14 +1211,16 @@ impl RMain { // push a tree too far to the right. 
let traceback = r_traceback(); - // Reset error buffer so we don't display this message again - let _ = RFunction::new("base", "stop").call(); - - Exception { + let exception = Exception { ename: String::from(""), evalue: r_peek_error_buffer(), traceback, - } + }; + + // Reset error buffer so we don't display this message again + let _ = RFunction::new("base", "stop").call(); + + exception } else { return None; }; From ae0b42842c9581e5012180e0a280095ddd794de5 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 27 Nov 2025 16:51:32 +0100 Subject: [PATCH 61/63] Tweak control flow --- crates/ark/src/interface.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 32f9d9cd2..6a5e6f20d 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -1343,15 +1343,15 @@ impl RMain { // Evaluate first expression if there is one if let Some(input) = self.pop_pending() { - return Some(self.handle_pending_input(input, buf, buflen)); - } - - // Otherwise we got an empty input, e.g. `""` and there's - // nothing to do. Close active request. - self.handle_active_request(info, ConsoleValue::Success(Default::default())); + Some(self.handle_pending_input(input, buf, buflen)) + } else { + // Otherwise we got an empty input, e.g. `""` and there's + // nothing to do. Close active request. 
+ self.handle_active_request(info, ConsoleValue::Success(Default::default())); - // And return to event loop - None + // And return to event loop + None + } }, ConsoleInput::EOF => Some(ConsoleResult::Disconnected), From 7f6fa3a3751c691d7ee1ed5473aea0346c8cbcf1 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 27 Nov 2025 15:40:57 +0100 Subject: [PATCH 62/63] Tweak more comments --- crates/ark/src/interface.rs | 35 ++++++++++++++-------------- crates/ark/src/main.rs | 4 ---- crates/ark/src/sys/unix/interface.rs | 13 +++++++---- crates/ark/tests/kernel.rs | 7 ++---- crates/harp/src/parse.rs | 4 ++-- 5 files changed, 29 insertions(+), 34 deletions(-) diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 6a5e6f20d..f5154b491 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -309,7 +309,8 @@ pub struct RMain { /// consoles to get R to shut down read_console_shutdown: Cell, - /// Current topmost environment on the stack while waiting for input in ReadConsole + /// Current topmost environment on the stack while waiting for input in ReadConsole. + /// This is a RefCell since we require `get()` for this field and `RObject` isn't `Copy`. pub(crate) read_console_frame: RefCell, } @@ -344,7 +345,6 @@ impl PendingInputs { let status = match harp::parse_status(&input) { Err(err) => { // Failed to even attempt to parse the input, something is seriously wrong - // FIXME: There are some valid syntax errors going through here, e.g. `identity |> _(1)`. return Ok(ParseResult::SyntaxError(format!("{err}"))); }, Ok(status) => status, @@ -353,7 +353,7 @@ impl PendingInputs { // - Incomplete inputs put R into a state where it expects more input that will never come, so we // immediately reject them. Positron should never send us these, but Jupyter Notebooks may. // - Complete statements are obviously fine. - // - Syntax errors will cause R to throw an error, which is expected. 
+ // - Syntax errors will get bubbled up as R errors via an `ConsoleResult::Error`. let exprs = match status { harp::ParseResult::Complete(exprs) => exprs, harp::ParseResult::Incomplete => { @@ -793,6 +793,7 @@ impl RMain { read_console_nested_return: Cell::new(false), read_console_threw_error: Cell::new(false), read_console_nested_return_next_input: Cell::new(None), + // Can't use `R_ENVS.global` here as it isn't initialised yet read_console_frame: RefCell::new(RObject::new(unsafe { libr::R_GlobalEnv })), read_console_shutdown: Cell::new(false), } @@ -956,16 +957,17 @@ impl RMain { // Clear any pending inputs, if any self.pending_inputs = None; - // Reply to active request with error + // Reply to active request with error, then fall through to event loop self.handle_active_request(&info, ConsoleValue::Error(exception)); } else if matches!(info.kind, PromptKind::InputRequest) { - // Request input reply to the frontend and return it to R + // Request input from the frontend and return it to R return self.handle_input_request(&info, buf, buflen); } else if let Some(input) = self.pop_pending() { // Evaluate pending expression if there is any remaining return self.handle_pending_input(input, buf, buflen); } else { - // Otherwise reply to active request with accumulated result + // Otherwise reply to active request with accumulated result, then + // fall through to event loop let result = self.take_result(); self.handle_active_request(&info, ConsoleValue::Success(result)); } @@ -1029,12 +1031,12 @@ impl RMain { let tasks_interrupt_index = select.recv(&tasks_interrupt_rx); let polled_events_index = select.recv(&polled_events_rx); - // Don't process idle tasks unless at top level. We currently don't want - // idle tasks (e.g. for srcref generation) to run when the call stack is - // not empty. We could make this configurable though if needed, i.e. some - // idle tasks would be able to run in the browser. 
Those should be sent - // to a dedicated channel that would always be included in the set of - // recv channels. + // Only process idle at top level. We currently don't want idle tasks + // (e.g. for srcref generation) to run when the call stack is not empty. + // We could make this configurable though if needed, i.e. some idle + // tasks would be able to run in the browser. Those should be sent to a + // dedicated channel that would always be included in the set of recv + // channels. let tasks_idle_index = if matches!(info.kind, PromptKind::TopLevel) { Some(select.recv(&tasks_idle_rx)) } else { @@ -1175,8 +1177,7 @@ impl RMain { if autoprint.ends_with('\n') { // Remove the trailing newlines that R adds to outputs but that - // Jupyter frontends are not expecting. Is it worth taking a - // mutable self ref across calling methods to avoid the clone? + // Jupyter frontends are not expecting autoprint.pop(); } if autoprint.len() != 0 { @@ -1249,9 +1250,6 @@ impl RMain { Some(exception) } - /// Returns: - /// - `None` if we should fall through to the event loop to wait for more user input - /// - `Some(ConsoleResult)` if we should immediately exit `read_console()` fn handle_active_request(&mut self, info: &PromptInfo, value: ConsoleValue) { // If we get here we finished evaluating all pending inputs. Check if we // have an active request from a previous `read_console()` iteration. If @@ -2423,7 +2421,8 @@ pub extern "C-unwind" fn r_read_console( } // We've finished evaluating a dummy value to reset state in R's REPL, - // and are now ready to evaluate the actual input + // and are now ready to evaluate the actual input, which is typically + // just `.ark_last_value`. 
if let Some(next_input) = main.read_console_nested_return_next_input.take() { RMain::on_console_input(buf, buflen, next_input).unwrap(); return 1; diff --git a/crates/ark/src/main.rs b/crates/ark/src/main.rs index 746e43058..70cfe5e78 100644 --- a/crates/ark/src/main.rs +++ b/crates/ark/src/main.rs @@ -83,10 +83,6 @@ fn main() -> anyhow::Result<()> { let mut capture_streams = true; let mut default_repos = DefaultRepos::Auto; - // We don't support the asking the user whether to save the workspace data - // on exit because calling readline during shutdown puts in a precarious - // position. So effectively we're implementing "no-save" by default. - // Process remaining arguments. TODO: Need an argument that can passthrough args to R while let Some(arg) = argv.next() { match arg.as_str() { "--connection_file" => { diff --git a/crates/ark/src/sys/unix/interface.rs b/crates/ark/src/sys/unix/interface.rs index 0748ea367..a98c1b6d6 100644 --- a/crates/ark/src/sys/unix/interface.rs +++ b/crates/ark/src/sys/unix/interface.rs @@ -73,12 +73,15 @@ pub fn setup_r(args: &Vec) { libr::set(ptr_R_Busy, Some(r_busy)); libr::set(ptr_R_Suicide, Some(r_suicide)); + // Install a CleanUp hook for integration tests that test the shutdown process. + // We confirm that shutdown occurs by waiting in the test until `CLEANUP_SIGNAL`'s + // condition variable sends a notification, which occurs in this cleanup method + // that is called during R's shutdown process. if stdext::IS_TESTING { - use libr::ptr_R_CleanUp; - - use crate::interface::r_cleanup_for_tests; - - libr::set(ptr_R_CleanUp, Some(r_cleanup_for_tests)); + libr::set( + libr::ptr_R_CleanUp, + Some(crate::interface::r_cleanup_for_tests), + ); } // In tests R may be run from various threads. 
This confuses R's stack diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 45b638605..979b17667 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -5,9 +5,6 @@ use amalthea::wire::kernel_info_request::KernelInfoRequest; use ark::fixtures::DummyArkFrontend; use stdext::assert_match; -// Avoids our global calling handler from rlangifying errors. -// This causes some test instability across configs. - #[test] fn test_kernel_info() { let frontend = DummyArkFrontend::lock(); @@ -424,8 +421,8 @@ fn test_execute_request_error() { #[test] fn test_execute_request_error_with_accumulated_output() { - // Test that when the very last input output and then throws an error, - // the accumulated output is flushed before the error is reported. + // Test that when the very last input throws an error after producing + // output, the accumulated output is flushed before the error is reported. // This tests the autoprint buffer flush logic in error handling. let frontend = DummyArkFrontend::lock(); diff --git a/crates/harp/src/parse.rs b/crates/harp/src/parse.rs index d298198f2..43a141afc 100644 --- a/crates/harp/src/parse.rs +++ b/crates/harp/src/parse.rs @@ -220,7 +220,7 @@ mod tests { Ok(ParseResult::Incomplete) ); - // Error + // Syntax error (error longjump thrown by parser) assert_match!( parse_status(&ParseInput::Text("42 + _")), Ok(ParseResult::SyntaxError { message }) => { @@ -228,7 +228,7 @@ mod tests { } ); - // "normal" syntax error + // Syntax error (error code returned by parser) assert_match!( parse_status(&ParseInput::Text("1+1\n*42")), Ok(ParseResult::SyntaxError { message }) => { From fbd76e2e94dae02a46d8e3d7aa315a999d7d5b2a Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 27 Nov 2025 16:59:26 +0100 Subject: [PATCH 63/63] Move `r_cleanup_for_tests()` to Unix file --- crates/ark/src/fixtures/dummy_frontend.rs | 4 +++- crates/ark/src/interface.rs | 22 ------------------ crates/ark/src/sys/unix/interface.rs | 28 
+++++++++++++++++++---- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/crates/ark/src/fixtures/dummy_frontend.rs b/crates/ark/src/fixtures/dummy_frontend.rs index 74a951b7d..3a12f8fc5 100644 --- a/crates/ark/src/fixtures/dummy_frontend.rs +++ b/crates/ark/src/fixtures/dummy_frontend.rs @@ -10,7 +10,6 @@ use amalthea::fixtures::dummy_frontend::DummyConnection; use amalthea::fixtures::dummy_frontend::DummyFrontend; use crate::interface::SessionMode; -use crate::interface::CLEANUP_SIGNAL; use crate::repos::DefaultRepos; // There can be only one frontend per process. Needs to be in a mutex because @@ -66,8 +65,11 @@ impl DummyArkFrontend { /// Wait for R cleanup to start (indicating shutdown has been initiated). /// Panics if cleanup doesn't start within the timeout. + #[cfg(unix)] #[track_caller] pub fn wait_for_cleanup() { + use crate::sys::interface::CLEANUP_SIGNAL; + let (lock, cvar) = &CLEANUP_SIGNAL; let result = cvar .wait_timeout_while(lock.lock().unwrap(), Duration::from_secs(3), |started| { diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index f5154b491..b72894eae 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -2616,28 +2616,6 @@ pub unsafe extern "C-unwind" fn r_polled_events() { }; } -// For integration tests -use std::sync::Condvar; -pub static CLEANUP_SIGNAL: (Mutex, Condvar) = (Mutex::new(false), Condvar::new()); - -#[no_mangle] -pub extern "C-unwind" fn r_cleanup_for_tests(_save_act: i32, _status: i32, _run_last: i32) { - // Signal that cleanup has started - let (lock, cvar) = &CLEANUP_SIGNAL; - - let mut started = lock.lock().unwrap(); - *started = true; - - cvar.notify_all(); - drop(started); - - // Sleep to give tests time to complete before we panic - std::thread::sleep(std::time::Duration::from_secs(5)); - - // Fallthrough to R which will call `exit()`. Note that panicking from here - // would be UB, we can't panic over a C stack. 
-} - // This hook is called like a user onLoad hook but for every package to be // loaded in the session #[harp::register] diff --git a/crates/ark/src/sys/unix/interface.rs b/crates/ark/src/sys/unix/interface.rs index a98c1b6d6..30f146320 100644 --- a/crates/ark/src/sys/unix/interface.rs +++ b/crates/ark/src/sys/unix/interface.rs @@ -7,6 +7,8 @@ use std::ffi::CStr; use std::os::raw::c_char; +use std::sync::Condvar; +use std::sync::Mutex; use libr::ptr_R_Busy; use libr::ptr_R_ReadConsole; @@ -38,6 +40,9 @@ use crate::interface::r_write_console; use crate::interface::RMain; use crate::signals::initialize_signal_handlers; +// For shutdown signal in integration tests +pub static CLEANUP_SIGNAL: (Mutex, Condvar) = (Mutex::new(false), Condvar::new()); + pub fn setup_r(args: &Vec) { unsafe { // Before `Rf_initialize_R()` @@ -78,10 +83,7 @@ pub fn setup_r(args: &Vec) { // condition variable sends a notification, which occurs in this cleanup method // that is called during R's shutdown process. if stdext::IS_TESTING { - libr::set( - libr::ptr_R_CleanUp, - Some(crate::interface::r_cleanup_for_tests), - ); + libr::set(libr::ptr_R_CleanUp, Some(r_cleanup_for_tests)); } // In tests R may be run from various threads. This confuses R's stack @@ -133,3 +135,21 @@ pub fn run_activity_handlers() { } } } + +#[no_mangle] +pub extern "C-unwind" fn r_cleanup_for_tests(_save_act: i32, _status: i32, _run_last: i32) { + // Signal that cleanup has started + let (lock, cvar) = &CLEANUP_SIGNAL; + + let mut started = lock.lock().unwrap(); + *started = true; + + cvar.notify_all(); + drop(started); + + // Sleep to give tests time to complete before we panic + std::thread::sleep(std::time::Duration::from_secs(5)); + + // Fallthrough to R which will call `exit()`. Note that panicking from here + // would be UB, we can't panic over a C stack. +}