From 615c24a77d514f818a9fe1b62bebb8f46ac3d3c6 Mon Sep 17 00:00:00 2001 From: Elias Rohrer Date: Mon, 3 Nov 2025 13:15:36 +0100 Subject: [PATCH 1/5] Only use internal runtime in `VssStore` We previously attempted to drop the internal runtime from `VssStore`, resulting into blocking behavior. While we recently made changes that improved our situation (having VSS CI pass again pretty reliably), we just ran into yet another case where the VSS CI hung (cf. https://github.com/lightningdevkit/vss-server/actions/runs/19023212819/job/54322173817?pr=59). Here we attempt to restore even more of the original pre- ab3d78d1ecd05a755c836915284e5ca60c65692a / #623 behavior to get rid of the reappearing blocking behavior, i.e., only use the internal runtime in `VssStore`. --- src/builder.rs | 3 +- src/io/vss_store.rs | 77 +++++++++++++++++++++------------------------ 2 files changed, 37 insertions(+), 43 deletions(-) diff --git a/src/builder.rs b/src/builder.rs index c0e39af7a..59f5b9b46 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -731,8 +731,7 @@ impl NodeBuilder { let vss_seed_bytes: [u8; 32] = vss_xprv.private_key.secret_bytes(); - let vss_store = - VssStore::new(vss_url, store_id, vss_seed_bytes, header_provider, Arc::clone(&runtime)); + let vss_store = VssStore::new(vss_url, store_id, vss_seed_bytes, header_provider); build_with_store_internal( config, self.chain_data_source_config.as_ref(), diff --git a/src/io/vss_store.rs b/src/io/vss_store.rs index 028eb87e4..91abfc557 100644 --- a/src/io/vss_store.rs +++ b/src/io/vss_store.rs @@ -35,7 +35,6 @@ use vss_client::util::retry::{ use vss_client::util::storable_builder::{EntropySource, StorableBuilder}; use crate::io::utils::check_namespace_key_validity; -use crate::runtime::Runtime; type CustomRetryPolicy = FilteredRetryPolicy< JitteredRetryPolicy< @@ -55,7 +54,6 @@ pub struct VssStore { // Version counter to ensure that writes are applied in the correct order. It is assumed that read and list // operations aren't sensitive to the order of execution. next_version: AtomicU64, - runtime: Arc, // A VSS-internal runtime we use to avoid any deadlocks we could hit when waiting on a spawned // blocking task to finish while the blocked thread had acquired the reactor. In particular, // this works around a previously-hit case where a concurrent call to @@ -68,7 +66,7 @@ pub struct VssStore { impl VssStore { pub(crate) fn new( base_url: String, store_id: String, vss_seed: [u8; 32], - header_provider: Arc, runtime: Arc, + header_provider: Arc, ) -> Self { let inner = Arc::new(VssStoreInner::new(base_url, store_id, vss_seed, header_provider)); let next_version = AtomicU64::new(1); @@ -86,7 +84,7 @@ impl VssStore { .unwrap(), ); - Self { inner, next_version, runtime, internal_runtime } + Self { inner, next_version, internal_runtime } } // Same logic as for the obfuscated keys below, but just for locking, using the plaintext keys @@ -133,13 +131,14 @@ impl KVStoreSync for VssStore { async move { inner.read_internal(primary_namespace, secondary_namespace, key).await }; // TODO: We could drop the timeout here once we ensured vss-client's Retry logic always // times out. - let spawned_fut = internal_runtime.spawn(async move { - tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { - let msg = "VssStore::read timed out"; - Error::new(ErrorKind::Other, msg) - }) - }); - self.runtime.block_on(spawned_fut).expect("We should always finish")? + tokio::task::block_in_place(move || { + internal_runtime.block_on(async move { + tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { + let msg = "VssStore::read timed out"; + Error::new(ErrorKind::Other, msg) + }) + })? + }) } fn write( @@ -171,13 +170,14 @@ impl KVStoreSync for VssStore { }; // TODO: We could drop the timeout here once we ensured vss-client's Retry logic always // times out. - let spawned_fut = internal_runtime.spawn(async move { - tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { - let msg = "VssStore::write timed out"; - Error::new(ErrorKind::Other, msg) - }) - }); - self.runtime.block_on(spawned_fut).expect("We should always finish")? + tokio::task::block_in_place(move || { + internal_runtime.block_on(async move { + tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { + let msg = "VssStore::write timed out"; + Error::new(ErrorKind::Other, msg) + }) + })? + }) } fn remove( @@ -208,13 +208,14 @@ impl KVStoreSync for VssStore { }; // TODO: We could drop the timeout here once we ensured vss-client's Retry logic always // times out. - let spawned_fut = internal_runtime.spawn(async move { - tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { - let msg = "VssStore::remove timed out"; - Error::new(ErrorKind::Other, msg) - }) - }); - self.runtime.block_on(spawned_fut).expect("We should always finish")? + tokio::task::block_in_place(move || { + internal_runtime.block_on(async move { + tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { + let msg = "VssStore::remove timed out"; + Error::new(ErrorKind::Other, msg) + }) + })? + }) } fn list(&self, primary_namespace: &str, secondary_namespace: &str) -> io::Result> { @@ -229,13 +230,14 @@ impl KVStoreSync for VssStore { let fut = async move { inner.list_internal(primary_namespace, secondary_namespace).await }; // TODO: We could drop the timeout here once we ensured vss-client's Retry logic always // times out. - let spawned_fut = internal_runtime.spawn(async move { - tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { - let msg = "VssStore::list timed out"; - Error::new(ErrorKind::Other, msg) - }) - }); - self.runtime.block_on(spawned_fut).expect("We should always finish")? + tokio::task::block_in_place(move || { + internal_runtime.block_on(async move { + tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { + let msg = "VssStore::list timed out"; + Error::new(ErrorKind::Other, msg) + }) + })? + }) } } @@ -610,7 +612,6 @@ mod tests { use super::*; use crate::io::test_utils::do_read_write_remove_list_persist; - use crate::logger::Logger; #[test] fn vss_read_write_remove_list_persist() { @@ -620,10 +621,7 @@ mod tests { let mut vss_seed = [0u8; 32]; rng.fill_bytes(&mut vss_seed); let header_provider = Arc::new(FixedHeaders::new(HashMap::new())); - let logger = Arc::new(Logger::new_log_facade()); - let runtime = Arc::new(Runtime::new(logger).unwrap()); - let vss_store = - VssStore::new(vss_base_url, rand_store_id, vss_seed, header_provider, runtime); + let vss_store = VssStore::new(vss_base_url, rand_store_id, vss_seed, header_provider); do_read_write_remove_list_persist(&vss_store); } @@ -636,10 +634,7 @@ mod tests { let mut vss_seed = [0u8; 32]; rng.fill_bytes(&mut vss_seed); let header_provider = Arc::new(FixedHeaders::new(HashMap::new())); - let logger = Arc::new(Logger::new_log_facade()); - let runtime = Arc::new(Runtime::new(logger).unwrap()); - let vss_store = - VssStore::new(vss_base_url, rand_store_id, vss_seed, header_provider, runtime); + let vss_store = VssStore::new(vss_base_url, rand_store_id, vss_seed, header_provider); do_read_write_remove_list_persist(&vss_store); drop(vss_store) From 05d270ada93d54f39e620d67e8f1d297c081c1fb Mon Sep 17 00:00:00 2001 From: Elias Rohrer Date: Mon, 3 Nov 2025 14:44:23 +0100 Subject: [PATCH 2/5] DROPME: Run test 100 times --- .github/workflows/vss-integration.yml | 2 +- tests/integration_tests_vss.rs | 74 ++++++++++++++------------- 2 files changed, 39 insertions(+), 37 deletions(-) diff --git a/.github/workflows/vss-integration.yml b/.github/workflows/vss-integration.yml index 8473ed413..043f91d8f 100644 --- a/.github/workflows/vss-integration.yml +++ b/.github/workflows/vss-integration.yml @@ -45,4 +45,4 @@ jobs: cd ldk-node export TEST_VSS_BASE_URL="http://localhost:8080/vss" RUSTFLAGS="--cfg vss_test" cargo test io::vss_store - RUSTFLAGS="--cfg vss_test" cargo test --test integration_tests_vss + RUSTFLAGS="--cfg vss_test" cargo test --test integration_tests_vss -- --nocapture diff --git a/tests/integration_tests_vss.rs b/tests/integration_tests_vss.rs index 93f167dae..a9c397520 100644 --- a/tests/integration_tests_vss.rs +++ b/tests/integration_tests_vss.rs @@ -16,42 +16,44 @@ use ldk_node::Builder; #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn channel_full_cycle_with_vss_store() { let (bitcoind, electrsd) = common::setup_bitcoind_and_electrsd(); - println!("== Node A =="); - let esplora_url = format!("http://{}", electrsd.esplora_url.as_ref().unwrap()); - let config_a = common::random_config(true); - let mut builder_a = Builder::from_config(config_a.node_config); - builder_a.set_chain_source_esplora(esplora_url.clone(), None); - let vss_base_url = std::env::var("TEST_VSS_BASE_URL").unwrap(); - let node_a = builder_a - .build_with_vss_store_and_fixed_headers( - vss_base_url.clone(), - "node_1_store".to_string(), - HashMap::new(), - ) - .unwrap(); - node_a.start().unwrap(); + for i in 1..100 { + println!("Run {}: == Node A ==", i); + let esplora_url = format!("http://{}", electrsd.esplora_url.as_ref().unwrap()); + let config_a = common::random_config(true); + let mut builder_a = Builder::from_config(config_a.node_config); + builder_a.set_chain_source_esplora(esplora_url.clone(), None); + let vss_base_url = std::env::var("TEST_VSS_BASE_URL").unwrap(); + let node_a = builder_a + .build_with_vss_store_and_fixed_headers( + vss_base_url.clone(), + format!("node_{}_1_store", i), + HashMap::new(), + ) + .unwrap(); + node_a.start().unwrap(); - println!("\n== Node B =="); - let config_b = common::random_config(true); - let mut builder_b = Builder::from_config(config_b.node_config); - builder_b.set_chain_source_esplora(esplora_url.clone(), None); - let node_b = builder_b - .build_with_vss_store_and_fixed_headers( - vss_base_url, - "node_2_store".to_string(), - HashMap::new(), - ) - .unwrap(); - node_b.start().unwrap(); + println!("\nRun {}: == Node B ==", i); + let config_b = common::random_config(true); + let mut builder_b = Builder::from_config(config_b.node_config); + builder_b.set_chain_source_esplora(esplora_url.clone(), None); + let node_b = builder_b + .build_with_vss_store_and_fixed_headers( + vss_base_url, + format!("node_{}_2_store", i), + HashMap::new(), + ) + .unwrap(); + node_b.start().unwrap(); - common::do_channel_full_cycle( - node_a, - node_b, - &bitcoind.client, - &electrsd.client, - false, - true, - false, - ) - .await; + common::do_channel_full_cycle( + node_a, + node_b, + &bitcoind.client, + &electrsd.client, + false, + true, + false, + ) + .await; + } } From 511b24e18ff10ae96395865fa2ebb680fe8fae2a Mon Sep 17 00:00:00 2001 From: Elias Rohrer Date: Tue, 4 Nov 2025 09:30:55 +0100 Subject: [PATCH 3/5] Bump VSS retrying params --- src/io/vss_store.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/io/vss_store.rs b/src/io/vss_store.rs index 91abfc557..5dbe5e60e 100644 --- a/src/io/vss_store.rs +++ b/src/io/vss_store.rs @@ -29,24 +29,22 @@ use vss_client::types::{ }; use vss_client::util::key_obfuscator::KeyObfuscator; use vss_client::util::retry::{ - ExponentialBackoffRetryPolicy, FilteredRetryPolicy, JitteredRetryPolicy, - MaxAttemptsRetryPolicy, MaxTotalDelayRetryPolicy, RetryPolicy, + ExponentialBackoffRetryPolicy, FilteredRetryPolicy, MaxAttemptsRetryPolicy, + MaxTotalDelayRetryPolicy, RetryPolicy, }; use vss_client::util::storable_builder::{EntropySource, StorableBuilder}; use crate::io::utils::check_namespace_key_validity; type CustomRetryPolicy = FilteredRetryPolicy< - JitteredRetryPolicy< - MaxTotalDelayRetryPolicy>>, - >, + MaxTotalDelayRetryPolicy>>, Box bool + 'static + Send + Sync>, >; // We set this to a small number of threads that would still allow to make some progress if one // would hit a blocking case const INTERNAL_RUNTIME_WORKERS: usize = 2; -const VSS_IO_TIMEOUT: Duration = Duration::from_secs(5); +const VSS_IO_TIMEOUT: Duration = Duration::from_secs(100); /// A [`KVStoreSync`] implementation that writes to and reads from a [VSS](https://github.com/lightningdevkit/vss-server/blob/main/README.md) backend. pub struct VssStore { @@ -335,9 +333,8 @@ impl VssStoreInner { let key_obfuscator = KeyObfuscator::new(obfuscation_master_key); let storable_builder = StorableBuilder::new(data_encryption_key, RandEntropySource); let retry_policy = ExponentialBackoffRetryPolicy::new(Duration::from_millis(10)) - .with_max_attempts(10) - .with_max_total_delay(Duration::from_secs(15)) - .with_max_jitter(Duration::from_millis(10)) + .with_max_attempts(100) + .with_max_total_delay(VSS_IO_TIMEOUT) .skip_retry_on_error(Box::new(|e: &VssError| { matches!( e, From 29119202abb5c040459c7e41fd65778c24601078 Mon Sep 17 00:00:00 2001 From: Elias Rohrer Date: Tue, 4 Nov 2025 10:14:23 +0100 Subject: [PATCH 4/5] MOAR OUTPUT --- .github/workflows/vss-integration.yml | 2 +- src/io/vss_store.rs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/vss-integration.yml b/.github/workflows/vss-integration.yml index 043f91d8f..7a167f45c 100644 --- a/.github/workflows/vss-integration.yml +++ b/.github/workflows/vss-integration.yml @@ -45,4 +45,4 @@ jobs: cd ldk-node export TEST_VSS_BASE_URL="http://localhost:8080/vss" RUSTFLAGS="--cfg vss_test" cargo test io::vss_store - RUSTFLAGS="--cfg vss_test" cargo test --test integration_tests_vss -- --nocapture + RUST_BACKTRACE=full RUSTFLAGS="--cfg vss_test" cargo test --test integration_tests_vss -- --nocapture diff --git a/src/io/vss_store.rs b/src/io/vss_store.rs index 5dbe5e60e..01494cb5b 100644 --- a/src/io/vss_store.rs +++ b/src/io/vss_store.rs @@ -133,6 +133,7 @@ impl KVStoreSync for VssStore { internal_runtime.block_on(async move { tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { let msg = "VssStore::read timed out"; + eprintln!("{}", msg); Error::new(ErrorKind::Other, msg) }) })? @@ -170,8 +171,9 @@ impl KVStoreSync for VssStore { // times out. tokio::task::block_in_place(move || { internal_runtime.block_on(async move { - tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { + tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|e| { let msg = "VssStore::write timed out"; + eprintln!("VssStore::write timed out: {:?}", e); Error::new(ErrorKind::Other, msg) }) })? @@ -210,6 +212,7 @@ impl KVStoreSync for VssStore { internal_runtime.block_on(async move { tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { let msg = "VssStore::remove timed out"; + eprintln!("{}", msg); Error::new(ErrorKind::Other, msg) }) })? @@ -232,6 +235,7 @@ impl KVStoreSync for VssStore { internal_runtime.block_on(async move { tokio::time::timeout(VSS_IO_TIMEOUT, fut).await.map_err(|_| { let msg = "VssStore::list timed out"; + eprintln!("{}", msg); Error::new(ErrorKind::Other, msg) }) })? From e61774412a9dde72ea68badcda547777d6d805c9 Mon Sep 17 00:00:00 2001 From: Elias Rohrer Date: Tue, 4 Nov 2025 12:28:35 +0100 Subject: [PATCH 5/5] Bump to branch --- Cargo.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 701d9ddb3..d6faf6137 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -101,7 +101,9 @@ serde = { version = "1.0.210", default-features = false, features = ["std", "der serde_json = { version = "1.0.128", default-features = false, features = ["std"] } log = { version = "0.4.22", default-features = false, features = ["std"]} -vss-client = "0.3" +#vss-client = "0.3" +#vss-client = { path = "../vss-rust-client" } +vss-client = { git = "https://github.com/tnull/vss-rust-client", branch = "2025-08-enable-client-side-delays-0.3.1" } prost = { version = "0.11.6", default-features = false} [target.'cfg(windows)'.dependencies]