From 2d31f0679b64b94722c05b8cb90ebd5f0970e1b9 Mon Sep 17 00:00:00 2001 From: rob-stacks Date: Thu, 13 Nov 2025 08:16:04 +0100 Subject: [PATCH 1/7] added blockreplay profiler --- Cargo.lock | 63 +++- clarity/Cargo.toml | 3 + clarity/src/vm/contexts.rs | 23 +- clarity/src/vm/mod.rs | 368 ++++++++++++++++++++- stackslib/Cargo.toml | 3 + stackslib/src/net/api/blockreplay.rs | 128 ++++++- stackslib/src/net/api/tests/blockreplay.rs | 37 +++ 7 files changed, 611 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5f619f40914..918dcd6eec8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -504,6 +504,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +[[package]] +name = "c-enum" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd17eb909a8c6a894926bfcc3400a4bb0e732f5a57d37b1f14e8b29e329bace8" + [[package]] name = "cc" version = "1.0.83" @@ -594,6 +600,7 @@ dependencies = [ "integer-sqrt", "lazy_static", "mutants", + "perf-event2", "proptest", "regex", "rstest", @@ -1974,6 +1981,15 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "memmap2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.5" @@ -1983,6 +1999,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -2119,7 +2144,7 @@ dependencies = [ "cc", "cfg-if 1.0.0", "libc", - "memoffset", + "memoffset 0.6.5", ] [[package]] @@ -2241,6 +2266,41 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "perf-event-data" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "575828d9d7d205188048eb1508560607a03d21eafdbba47b8cade1736c1c28e1" +dependencies = [ + "bitflags 2.4.2", + "c-enum", + "perf-event-open-sys2", +] + +[[package]] +name = "perf-event-open-sys2" +version = "5.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c25955321465255e437600b54296983fab1feac2cd0c38958adeb26dbae49e" +dependencies = [ + "libc", + "memoffset 0.9.1", +] + +[[package]] +name = "perf-event2" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0939b8fad77dfaeb29ebbd35faaeaadbf833167f30975f1b8993bbba09ea0a0f" +dependencies = [ + "bitflags 2.4.2", + "c-enum", + "libc", + "memmap2", + "perf-event-data", + "perf-event-open-sys2", +] + [[package]] name = "pico-args" version = "0.5.0" @@ -3514,6 +3574,7 @@ dependencies = [ "mutants", "nix", "percent-encoding", + "perf-event2", "pox-locking 2.4.0", "prometheus", "proptest", diff --git a/clarity/Cargo.toml b/clarity/Cargo.toml index cba9468313f..bcdc12cb138 100644 --- a/clarity/Cargo.toml +++ b/clarity/Cargo.toml @@ -31,6 +31,9 @@ rstest = { version = "0.17.0", optional = true } rstest_reuse = { version = "0.5.0", optional = true } rusqlite = { workspace = true, optional = true } +[target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dependencies] +perf-event2 = "0.7.4" + [dev-dependencies] assert-json-diff = "1.0.0" clarity-types = { package = "clarity-types", path = "../clarity-types", default-features = false, features = ["testing"] } diff --git a/clarity/src/vm/contexts.rs b/clarity/src/vm/contexts.rs index 5600ea8fe04..244d99f58aa 100644 --- a/clarity/src/vm/contexts.rs +++ b/clarity/src/vm/contexts.rs @@ -214,6 +214,11 @@ pub enum ExecutionTimeTracker { }, } +pub trait NativeFunctionsProfiler { + fn start(&mut self, identifier: &FunctionIdentifier, args: &Vec); + fn end(&mut self); +} + /** GlobalContext represents the outermost context for a single transaction's execution. It tracks an asset changes that occurred during the processing of the transaction, whether or not the current context is read_only, @@ -235,6 +240,17 @@ pub struct GlobalContext<'a, 'hooks> { pub execution_time_tracker: ExecutionTimeTracker, } +impl<'a, 'hooks> GlobalContext<'a, 'hooks> { + pub fn add_eval_hook(&mut self, hook: &'hooks mut dyn EvalHook) { + if let Some(mut hooks) = self.eval_hooks.take() { + hooks.push(hook); + self.eval_hooks = Some(hooks); + } else { + self.eval_hooks = Some(vec![hook]); + } + } +} + #[derive(Serialize, Deserialize, Clone)] pub struct ContractContext { pub contract_identifier: QualifiedContractIdentifier, @@ -852,12 +868,7 @@ impl<'a, 'hooks> OwnedEnvironment<'a, 'hooks> { } pub fn add_eval_hook(&mut self, hook: &'hooks mut dyn EvalHook) { - if let Some(mut hooks) = self.context.eval_hooks.take() { - hooks.push(hook); - self.context.eval_hooks = Some(hooks); - } else { - self.context.eval_hooks = Some(vec![hook]); - } + self.context.add_eval_hook(hook); } } diff --git a/clarity/src/vm/mod.rs b/clarity/src/vm/mod.rs index 408652321c9..de97e7ab4a7 100644 --- a/clarity/src/vm/mod.rs +++ b/clarity/src/vm/mod.rs @@ -623,17 +623,29 @@ pub fn execute_v2(program: &str) -> Result> { #[cfg(test)] mod test { + use std::collections::HashMap; + use std::env; + use std::fs::File; + use std::time::{Duration, Instant}; + + use clarity_types::types::ResponseData; + use perf_event::events::Hardware; + use perf_event::{Builder, Counter}; use stacks_common::consts::CHAIN_ID_TESTNET; use stacks_common::types::StacksEpochId; use super::ClarityVersion; use crate::vm::callables::{DefineType, DefinedFunction}; + use crate::vm::contexts::{Environment, LocalContext}; use crate::vm::costs::LimitedCostTracker; use crate::vm::database::MemoryBackingStore; - use crate::vm::types::{QualifiedContractIdentifier, TypeSignature}; + use crate::vm::errors::Error; + use crate::vm::functions::NativeFunctions; + use crate::vm::types::{QualifiedContractIdentifier, TypeSignature, Value}; + use crate::vm::variables::NativeVariables; use crate::vm::{ - eval, CallStack, ContractContext, Environment, GlobalContext, LocalContext, - SymbolicExpression, Value, + ast, eval, eval_all, CallStack, ContractContext, EvalHook, GlobalContext, + SymbolicExpression, SymbolicExpressionType, }; #[test] @@ -697,4 +709,354 @@ mod test { ); assert_eq!(Ok(Value::Int(64)), eval(&content[0], &mut env, &context)); } + + struct PerfEventExprState { + start_instant: Instant, + children_duration: Duration, + perf_counter_instructions: Counter, + perf_counter_cpu_cycles: Counter, + perf_counter_ref_cpu_cycles: Counter, + children_perf_counter_instructions: u64, + children_perf_counter_cpu_cycles: u64, + children_perf_counter_ref_cpu_cycles: u64, + } + + impl PerfEventExprState { + fn new( + perf_counter_instructions: Counter, + perf_counter_cpu_cycles: Counter, + perf_counter_ref_cpu_cycles: Counter, + ) -> Self { + Self { + start_instant: Instant::now(), + children_duration: Duration::default(), + perf_counter_instructions, + perf_counter_cpu_cycles, + perf_counter_ref_cpu_cycles, + children_perf_counter_instructions: 0, + children_perf_counter_cpu_cycles: 0, + children_perf_counter_ref_cpu_cycles: 0, + } + } + } + + #[derive(Serialize)] + struct PerfEventExprCounter { + expression: String, + args: Vec, + calls: u64, + duration: Duration, + instructions: u64, + cpu_cycles: u64, + ref_cpu_cycles: u64, + avg_instructions: u64, + avg_cpu_cycles: u64, + avg_ref_cpu_cycles: u64, + } + + impl PerfEventExprCounter { + fn new() -> Self { + Self { + expression: String::default(), + args: vec![], + calls: 0, + duration: Duration::default(), + instructions: 0, + cpu_cycles: 0, + ref_cpu_cycles: 0, + avg_instructions: 0, + avg_cpu_cycles: 0, + avg_ref_cpu_cycles: 0, + } + } + } + + #[derive(Default)] + + pub struct PerfEventCounterHook { + functions_ids: HashMap, + expression_states: HashMap, + expression_counters: HashMap, + call_stack: Vec, + } + + impl PerfEventCounterHook { + pub fn new() -> Self { + Self::default() + } + } + + impl EvalHook for PerfEventCounterHook { + fn will_begin_eval( + &mut self, + + env: &mut Environment, + + _context: &LocalContext, + + expr: &SymbolicExpression, + ) { + let mut key = None; + let mut function_args = vec![]; + + if let SymbolicExpressionType::Atom(atom) = &expr.expr { + if let Some(_native_variable) = NativeVariables::lookup_by_name_at_version( + atom.as_str(), + &ClarityVersion::latest(), + ) { + key = Some(atom.as_str().to_string()); + } + } else if let SymbolicExpressionType::List(list) = &expr.expr { + if let Some((function_name, args)) = list.split_first() { + if let Some(function_name) = function_name.match_atom() { + if let Some(_native_function) = NativeFunctions::lookup_by_name_at_version( + function_name, + &ClarityVersion::latest(), + ) { + let function_name_and_args = + format!("{} ({:?})", function_name, args).to_string(); + + for arg in args { + function_args.push(arg.expr.to_string()); + } + + key = Some(function_name_and_args); + } + } + } + }; + + if key.is_none() { + return; + } + + let function_key = key.unwrap().to_string(); + + let counter = self + .expression_counters + .entry(function_key.clone()) + .or_insert(PerfEventExprCounter::new()); + + counter.expression = function_key.clone(); + counter.args = function_args; + counter.calls += 1; + + let current_cost = env.global_context.cost_track.get_total(); + + self.functions_ids.insert(expr.id, function_key); + + self.call_stack.push(expr.id); + + let mut perf_event_instructions = Builder::new(Hardware::INSTRUCTIONS).build().unwrap(); + let mut perf_event_cpu_cycles = Builder::new(Hardware::CPU_CYCLES).build().unwrap(); + let mut perf_event_ref_cpu_cycles = + Builder::new(Hardware::REF_CPU_CYCLES).build().unwrap(); + + perf_event_instructions.enable().unwrap(); + perf_event_cpu_cycles.enable().unwrap(); + perf_event_ref_cpu_cycles.enable().unwrap(); + + self.expression_states.insert( + expr.id, + PerfEventExprState::new( + perf_event_instructions, + perf_event_cpu_cycles, + perf_event_ref_cpu_cycles, + ), + ); + } + + fn did_finish_eval( + &mut self, + + env: &mut Environment, + + _context: &LocalContext, + + expr: &SymbolicExpression, + + _res: &Result, + ) { + if self.expression_states.contains_key(&expr.id) { + let state = self.expression_states.get_mut(&expr.id).unwrap(); + + state.perf_counter_instructions.disable().unwrap(); + state.perf_counter_cpu_cycles.disable().unwrap(); + state.perf_counter_ref_cpu_cycles.disable().unwrap(); + + let mut instructions = state.perf_counter_instructions.read().unwrap(); + let mut cpu_cycles = state.perf_counter_cpu_cycles.read().unwrap(); + let mut ref_cpu_cycles = state.perf_counter_ref_cpu_cycles.read().unwrap(); + + let function_name = self.functions_ids.get(&expr.id).unwrap().to_string(); + + let elapsed = state.start_instant.elapsed(); + + let counter = self.expression_counters.get_mut(&function_name).unwrap(); + + let duration = elapsed - state.children_duration; + + counter.duration += duration; + + let children_instructions = state.children_perf_counter_instructions; + instructions -= children_instructions; + counter.instructions += instructions; + counter.avg_instructions = counter.instructions / counter.calls; + + let children_cpu_cycles = state.children_perf_counter_cpu_cycles; + cpu_cycles -= children_cpu_cycles; + counter.cpu_cycles += cpu_cycles; + counter.avg_cpu_cycles = counter.cpu_cycles / counter.calls; + + let children_ref_cpu_cycles = state.children_perf_counter_ref_cpu_cycles; + ref_cpu_cycles -= children_ref_cpu_cycles; + counter.ref_cpu_cycles += ref_cpu_cycles; + counter.avg_ref_cpu_cycles = counter.ref_cpu_cycles / counter.calls; + + let _expr_id = self.call_stack.pop().unwrap(); + + for expr_id in self.call_stack.iter().rev() { + let state = self.expression_states.get_mut(expr_id).unwrap(); + + state.children_duration += duration; + + state.children_perf_counter_instructions += instructions; + state.children_perf_counter_cpu_cycles += cpu_cycles; + state.children_perf_counter_ref_cpu_cycles += ref_cpu_cycles; + } + } + } + + fn did_complete( + &mut self, + + _result: core::result::Result<&mut crate::vm::ExecutionResult, String>, + ) { + } + } + + #[test] + fn test_native_functions_benchmark() { + let Ok(json_path) = env::var("CLARITY_BENCHMARK") else { + return; + }; + + let benchmark_iterations: u32 = env::var("CLARITY_BENCHMARK_ITERATIONS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(1); + + let context = LocalContext::new(); + let mut contract_context = ContractContext::new( + QualifiedContractIdentifier::transient(), + ClarityVersion::latest(), + ); + + let mut marf = MemoryBackingStore::new(); + let mut global_context = GlobalContext::new( + false, + CHAIN_ID_TESTNET, + marf.as_clarity_db(), + LimitedCostTracker::new_free(), + StacksEpochId::latest(), + ); + + let call_stack = CallStack::new(); + + let mut perf_event_counter_hook = PerfEventCounterHook::new(); + + let program = r#" + (+ 1 1) + (+ 1 1 1) + (+ 1 1 1 1) + (- 1 1) + (- 1 1 1) + (- 1 1 1 1) + (* 1 1) + (* 1 1 1) + (* 1 1 1 1) + (sha256 0x12) + (sha256 0x1234) + (sha256 0x12345678) + (sha256 0x1234567812345678123456781234567812345678) + (sha512 0x12) + (sha512 0x1234) + (sha512 0x12345678) + (sha512 0x1234567812345678123456781234567812345678) + (keccak256 0x12) + (keccak256 0x1234) + (keccak256 0x12345678) + (keccak256 0x1234567812345678123456781234567812345678) + (to-ascii? 1) + (to-ascii? 10) + (to-ascii? 100) + (to-ascii? 1000) + (to-ascii? 10000) + (to-ascii? u2) + (to-ascii? u30) + (to-ascii? u400) + (to-ascii? u5000) + (to-ascii? u60000) + (to-ascii? true) + (to-ascii? false) + (to-ascii? 'ST1HTBVD3JG9C05J7HBJTHGR0GGW7KXW28M5JS8QE) + (to-ascii? tx-sender) + (to-ascii? 'ST1HTBVD3JG9C05J7HBJTHGR0GGW7KXW28M5JS8QE.dummy001) + (to-ascii? 'ST1HTBVD3JG9C05J7HBJTHGR0GGW7KXW28M5JS8QE.dummy0123456789) + (to-ascii? 0x01) + (to-ascii? 0x0102) + (to-ascii? 0x010203) + (to-ascii? 0x0102030405060708090A0B0C0D0E0F) + (to-ascii? u"A") + (to-ascii? u"AB") + (to-ascii? u"ABCDEFGHIJKLMNOPQRSTUVWXYZ") + (contract-hash? current-contract) + (as-contract? ((with-stx u100)) (+ 1 1) (+ 2 2)) + (as-contract? ((with-all-assets-unsafe)) (+ 1 1) (+ 2 2)) + (restrict-assets? tx-sender () (+ u1 u2)) + (as-contract? () (+ u1 u2)) + (secp256r1-verify 0xc3abef6a775793dfbc8e0719e7a1de1fc2f90d37a7912b1ce8e300a5a03b06a8 0xf2b8c0645caa7250e3b96d633cf40a88456e4ffbddffb69200c4e019039dfd31f153a6d5c3dc192a5574f3a261b1b70570971b92d8ebf86c17b7670d13591c4e 0x031e18532fd4754c02f3041d9c75ceb33b83ffd81ac7ce4fe882ccb1c98bc5896e) + (secp256k1-verify 0x89171d7815da4bc1f644665a3234bc99d1680afa0b3285eff4878f4275fbfa89 0x54cd3f378a424a3e50ff1c911b7d80cf424e1b86dddecadbcf39077e62fa1e54ee6514347c1608df2c3995e7356f2d60a1fab60878214642134d78cd923ce27a01 0x0256b328b30c8bf5839e24058747879408bdb36241dc9c2e7c619faa12b2920967) + "#; + + { + global_context.add_eval_hook(&mut perf_event_counter_hook); + let value = Value::string_ascii_from_bytes("1".into()).unwrap(); + let response = Value::Response(ResponseData { + committed: true, + data: Box::new(value), + }); + + let contract_id = QualifiedContractIdentifier::transient(); + + let parsed = ast::build_ast( + &contract_id, + program, + &mut (), + ClarityVersion::latest(), + StacksEpochId::latest(), + ) + .unwrap() + .expressions; + + for _ in 0..benchmark_iterations { + eval_all(&parsed, &mut contract_context, &mut global_context, None).unwrap(); + } + } + + let mut results: HashMap> = HashMap::new(); + + for (key, value) in perf_event_counter_hook.expression_counters { + let symbol_name = key.split_whitespace().next().unwrap(); + let counter = results.entry(symbol_name.to_string()).or_insert(vec![]); + counter.push(value); + counter.sort_by_key(|peec| peec.expression.clone()); + } + + let file = File::create(json_path).unwrap(); + + let benchmark_value = serde_json::json!(results); + + serde_json::to_writer_pretty(file, &benchmark_value).unwrap(); + } } diff --git a/stackslib/Cargo.toml b/stackslib/Cargo.toml index 10e94f4b470..6111b3c7ad1 100644 --- a/stackslib/Cargo.toml +++ b/stackslib/Cargo.toml @@ -46,6 +46,9 @@ rusqlite = { workspace = true } time = "0.3.41" toml = { workspace = true } +[target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dependencies] +perf-event2 = "0.7.4" + [target.'cfg(unix)'.dependencies] nix = "0.23" diff --git a/stackslib/src/net/api/blockreplay.rs b/stackslib/src/net/api/blockreplay.rs index f9b48db20e8..05448a2227f 100644 --- a/stackslib/src/net/api/blockreplay.rs +++ b/stackslib/src/net/api/blockreplay.rs @@ -15,6 +15,10 @@ use clarity::vm::costs::ExecutionCost; use clarity::vm::Value; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +use perf_event::events::Hardware; +#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +use perf_event::{Builder, Counter}; use regex::{Captures, Regex}; use stacks_common::codec::StacksMessageCodec; use stacks_common::types::chainstate::{BlockHeaderHash, ConsensusHash, StacksBlockId, TrieHash}; @@ -22,6 +26,7 @@ use stacks_common::types::net::PeerHost; use stacks_common::util::hash::Sha512Trunc256Sum; use stacks_common::util::secp256k1::MessageSignature; use stacks_common::util::serde_serializers::prefix_hex_codec; +use url::form_urlencoded; use crate::burnchains::Txid; use crate::chainstate::burn::db::sortdb::SortitionDB; @@ -43,6 +48,7 @@ use crate::net::{Error as NetError, StacksHttpRequest, StacksNodeState}; pub struct RPCNakamotoBlockReplayRequestHandler { pub block_id: Option, pub auth: Option, + pub profiler: bool, } impl RPCNakamotoBlockReplayRequestHandler { @@ -50,6 +56,7 @@ impl RPCNakamotoBlockReplayRequestHandler { Self { block_id: None, auth, + profiler: false, } } @@ -160,6 +167,37 @@ impl RPCNakamotoBlockReplayRequestHandler { for (i, tx) in block.txs.iter().enumerate() { let tx_len = tx.tx_len(); + let mut perf_event_cpu_instructions: Option = None; + let mut perf_event_cpu_cycles: Option = None; + let mut perf_event_cpu_ref_cycles: Option = None; + + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + if self.profiler { + if let Ok(mut perf_event_cpu_instructions_result) = + Builder::new(Hardware::INSTRUCTIONS).build() + { + if perf_event_cpu_instructions_result.enable().is_ok() { + perf_event_cpu_instructions = Some(perf_event_cpu_instructions_result); + } + } + + if let Ok(mut perf_event_cpu_cycles_result) = + Builder::new(Hardware::CPU_CYCLES).build() + { + if perf_event_cpu_cycles_result.enable().is_ok() { + perf_event_cpu_cycles = Some(perf_event_cpu_cycles_result); + } + } + + if let Ok(mut perf_event_cpu_ref_cycles_result) = + Builder::new(Hardware::REF_CPU_CYCLES).build() + { + if perf_event_cpu_ref_cycles_result.enable().is_ok() { + perf_event_cpu_ref_cycles = Some(perf_event_cpu_ref_cycles_result); + } + } + } + let tx_result = builder.try_mine_tx_with_len( &mut tenure_tx, tx, @@ -167,9 +205,46 @@ impl RPCNakamotoBlockReplayRequestHandler { &BlockLimitFunction::NO_LIMIT_HIT, None, ); + + let mut cpu_instructions: Option = None; + let mut cpu_cycles: Option = None; + let mut cpu_ref_cycles: Option = None; + + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + if self.profiler { + if let Some(mut perf_event_cpu_instructions) = perf_event_cpu_instructions { + if perf_event_cpu_instructions.disable().is_ok() { + if let Ok(value) = perf_event_cpu_instructions.read() { + cpu_instructions = Some(value); + } + } + } + + if let Some(mut perf_event_cpu_cycles) = perf_event_cpu_cycles { + if perf_event_cpu_cycles.disable().is_ok() { + if let Ok(value) = perf_event_cpu_cycles.read() { + cpu_cycles = Some(value); + } + } + } + + if let Some(mut perf_event_cpu_ref_cycles) = perf_event_cpu_ref_cycles { + if perf_event_cpu_ref_cycles.disable().is_ok() { + if let Ok(value) = perf_event_cpu_ref_cycles.read() { + cpu_ref_cycles = Some(value); + } + } + } + } + let err = match tx_result { TransactionResult::Success(tx_result) => { - txs_receipts.push(tx_result.receipt); + txs_receipts.push(( + tx_result.receipt, + cpu_instructions, + cpu_cycles, + cpu_ref_cycles, + )); Ok(()) } _ => Err(format!("Problematic tx {i}")), @@ -194,8 +269,13 @@ impl RPCNakamotoBlockReplayRequestHandler { let mut rpc_replayed_block = RPCReplayedBlock::from_block(block, block_fees, tenure_id, parent_block_id); - for receipt in &txs_receipts { - let transaction = RPCReplayedBlockTransaction::from_receipt(receipt); + for (receipt, cpu_instructions, cpu_cycles, cpu_ref_cycles) in &txs_receipts { + let transaction = RPCReplayedBlockTransaction::from_receipt( + receipt, + cpu_instructions, + cpu_cycles, + cpu_ref_cycles, + ); rpc_replayed_block.transactions.push(transaction); } @@ -231,10 +311,19 @@ pub struct RPCReplayedBlockTransaction { pub post_condition_aborted: bool, /// optional vm error pub vm_error: Option, + /// profiling data based on linux perf_events + pub cpu_instructions: Option, + pub cpu_cycles: Option, + pub cpu_ref_cycles: Option, } impl RPCReplayedBlockTransaction { - pub fn from_receipt(receipt: &StacksTransactionReceipt) -> Self { + pub fn from_receipt( + receipt: &StacksTransactionReceipt, + cpu_instructions: &Option, + cpu_cycles: &Option, + cpu_ref_cycles: &Option, + ) -> Self { let events = receipt .events .iter() @@ -269,6 +358,9 @@ impl RPCReplayedBlockTransaction { events, post_condition_aborted: receipt.post_condition_aborted, vm_error: receipt.vm_error.clone(), + cpu_instructions: cpu_instructions.clone(), + cpu_cycles: cpu_cycles.clone(), + cpu_ref_cycles: cpu_ref_cycles.clone(), } } } @@ -382,6 +474,17 @@ impl HttpRequest for RPCNakamotoBlockReplayRequestHandler { self.block_id = Some(block_id); + if let Some(query_string) = query { + for (key, value) in form_urlencoded::parse(query_string.as_bytes()) { + if key == "profiler" { + if value == "1" { + self.profiler = true; + } + break; + } + } + } + Ok(HttpRequestContents::new().query_string(query)) } } @@ -446,6 +549,23 @@ impl StacksHttpRequest { ) .expect("FATAL: failed to construct request from infallible data") } + + pub fn new_block_replay_with_profiler( + host: PeerHost, + block_id: &StacksBlockId, + profiler: bool, + ) -> StacksHttpRequest { + StacksHttpRequest::new_for_peer( + host, + "GET".into(), + format!("/v3/blocks/replay/{block_id}"), + HttpRequestContents::new().query_arg( + "profiler".into(), + if profiler { "1".into() } else { "0".into() }, + ), + ) + .expect("FATAL: failed to construct request from infallible data") + } } /// Decode the HTTP response diff --git a/stackslib/src/net/api/tests/blockreplay.rs b/stackslib/src/net/api/tests/blockreplay.rs index c4ae3d0ea0a..3615c10a556 100644 --- a/stackslib/src/net/api/tests/blockreplay.rs +++ b/stackslib/src/net/api/tests/blockreplay.rs @@ -72,6 +72,43 @@ fn test_try_parse_request() { let (preamble, contents) = parsed_request.destruct(); assert_eq!(&preamble, request.preamble()); + assert_eq!(handler.profiler, false); +} + +#[test] +fn test_try_parse_request_with_profiler() { + let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 33333); + let mut http = StacksHttp::new(addr.clone(), &ConnectionOptions::default()); + + let mut request = StacksHttpRequest::new_block_replay_with_profiler( + addr.into(), + &StacksBlockId([0x01; 32]), + true, + ); + + // add the authorization header + request.add_header("authorization".into(), "password".into()); + + let bytes = request.try_serialize().unwrap(); + + debug!("Request:\n{}\n", std::str::from_utf8(&bytes).unwrap()); + + let (parsed_preamble, offset) = http.read_preamble(&bytes).unwrap(); + + let mut handler = + blockreplay::RPCNakamotoBlockReplayRequestHandler::new(Some("password".into())); + + let parsed_request = http + .handle_try_parse_request( + &mut handler, + &parsed_preamble.expect_request(), + &bytes[offset..], + ) + .unwrap(); + + let (preamble, contents) = parsed_request.destruct(); + + assert_eq!(handler.profiler, true); } #[test] From 62af3197287e9ae1bd4b97f58f3573f1be5d85fa Mon Sep 17 00:00:00 2001 From: rob-stacks Date: Mon, 17 Nov 2025 14:39:09 +0100 Subject: [PATCH 2/7] cleaned up for pure blockreplay profiler --- Cargo.lock | 1 - clarity/Cargo.toml | 3 - clarity/src/vm/contexts.rs | 23 +-- clarity/src/vm/mod.rs | 368 +------------------------------------ 4 files changed, 9 insertions(+), 386 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 918dcd6eec8..d9e7e6983f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -600,7 +600,6 @@ dependencies = [ "integer-sqrt", "lazy_static", "mutants", - "perf-event2", "proptest", "regex", "rstest", diff --git a/clarity/Cargo.toml b/clarity/Cargo.toml index bcdc12cb138..cba9468313f 100644 --- a/clarity/Cargo.toml +++ b/clarity/Cargo.toml @@ -31,9 +31,6 @@ rstest = { version = "0.17.0", optional = true } rstest_reuse = { version = "0.5.0", optional = true } rusqlite = { workspace = true, optional = true } -[target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dependencies] -perf-event2 = "0.7.4" - [dev-dependencies] assert-json-diff = "1.0.0" clarity-types = { package = "clarity-types", path = "../clarity-types", default-features = false, features = ["testing"] } diff --git a/clarity/src/vm/contexts.rs b/clarity/src/vm/contexts.rs index 244d99f58aa..5600ea8fe04 100644 --- a/clarity/src/vm/contexts.rs +++ b/clarity/src/vm/contexts.rs @@ -214,11 +214,6 @@ pub enum ExecutionTimeTracker { }, } -pub trait NativeFunctionsProfiler { - fn start(&mut self, identifier: &FunctionIdentifier, args: &Vec); - fn end(&mut self); -} - /** GlobalContext represents the outermost context for a single transaction's execution. It tracks an asset changes that occurred during the processing of the transaction, whether or not the current context is read_only, @@ -240,17 +235,6 @@ pub struct GlobalContext<'a, 'hooks> { pub execution_time_tracker: ExecutionTimeTracker, } -impl<'a, 'hooks> GlobalContext<'a, 'hooks> { - pub fn add_eval_hook(&mut self, hook: &'hooks mut dyn EvalHook) { - if let Some(mut hooks) = self.eval_hooks.take() { - hooks.push(hook); - self.eval_hooks = Some(hooks); - } else { - self.eval_hooks = Some(vec![hook]); - } - } -} - #[derive(Serialize, Deserialize, Clone)] pub struct ContractContext { pub contract_identifier: QualifiedContractIdentifier, @@ -868,7 +852,12 @@ impl<'a, 'hooks> OwnedEnvironment<'a, 'hooks> { } pub fn add_eval_hook(&mut self, hook: &'hooks mut dyn EvalHook) { - self.context.add_eval_hook(hook); + if let Some(mut hooks) = self.context.eval_hooks.take() { + hooks.push(hook); + self.context.eval_hooks = Some(hooks); + } else { + self.context.eval_hooks = Some(vec![hook]); + } } } diff --git a/clarity/src/vm/mod.rs b/clarity/src/vm/mod.rs index de97e7ab4a7..408652321c9 100644 --- a/clarity/src/vm/mod.rs +++ b/clarity/src/vm/mod.rs @@ -623,29 +623,17 @@ pub fn execute_v2(program: &str) -> Result> { #[cfg(test)] mod test { - use std::collections::HashMap; - use std::env; - use std::fs::File; - use std::time::{Duration, Instant}; - - use clarity_types::types::ResponseData; - use perf_event::events::Hardware; - use perf_event::{Builder, Counter}; use stacks_common::consts::CHAIN_ID_TESTNET; use stacks_common::types::StacksEpochId; use super::ClarityVersion; use crate::vm::callables::{DefineType, DefinedFunction}; - use crate::vm::contexts::{Environment, LocalContext}; use crate::vm::costs::LimitedCostTracker; use crate::vm::database::MemoryBackingStore; - use crate::vm::errors::Error; - use crate::vm::functions::NativeFunctions; - use crate::vm::types::{QualifiedContractIdentifier, TypeSignature, Value}; - use crate::vm::variables::NativeVariables; + use crate::vm::types::{QualifiedContractIdentifier, TypeSignature}; use crate::vm::{ - ast, eval, eval_all, CallStack, ContractContext, EvalHook, GlobalContext, - SymbolicExpression, SymbolicExpressionType, + eval, CallStack, ContractContext, Environment, GlobalContext, LocalContext, + SymbolicExpression, Value, }; #[test] @@ -709,354 +697,4 @@ mod test { ); assert_eq!(Ok(Value::Int(64)), eval(&content[0], &mut env, &context)); } - - struct PerfEventExprState { - start_instant: Instant, - children_duration: Duration, - perf_counter_instructions: Counter, - perf_counter_cpu_cycles: Counter, - perf_counter_ref_cpu_cycles: Counter, - children_perf_counter_instructions: u64, - children_perf_counter_cpu_cycles: u64, - children_perf_counter_ref_cpu_cycles: u64, - } - - impl PerfEventExprState { - fn new( - perf_counter_instructions: Counter, - perf_counter_cpu_cycles: Counter, - perf_counter_ref_cpu_cycles: Counter, - ) -> Self { - Self { - start_instant: Instant::now(), - children_duration: Duration::default(), - perf_counter_instructions, - perf_counter_cpu_cycles, - perf_counter_ref_cpu_cycles, - children_perf_counter_instructions: 0, - children_perf_counter_cpu_cycles: 0, - children_perf_counter_ref_cpu_cycles: 0, - } - } - } - - #[derive(Serialize)] - struct PerfEventExprCounter { - expression: String, - args: Vec, - calls: u64, - duration: Duration, - instructions: u64, - cpu_cycles: u64, - ref_cpu_cycles: u64, - avg_instructions: u64, - avg_cpu_cycles: u64, - avg_ref_cpu_cycles: u64, - } - - impl PerfEventExprCounter { - fn new() -> Self { - Self { - expression: String::default(), - args: vec![], - calls: 0, - duration: Duration::default(), - instructions: 0, - cpu_cycles: 0, - ref_cpu_cycles: 0, - avg_instructions: 0, - avg_cpu_cycles: 0, - avg_ref_cpu_cycles: 0, - } - } - } - - #[derive(Default)] - - pub struct PerfEventCounterHook { - functions_ids: HashMap, - expression_states: HashMap, - expression_counters: HashMap, - call_stack: Vec, - } - - impl PerfEventCounterHook { - pub fn new() -> Self { - Self::default() - } - } - - impl EvalHook for PerfEventCounterHook { - fn will_begin_eval( - &mut self, - - env: &mut Environment, - - _context: &LocalContext, - - expr: &SymbolicExpression, - ) { - let mut key = None; - let mut function_args = vec![]; - - if let SymbolicExpressionType::Atom(atom) = &expr.expr { - if let Some(_native_variable) = NativeVariables::lookup_by_name_at_version( - atom.as_str(), - &ClarityVersion::latest(), - ) { - key = Some(atom.as_str().to_string()); - } - } else if let SymbolicExpressionType::List(list) = &expr.expr { - if let Some((function_name, args)) = list.split_first() { - if let Some(function_name) = function_name.match_atom() { - if let Some(_native_function) = NativeFunctions::lookup_by_name_at_version( - function_name, - &ClarityVersion::latest(), - ) { - let function_name_and_args = - format!("{} ({:?})", function_name, args).to_string(); - - for arg in args { - function_args.push(arg.expr.to_string()); - } - - key = Some(function_name_and_args); - } - } - } - }; - - if key.is_none() { - return; - } - - let function_key = key.unwrap().to_string(); - - let counter = self - .expression_counters - .entry(function_key.clone()) - .or_insert(PerfEventExprCounter::new()); - - counter.expression = function_key.clone(); - counter.args = function_args; - counter.calls += 1; - - let current_cost = env.global_context.cost_track.get_total(); - - self.functions_ids.insert(expr.id, function_key); - - self.call_stack.push(expr.id); - - let mut perf_event_instructions = Builder::new(Hardware::INSTRUCTIONS).build().unwrap(); - let mut perf_event_cpu_cycles = Builder::new(Hardware::CPU_CYCLES).build().unwrap(); - let mut perf_event_ref_cpu_cycles = - Builder::new(Hardware::REF_CPU_CYCLES).build().unwrap(); - - perf_event_instructions.enable().unwrap(); - perf_event_cpu_cycles.enable().unwrap(); - perf_event_ref_cpu_cycles.enable().unwrap(); - - self.expression_states.insert( - expr.id, - PerfEventExprState::new( - perf_event_instructions, - perf_event_cpu_cycles, - perf_event_ref_cpu_cycles, - ), - ); - } - - fn did_finish_eval( - &mut self, - - env: &mut Environment, - - _context: &LocalContext, - - expr: &SymbolicExpression, - - _res: &Result, - ) { - if self.expression_states.contains_key(&expr.id) { - let state = self.expression_states.get_mut(&expr.id).unwrap(); - - state.perf_counter_instructions.disable().unwrap(); - state.perf_counter_cpu_cycles.disable().unwrap(); - state.perf_counter_ref_cpu_cycles.disable().unwrap(); - - let mut instructions = state.perf_counter_instructions.read().unwrap(); - let mut cpu_cycles = state.perf_counter_cpu_cycles.read().unwrap(); - let mut ref_cpu_cycles = state.perf_counter_ref_cpu_cycles.read().unwrap(); - - let function_name = self.functions_ids.get(&expr.id).unwrap().to_string(); - - let elapsed = state.start_instant.elapsed(); - - let counter = self.expression_counters.get_mut(&function_name).unwrap(); - - let duration = elapsed - state.children_duration; - - counter.duration += duration; - - let children_instructions = state.children_perf_counter_instructions; - instructions -= children_instructions; - counter.instructions += instructions; - counter.avg_instructions = counter.instructions / counter.calls; - - let children_cpu_cycles = state.children_perf_counter_cpu_cycles; - cpu_cycles -= children_cpu_cycles; - counter.cpu_cycles += cpu_cycles; - counter.avg_cpu_cycles = counter.cpu_cycles / counter.calls; - - let children_ref_cpu_cycles = state.children_perf_counter_ref_cpu_cycles; - ref_cpu_cycles -= children_ref_cpu_cycles; - counter.ref_cpu_cycles += ref_cpu_cycles; - counter.avg_ref_cpu_cycles = counter.ref_cpu_cycles / counter.calls; - - let _expr_id = self.call_stack.pop().unwrap(); - - for expr_id in self.call_stack.iter().rev() { - let state = self.expression_states.get_mut(expr_id).unwrap(); - - state.children_duration += duration; - - state.children_perf_counter_instructions += instructions; - state.children_perf_counter_cpu_cycles += cpu_cycles; - state.children_perf_counter_ref_cpu_cycles += ref_cpu_cycles; - } - } - } - - fn did_complete( - &mut self, - - _result: core::result::Result<&mut crate::vm::ExecutionResult, String>, - ) { - } - } - - #[test] - fn test_native_functions_benchmark() { - let Ok(json_path) = env::var("CLARITY_BENCHMARK") else { - return; - }; - - let benchmark_iterations: u32 = env::var("CLARITY_BENCHMARK_ITERATIONS") - .ok() - .and_then(|v| v.parse().ok()) - .unwrap_or(1); - - let context = LocalContext::new(); - let mut contract_context = ContractContext::new( - QualifiedContractIdentifier::transient(), - ClarityVersion::latest(), - ); - - let mut marf = MemoryBackingStore::new(); - let mut global_context = GlobalContext::new( - false, - CHAIN_ID_TESTNET, - marf.as_clarity_db(), - LimitedCostTracker::new_free(), - StacksEpochId::latest(), - ); - - let call_stack = CallStack::new(); - - let mut perf_event_counter_hook = PerfEventCounterHook::new(); - - let program = r#" - (+ 1 1) - (+ 1 1 1) - (+ 1 1 1 1) - (- 1 1) - (- 1 1 1) - (- 1 1 1 1) - (* 1 1) - (* 1 1 1) - (* 1 1 1 1) - (sha256 0x12) - (sha256 0x1234) - (sha256 0x12345678) - (sha256 0x1234567812345678123456781234567812345678) - (sha512 0x12) - (sha512 0x1234) - (sha512 0x12345678) - (sha512 0x1234567812345678123456781234567812345678) - (keccak256 0x12) - (keccak256 0x1234) - (keccak256 0x12345678) - (keccak256 0x1234567812345678123456781234567812345678) - (to-ascii? 1) - (to-ascii? 10) - (to-ascii? 100) - (to-ascii? 1000) - (to-ascii? 10000) - (to-ascii? u2) - (to-ascii? u30) - (to-ascii? u400) - (to-ascii? u5000) - (to-ascii? u60000) - (to-ascii? true) - (to-ascii? false) - (to-ascii? 'ST1HTBVD3JG9C05J7HBJTHGR0GGW7KXW28M5JS8QE) - (to-ascii? tx-sender) - (to-ascii? 'ST1HTBVD3JG9C05J7HBJTHGR0GGW7KXW28M5JS8QE.dummy001) - (to-ascii? 'ST1HTBVD3JG9C05J7HBJTHGR0GGW7KXW28M5JS8QE.dummy0123456789) - (to-ascii? 0x01) - (to-ascii? 0x0102) - (to-ascii? 0x010203) - (to-ascii? 0x0102030405060708090A0B0C0D0E0F) - (to-ascii? u"A") - (to-ascii? u"AB") - (to-ascii? u"ABCDEFGHIJKLMNOPQRSTUVWXYZ") - (contract-hash? current-contract) - (as-contract? ((with-stx u100)) (+ 1 1) (+ 2 2)) - (as-contract? ((with-all-assets-unsafe)) (+ 1 1) (+ 2 2)) - (restrict-assets? tx-sender () (+ u1 u2)) - (as-contract? () (+ u1 u2)) - (secp256r1-verify 0xc3abef6a775793dfbc8e0719e7a1de1fc2f90d37a7912b1ce8e300a5a03b06a8 0xf2b8c0645caa7250e3b96d633cf40a88456e4ffbddffb69200c4e019039dfd31f153a6d5c3dc192a5574f3a261b1b70570971b92d8ebf86c17b7670d13591c4e 0x031e18532fd4754c02f3041d9c75ceb33b83ffd81ac7ce4fe882ccb1c98bc5896e) - (secp256k1-verify 0x89171d7815da4bc1f644665a3234bc99d1680afa0b3285eff4878f4275fbfa89 0x54cd3f378a424a3e50ff1c911b7d80cf424e1b86dddecadbcf39077e62fa1e54ee6514347c1608df2c3995e7356f2d60a1fab60878214642134d78cd923ce27a01 0x0256b328b30c8bf5839e24058747879408bdb36241dc9c2e7c619faa12b2920967) - "#; - - { - global_context.add_eval_hook(&mut perf_event_counter_hook); - let value = Value::string_ascii_from_bytes("1".into()).unwrap(); - let response = Value::Response(ResponseData { - committed: true, - data: Box::new(value), - }); - - let contract_id = QualifiedContractIdentifier::transient(); - - let parsed = ast::build_ast( - &contract_id, - program, - &mut (), - ClarityVersion::latest(), - StacksEpochId::latest(), - ) - .unwrap() - .expressions; - - for _ in 0..benchmark_iterations { - eval_all(&parsed, &mut contract_context, &mut global_context, None).unwrap(); - } - } - - let mut results: HashMap> = HashMap::new(); - - for (key, value) in perf_event_counter_hook.expression_counters { - let symbol_name = key.split_whitespace().next().unwrap(); - let counter = results.entry(symbol_name.to_string()).or_insert(vec![]); - counter.push(value); - counter.sort_by_key(|peec| peec.expression.clone()); - } - - let file = File::create(json_path).unwrap(); - - let benchmark_value = serde_json::json!(results); - - serde_json::to_writer_pretty(file, &benchmark_value).unwrap(); - } } From 387f1baf64a119c3cfc696b9a48e03ca37dbfb4a Mon Sep 17 00:00:00 2001 From: rob-stacks Date: Mon, 17 Nov 2025 15:04:30 +0100 Subject: [PATCH 3/7] added profiler feature --- stackslib/Cargo.toml | 7 ++++--- stackslib/src/net/api/blockreplay.rs | 22 ++++++++++++++++++---- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/stackslib/Cargo.toml b/stackslib/Cargo.toml index 6111b3c7ad1..b60982fffaf 100644 --- a/stackslib/Cargo.toml +++ b/stackslib/Cargo.toml @@ -46,9 +46,6 @@ rusqlite = { workspace = true } time = "0.3.41" toml = { workspace = true } -[target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dependencies] -perf-event2 = "0.7.4" - [target.'cfg(unix)'.dependencies] nix = "0.23" @@ -88,6 +85,10 @@ developer-mode = ["clarity/developer-mode"] monitoring_prom = ["prometheus"] slog_json = ["stacks-common/slog_json", "clarity/slog_json", "pox-locking/slog_json"] testing = ["chrono", "stacks-common/testing", "clarity/testing"] +profiler = ["perf-event2"] + +[target.'cfg(all(target_os = "linux", target_arch = "x86_64"))'.dependencies] +perf-event2 = { version = "0.7.4", optional = true } [target.'cfg(all(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64"), not(any(target_os="windows"))))'.dependencies] sha2 = { version = "0.10", features = ["asm"] } diff --git a/stackslib/src/net/api/blockreplay.rs b/stackslib/src/net/api/blockreplay.rs index 05448a2227f..c7ca56554f6 100644 --- a/stackslib/src/net/api/blockreplay.rs +++ b/stackslib/src/net/api/blockreplay.rs @@ -15,9 +15,9 @@ use clarity::vm::costs::ExecutionCost; use clarity::vm::Value; -#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +#[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] use perf_event::events::Hardware; -#[cfg(all(target_os = "linux", target_arch = "x86_64"))] +#[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] use perf_event::{Builder, Counter}; use regex::{Captures, Regex}; use stacks_common::codec::StacksMessageCodec; @@ -167,11 +167,14 @@ impl RPCNakamotoBlockReplayRequestHandler { for (i, tx) in block.txs.iter().enumerate() { let tx_len = tx.tx_len(); + #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] let mut perf_event_cpu_instructions: Option = None; + #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] let mut perf_event_cpu_cycles: Option = None; + #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] let mut perf_event_cpu_ref_cycles: Option = None; - #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] if self.profiler { if let Ok(mut perf_event_cpu_instructions_result) = Builder::new(Hardware::INSTRUCTIONS).build() @@ -206,11 +209,22 @@ impl RPCNakamotoBlockReplayRequestHandler { None, ); + #[cfg(feature = "profiler")] let mut cpu_instructions: Option = None; + #[cfg(not(feature = "profiler"))] + let cpu_instructions: Option = None; + + #[cfg(feature = "profiler")] let mut cpu_cycles: Option = None; + #[cfg(not(feature = "profiler"))] + let cpu_cycles: Option = None; + + #[cfg(feature = "profiler")] let mut cpu_ref_cycles: Option = None; + #[cfg(not(feature = "profiler"))] + let cpu_ref_cycles: Option = None; - #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] if self.profiler { if let Some(mut perf_event_cpu_instructions) = perf_event_cpu_instructions { if perf_event_cpu_instructions.disable().is_ok() { From cbcaeed2545f15d0d39a895a27cc4e6ab72c814d Mon Sep 17 00:00:00 2001 From: rob-stacks Date: Mon, 17 Nov 2025 15:23:22 +0100 Subject: [PATCH 4/7] use new_block_replay_with_profiler in test --- stackslib/src/net/api/tests/blockreplay.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stackslib/src/net/api/tests/blockreplay.rs b/stackslib/src/net/api/tests/blockreplay.rs index 3615c10a556..53d358881d9 100644 --- a/stackslib/src/net/api/tests/blockreplay.rs +++ b/stackslib/src/net/api/tests/blockreplay.rs @@ -148,7 +148,7 @@ fn test_try_make_response() { // query existing, non-empty Nakamoto block let mut request = - StacksHttpRequest::new_block_replay(addr.clone().into(), &rpc_test.canonical_tip); + StacksHttpRequest::new_block_replay_with_profiler(addr.clone().into(), &rpc_test.canonical_tip, true); // add the authorization header request.add_header("authorization".into(), "password".into()); requests.push(request); From 702f36b1c9bd02a4b59266c6ece0a03eb564b3c4 Mon Sep 17 00:00:00 2001 From: rob-stacks Date: Wed, 19 Nov 2025 15:16:05 +0100 Subject: [PATCH 5/7] refactored profiler structures --- stackslib/src/net/api/blockreplay.rs | 182 ++++++++++++--------- stackslib/src/net/api/tests/blockreplay.rs | 7 +- 2 files changed, 114 insertions(+), 75 deletions(-) diff --git a/stackslib/src/net/api/blockreplay.rs b/stackslib/src/net/api/blockreplay.rs index c7ca56554f6..a9ce62a4e6b 100644 --- a/stackslib/src/net/api/blockreplay.rs +++ b/stackslib/src/net/api/blockreplay.rs @@ -44,6 +44,107 @@ use crate::net::http::{ use crate::net::httpcore::{RPCRequestHandler, StacksHttpResponse}; use crate::net::{Error as NetError, StacksHttpRequest, StacksNodeState}; +#[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] +struct BlockReplayProfiler { + perf_event_cpu_instructions: Option, + perf_event_cpu_cycles: Option, + perf_event_cpu_ref_cycles: Option, +} + +#[cfg(not(all(feature = "profiler", target_os = "linux", target_arch = "x86_64")))] +struct BlockReplayProfiler(); + +#[derive(Default)] +struct BlockReplayProfilerResult { + cpu_instructions: Option, + cpu_cycles: Option, + cpu_ref_cycles: Option, +} + +#[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] +impl BlockReplayProfiler { + fn new() -> Self { + let mut perf_event_cpu_instructions: Option = None; + let mut perf_event_cpu_cycles: Option = None; + let mut perf_event_cpu_ref_cycles: Option = None; + + if let Ok(mut perf_event_cpu_instructions_result) = + Builder::new(Hardware::INSTRUCTIONS).build() + { + if perf_event_cpu_instructions_result.enable().is_ok() { + perf_event_cpu_instructions = Some(perf_event_cpu_instructions_result); + } + } + + if let Ok(mut perf_event_cpu_cycles_result) = Builder::new(Hardware::CPU_CYCLES).build() { + if perf_event_cpu_cycles_result.enable().is_ok() { + perf_event_cpu_cycles = Some(perf_event_cpu_cycles_result); + } + } + + if let Ok(mut perf_event_cpu_ref_cycles_result) = + Builder::new(Hardware::REF_CPU_CYCLES).build() + { + if perf_event_cpu_ref_cycles_result.enable().is_ok() { + perf_event_cpu_ref_cycles = Some(perf_event_cpu_ref_cycles_result); + } + } + + Self { + perf_event_cpu_instructions, + perf_event_cpu_cycles, + perf_event_cpu_ref_cycles, + } + } + + fn collect(self) -> BlockReplayProfilerResult { + let mut cpu_instructions: Option = None; + let mut cpu_cycles: Option = None; + let mut cpu_ref_cycles: Option = None; + + if let Some(mut perf_event_cpu_instructions) = self.perf_event_cpu_instructions { + if perf_event_cpu_instructions.disable().is_ok() { + if let Ok(value) = perf_event_cpu_instructions.read() { + cpu_instructions = Some(value); + } + } + } + + if let Some(mut perf_event_cpu_cycles) = self.perf_event_cpu_cycles { + if perf_event_cpu_cycles.disable().is_ok() { + if let Ok(value) = perf_event_cpu_cycles.read() { + cpu_cycles = Some(value); + } + } + } + + if let Some(mut perf_event_cpu_ref_cycles) = self.perf_event_cpu_ref_cycles { + if perf_event_cpu_ref_cycles.disable().is_ok() { + if let Ok(value) = perf_event_cpu_ref_cycles.read() { + cpu_ref_cycles = Some(value); + } + } + } + + BlockReplayProfilerResult { + cpu_instructions, + cpu_cycles, + cpu_ref_cycles, + } + } +} + +#[cfg(not(all(feature = "profiler", target_os = "linux", target_arch = "x86_64")))] +impl BlockReplayProfiler { + fn new() -> Self { + warn!("BlockReplay Profiler is not available in this build."); + Self {} + } + fn collect(self) -> BlockReplayProfilerResult { + BlockReplayProfilerResult::default() + } +} + #[derive(Clone)] pub struct RPCNakamotoBlockReplayRequestHandler { pub block_id: Option, @@ -167,38 +268,11 @@ impl RPCNakamotoBlockReplayRequestHandler { for (i, tx) in block.txs.iter().enumerate() { let tx_len = tx.tx_len(); - #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] - let mut perf_event_cpu_instructions: Option = None; - #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] - let mut perf_event_cpu_cycles: Option = None; - #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] - let mut perf_event_cpu_ref_cycles: Option = None; + let mut profiler: Option = None; + let mut profiler_result = BlockReplayProfilerResult::default(); - #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] if self.profiler { - if let Ok(mut perf_event_cpu_instructions_result) = - Builder::new(Hardware::INSTRUCTIONS).build() - { - if perf_event_cpu_instructions_result.enable().is_ok() { - perf_event_cpu_instructions = Some(perf_event_cpu_instructions_result); - } - } - - if let Ok(mut perf_event_cpu_cycles_result) = - Builder::new(Hardware::CPU_CYCLES).build() - { - if perf_event_cpu_cycles_result.enable().is_ok() { - perf_event_cpu_cycles = Some(perf_event_cpu_cycles_result); - } - } - - if let Ok(mut perf_event_cpu_ref_cycles_result) = - Builder::new(Hardware::REF_CPU_CYCLES).build() - { - if perf_event_cpu_ref_cycles_result.enable().is_ok() { - perf_event_cpu_ref_cycles = Some(perf_event_cpu_ref_cycles_result); - } - } + profiler = Some(BlockReplayProfiler::new()); } let tx_result = builder.try_mine_tx_with_len( @@ -209,55 +283,17 @@ impl RPCNakamotoBlockReplayRequestHandler { None, ); - #[cfg(feature = "profiler")] - let mut cpu_instructions: Option = None; - #[cfg(not(feature = "profiler"))] - let cpu_instructions: Option = None; - - #[cfg(feature = "profiler")] - let mut cpu_cycles: Option = None; - #[cfg(not(feature = "profiler"))] - let cpu_cycles: Option = None; - - #[cfg(feature = "profiler")] - let mut cpu_ref_cycles: Option = None; - #[cfg(not(feature = "profiler"))] - let cpu_ref_cycles: Option = None; - - #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] - if self.profiler { - if let Some(mut perf_event_cpu_instructions) = perf_event_cpu_instructions { - if perf_event_cpu_instructions.disable().is_ok() { - if let Ok(value) = perf_event_cpu_instructions.read() { - cpu_instructions = Some(value); - } - } - } - - if let Some(mut perf_event_cpu_cycles) = perf_event_cpu_cycles { - if perf_event_cpu_cycles.disable().is_ok() { - if let Ok(value) = perf_event_cpu_cycles.read() { - cpu_cycles = Some(value); - } - } - } - - if let Some(mut perf_event_cpu_ref_cycles) = perf_event_cpu_ref_cycles { - if perf_event_cpu_ref_cycles.disable().is_ok() { - if let Ok(value) = perf_event_cpu_ref_cycles.read() { - cpu_ref_cycles = Some(value); - } - } - } + if let Some(profiler) = profiler { + profiler_result = profiler.collect(); } let err = match tx_result { TransactionResult::Success(tx_result) => { txs_receipts.push(( tx_result.receipt, - cpu_instructions, - cpu_cycles, - cpu_ref_cycles, + profiler_result.cpu_instructions, + profiler_result.cpu_cycles, + profiler_result.cpu_ref_cycles, )); Ok(()) } diff --git a/stackslib/src/net/api/tests/blockreplay.rs b/stackslib/src/net/api/tests/blockreplay.rs index 53d358881d9..9bc385c0fbd 100644 --- a/stackslib/src/net/api/tests/blockreplay.rs +++ b/stackslib/src/net/api/tests/blockreplay.rs @@ -147,8 +147,11 @@ fn test_try_make_response() { let mut requests = vec![]; // query existing, non-empty Nakamoto block - let mut request = - StacksHttpRequest::new_block_replay_with_profiler(addr.clone().into(), &rpc_test.canonical_tip, true); + let mut request = StacksHttpRequest::new_block_replay_with_profiler( + addr.clone().into(), + &rpc_test.canonical_tip, + true, + ); // add the authorization header request.add_header("authorization".into(), "password".into()); requests.push(request); From 847445fefaa486568f27e10f922363379f34aef5 Mon Sep 17 00:00:00 2001 From: rob-stacks Date: Wed, 19 Nov 2025 15:29:42 +0100 Subject: [PATCH 6/7] cleaned up use statements for perf_event --- stackslib/src/net/api/blockreplay.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/stackslib/src/net/api/blockreplay.rs b/stackslib/src/net/api/blockreplay.rs index a9ce62a4e6b..aa7311a0914 100644 --- a/stackslib/src/net/api/blockreplay.rs +++ b/stackslib/src/net/api/blockreplay.rs @@ -15,10 +15,6 @@ use clarity::vm::costs::ExecutionCost; use clarity::vm::Value; -#[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] -use perf_event::events::Hardware; -#[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] -use perf_event::{Builder, Counter}; use regex::{Captures, Regex}; use stacks_common::codec::StacksMessageCodec; use stacks_common::types::chainstate::{BlockHeaderHash, ConsensusHash, StacksBlockId, TrieHash}; @@ -64,26 +60,28 @@ struct BlockReplayProfilerResult { #[cfg(all(feature = "profiler", target_os = "linux", target_arch = "x86_64"))] impl BlockReplayProfiler { fn new() -> Self { - let mut perf_event_cpu_instructions: Option = None; - let mut perf_event_cpu_cycles: Option = None; - let mut perf_event_cpu_ref_cycles: Option = None; + let mut perf_event_cpu_instructions: Option = None; + let mut perf_event_cpu_cycles: Option = None; + let mut perf_event_cpu_ref_cycles: Option = None; if let Ok(mut perf_event_cpu_instructions_result) = - Builder::new(Hardware::INSTRUCTIONS).build() + perf_event::Builder::new(perf_event::events::Hardware::INSTRUCTIONS).build() { if perf_event_cpu_instructions_result.enable().is_ok() { perf_event_cpu_instructions = Some(perf_event_cpu_instructions_result); } } - if let Ok(mut perf_event_cpu_cycles_result) = Builder::new(Hardware::CPU_CYCLES).build() { + if let Ok(mut perf_event_cpu_cycles_result) = + perf_event::Builder::new(perf_event::events::Hardware::CPU_CYCLES).build() + { if perf_event_cpu_cycles_result.enable().is_ok() { perf_event_cpu_cycles = Some(perf_event_cpu_cycles_result); } } if let Ok(mut perf_event_cpu_ref_cycles_result) = - Builder::new(Hardware::REF_CPU_CYCLES).build() + perf_event::Builder::new(perf_event::events::Hardware::REF_CPU_CYCLES).build() { if perf_event_cpu_ref_cycles_result.enable().is_ok() { perf_event_cpu_ref_cycles = Some(perf_event_cpu_ref_cycles_result); From 9bfe0a2bc629f28baae256b1b199b1c51e54a3c1 Mon Sep 17 00:00:00 2001 From: rob-stacks Date: Mon, 24 Nov 2025 12:38:45 +0100 Subject: [PATCH 7/7] refactored profiler args --- stackslib/src/net/api/blockreplay.rs | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/stackslib/src/net/api/blockreplay.rs b/stackslib/src/net/api/blockreplay.rs index aa7311a0914..dac83bf6a73 100644 --- a/stackslib/src/net/api/blockreplay.rs +++ b/stackslib/src/net/api/blockreplay.rs @@ -287,12 +287,7 @@ impl RPCNakamotoBlockReplayRequestHandler { let err = match tx_result { TransactionResult::Success(tx_result) => { - txs_receipts.push(( - tx_result.receipt, - profiler_result.cpu_instructions, - profiler_result.cpu_cycles, - profiler_result.cpu_ref_cycles, - )); + txs_receipts.push((tx_result.receipt, profiler_result)); Ok(()) } _ => Err(format!("Problematic tx {i}")), @@ -317,13 +312,8 @@ impl RPCNakamotoBlockReplayRequestHandler { let mut rpc_replayed_block = RPCReplayedBlock::from_block(block, block_fees, tenure_id, parent_block_id); - for (receipt, cpu_instructions, cpu_cycles, cpu_ref_cycles) in &txs_receipts { - let transaction = RPCReplayedBlockTransaction::from_receipt( - receipt, - cpu_instructions, - cpu_cycles, - cpu_ref_cycles, - ); + for (receipt, profiler_result) in &txs_receipts { + let transaction = RPCReplayedBlockTransaction::from_receipt(receipt, &profiler_result); rpc_replayed_block.transactions.push(transaction); } @@ -366,11 +356,9 @@ pub struct RPCReplayedBlockTransaction { } impl RPCReplayedBlockTransaction { - pub fn from_receipt( + fn from_receipt( receipt: &StacksTransactionReceipt, - cpu_instructions: &Option, - cpu_cycles: &Option, - cpu_ref_cycles: &Option, + profiler_result: &BlockReplayProfilerResult, ) -> Self { let events = receipt .events @@ -406,9 +394,9 @@ impl RPCReplayedBlockTransaction { events, post_condition_aborted: receipt.post_condition_aborted, vm_error: receipt.vm_error.clone(), - cpu_instructions: cpu_instructions.clone(), - cpu_cycles: cpu_cycles.clone(), - cpu_ref_cycles: cpu_ref_cycles.clone(), + cpu_instructions: profiler_result.cpu_instructions, + cpu_cycles: profiler_result.cpu_cycles, + cpu_ref_cycles: profiler_result.cpu_ref_cycles, } } }