Skip to content

Commit cbc1950

Browse files
committed
Split up writing and reading function locations, and support delaying the conversion.
1 parent 7a51d82 commit cbc1950

File tree

3 files changed

+69
-33
lines changed

3 files changed

+69
-33
lines changed

filpreload/src/lib.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ fn dump_to_flamegraph(
211211
// the GIL, allowing another thread to run, and it will try to allocate
212212
// and hit the TRACKER_STATE mutex. And now we're deadlocked. So we make
213213
// sure flamegraph rendering does not require TRACKER_STATE to be locked.
214-
let (allocated_bytes, flamegraph_callstacks) = {
214+
let (allocated_bytes, flamegraph_callstacks_factory) = {
215215
let mut tracker_state = TRACKER_STATE.lock();
216216
let allocations = &mut tracker_state.allocations;
217217

@@ -222,10 +222,12 @@ fn dump_to_flamegraph(
222222
} else {
223223
allocations.get_current_allocated_bytes()
224224
};
225-
let flamegraph_callstacks = allocations.combine_callstacks(peak, IdentityCleaner);
226-
(allocated_bytes, flamegraph_callstacks)
225+
let flamegraph_callstacks_factory = allocations.combine_callstacks(peak, IdentityCleaner);
226+
(allocated_bytes, flamegraph_callstacks_factory)
227227
};
228228

229+
let flamegraph_callstacks = flamegraph_callstacks_factory();
230+
229231
eprintln!("=fil-profile= Preparing to write to {}", path);
230232
let directory_path = Path::new(path);
231233

memapi/src/flamegraph.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use itertools::Itertools;
55

66
use crate::{
77
linecache::LineCacher,
8-
memorytracking::{Callstack, FunctionLocations},
8+
memorytracking::{Callstack, ReadFunctionLocations},
99
};
1010

1111
/// Filter down to top 99% of samples.
@@ -70,7 +70,7 @@ pub trait CallstackCleaner {
7070
}
7171

7272
/// The data needed to create a flamegraph.
73-
pub struct FlamegraphCallstacks<D, FL: FunctionLocations, UC> {
73+
pub struct FlamegraphCallstacks<D, FL: ReadFunctionLocations, UC> {
7474
data: D,
7575
functions: FL,
7676
callstack_cleaner: UC,
@@ -81,7 +81,7 @@ where
8181
&'a D: IntoIterator<Item = (&'a Callstack, &'a usize)>,
8282
<&'a D as IntoIterator>::IntoIter: ExactSizeIterator,
8383
D: 'a,
84-
FL: FunctionLocations,
84+
FL: ReadFunctionLocations,
8585
UC: CallstackCleaner,
8686
{
8787
pub fn new(data: D, functions: FL, callstack_cleaner: UC) -> Self {

memapi/src/memorytracking.rs

Lines changed: 61 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,23 @@ struct FunctionLocation {
4141
function_name: String,
4242
}
4343

44+
/// Basic usage: first clone, once any locks are released, convert to
45+
/// ReadFunctionLocations.
46+
///
4447
/// The clone should be cheap, ideally, so probably an immutable data structure
4548
/// would be good.
46-
pub trait FunctionLocations {
47-
fn get_function_and_filename(&self, id: FunctionId) -> (&str, &str);
49+
pub trait WriteFunctionLocations {
50+
type Reader: ReadFunctionLocations;
4851

49-
// Like Clone.clone(), but should be cheap.
52+
/// Like Clone.clone(), but should be cheap.
5053
fn cheap_clone(&self) -> Self;
54+
55+
/// Convert to ReadFunctionLocations.
56+
fn to_reader(self) -> Self::Reader;
57+
}
58+
59+
pub trait ReadFunctionLocations {
60+
fn get_function_and_filename(&self, id: FunctionId) -> (&str, &str);
5161
}
5262

5363
/// Stores FunctionLocations, returns a FunctionId
@@ -76,7 +86,7 @@ impl VecFunctionLocations {
7686
}
7787
}
7888

79-
impl FunctionLocations for VecFunctionLocations {
89+
impl ReadFunctionLocations for VecFunctionLocations {
8090
/// Get the function name and filename.
8191
fn get_function_and_filename(&self, id: FunctionId) -> (&str, &str) {
8292
if id == FunctionId::UNKNOWN {
@@ -85,12 +95,20 @@ impl FunctionLocations for VecFunctionLocations {
8595
let location = &self.functions[id.0 as usize];
8696
(&location.function_name, &location.filename)
8797
}
98+
}
99+
100+
impl WriteFunctionLocations for VecFunctionLocations {
101+
type Reader = Self;
88102

89103
fn cheap_clone(&self) -> Self {
90104
Self {
91105
functions: self.functions.clone(),
92106
}
93107
}
108+
109+
fn to_reader(self) -> Self::Reader {
110+
self
111+
}
94112
}
95113

96114
/// Either the line number, or the bytecode index needed to get it.
@@ -197,7 +215,7 @@ impl Callstack {
197215
callstack_id
198216
}
199217

200-
pub fn as_string<FL: FunctionLocations>(
218+
pub fn as_string<FL: ReadFunctionLocations>(
201219
&self,
202220
to_be_post_processed: bool,
203221
functions: &FL,
@@ -382,7 +400,7 @@ impl CallstackCleaner for IdentityCleaner {
382400
}
383401

384402
/// The main data structure tracking everything.
385-
pub struct AllocationTracker<FL: FunctionLocations> {
403+
pub struct AllocationTracker<FL: WriteFunctionLocations> {
386404
// malloc()/calloc():
387405
current_allocations: BTreeMap<ProcessUid, HashMap<usize, Allocation, ARandomState>>,
388406
// anonymous mmap(), i.e. not file backed:
@@ -411,7 +429,7 @@ pub struct AllocationTracker<FL: FunctionLocations> {
411429
failed_deallocations: usize,
412430
}
413431

414-
impl<FL: FunctionLocations> AllocationTracker<FL> {
432+
impl<FL: WriteFunctionLocations> AllocationTracker<FL> {
415433
pub fn new(default_path: String, functions: FL) -> AllocationTracker<FL> {
416434
AllocationTracker {
417435
current_allocations: BTreeMap::from([(PARENT_PROCESS, new_hashmap())]),
@@ -429,13 +447,21 @@ impl<FL: FunctionLocations> AllocationTracker<FL> {
429447
}
430448

431449
/// Print a traceback for the given CallstackId.
432-
pub fn print_traceback(&self, message: &'static str, callstack_id: CallstackId) {
450+
///
451+
/// Should only be used with VecFunctionLocations, may cause deadlocks with
452+
/// others...
453+
fn print_traceback(&self, message: &'static str, callstack_id: CallstackId) {
433454
let id_to_callstack = self.interner.get_reverse_map();
434455
let callstack = id_to_callstack[&callstack_id];
435456
eprintln!("=fil-profile= {}", message);
436457
eprintln!(
437458
"=| {}",
438-
callstack.as_string(false, &self.functions, "\n=| ", &mut LineCacher::default())
459+
callstack.as_string(
460+
false,
461+
&self.functions.cheap_clone().to_reader(),
462+
"\n=| ",
463+
&mut LineCacher::default()
464+
)
439465
);
440466
}
441467

@@ -618,12 +644,18 @@ impl<FL: FunctionLocations> AllocationTracker<FL> {
618644

619645
/// Combine Callstacks and make them human-readable. Duplicate callstacks
620646
/// have their allocated memory summed.
647+
///
648+
/// We don't return the FlamegraphCallstacks, but rather a factory, because
649+
/// we don't want to hold any locks while doing any potentially expensive
650+
/// WriteFunctionLocations->ReadFunctionLocations conversion; by returning a
651+
/// factory, that can be appropriately delayed by the caller.
621652
pub fn combine_callstacks<CC: CallstackCleaner>(
622653
&mut self,
623654
// If false, will do the current allocations:
624655
peak: bool,
625656
callstack_cleaner: CC,
626-
) -> FlamegraphCallstacks<HashMap<Callstack, usize, ARandomState>, FL, CC> {
657+
) -> impl FnOnce() -> FlamegraphCallstacks<HashMap<Callstack, usize, ARandomState>, FL::Reader, CC>
658+
{
627659
// Would be nice to validate if data is consistent. However, there are
628660
// edge cases that make it slightly inconsistent (e.g. see the
629661
// unexpected code path in add_allocation() above), and blowing up
@@ -644,18 +676,19 @@ impl<FL: FunctionLocations> AllocationTracker<FL> {
644676
};
645677
let sum = callstacks.iter().sum();
646678
let id_to_callstack = self.interner.get_reverse_map();
647-
FlamegraphCallstacks::new(
648-
filter_to_useful_callstacks(callstacks.iter().enumerate(), sum)
649-
.into_iter()
650-
.filter_map(|(k, v)| {
651-
id_to_callstack
652-
.get(&(k as CallstackId))
653-
.map(|cs| ((**cs).clone(), v))
654-
})
655-
.collect(),
656-
self.functions.cheap_clone(),
657-
callstack_cleaner,
658-
)
679+
let data = filter_to_useful_callstacks(callstacks.iter().enumerate(), sum)
680+
.into_iter()
681+
.filter_map(|(k, v)| {
682+
id_to_callstack
683+
.get(&(k as CallstackId))
684+
.map(|cs| ((**cs).clone(), v))
685+
})
686+
.collect();
687+
let functions_writer = self.functions.cheap_clone();
688+
689+
// Return a closure, so we can delay doing the ReadFunctionLocations
690+
// conversion if necessary:
691+
|| FlamegraphCallstacks::new(data, functions_writer.to_reader(), callstack_cleaner)
659692
}
660693

661694
/// Clear memory we won't be needing anymore, since we're going to exit out.
@@ -723,12 +756,14 @@ impl<FL: FunctionLocations> AllocationTracker<FL> {
723756

724757
#[cfg(test)]
725758
mod tests {
726-
use crate::memorytracking::{IdentityCleaner, ProcessUid, PARENT_PROCESS};
759+
use crate::memorytracking::{
760+
IdentityCleaner, ProcessUid, ReadFunctionLocations, WriteFunctionLocations, PARENT_PROCESS,
761+
};
727762

728763
use super::LineNumberInfo::LineNumber;
729764
use super::{
730765
Allocation, AllocationTracker, CallSiteId, Callstack, CallstackInterner, FunctionId,
731-
FunctionLocations, VecFunctionLocations, HIGH_32BIT, MIB,
766+
VecFunctionLocations, HIGH_32BIT, MIB,
732767
};
733768
use proptest::prelude::*;
734769
use std::borrow::Cow;
@@ -1183,8 +1218,7 @@ mod tests {
11831218
"c:3 (cf) 234",
11841219
"a:7 (af);b:2 (bf) 6000",
11851220
];
1186-
let mut result2: Vec<String> = tracker
1187-
.combine_callstacks(true, IdentityCleaner)
1221+
let mut result2: Vec<String> = tracker.combine_callstacks(true, IdentityCleaner)()
11881222
.to_lines(false)
11891223
.collect();
11901224
result2.sort();
@@ -1194,7 +1228,7 @@ mod tests {
11941228

11951229
#[test]
11961230
fn test_unknown_function_id() {
1197-
let func_locations = VecFunctionLocations::new();
1231+
let func_locations = VecFunctionLocations::new().to_reader();
11981232
let (function, filename) = func_locations.get_function_and_filename(FunctionId::UNKNOWN);
11991233
assert_eq!(filename, "UNKNOWN DUE TO BUG");
12001234
assert_eq!(function, "UNKNOWN");

0 commit comments

Comments
 (0)