Skip to content

Commit c0cdf3f

Browse files
authored
Merge pull request #488 from pythonspeed/sciagraph-feb-10-2023
Sciagraph changes
2 parents 7a51d82 + d415ae0 commit c0cdf3f

File tree

3 files changed

+95
-47
lines changed

3 files changed

+95
-47
lines changed

filpreload/src/lib.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ fn dump_to_flamegraph(
211211
// the GIL, allowing another thread to run, and it will try to allocation
212212
// and hit the TRACKER_STATE mutex. And now we're deadlocked. So we make
213213
// sure flamegraph rendering does not require TRACKER_STATE to be locked.
214-
let (allocated_bytes, flamegraph_callstacks) = {
214+
let (allocated_bytes, flamegraph_callstacks_factory) = {
215215
let mut tracker_state = TRACKER_STATE.lock();
216216
let allocations = &mut tracker_state.allocations;
217217

@@ -222,10 +222,12 @@ fn dump_to_flamegraph(
222222
} else {
223223
allocations.get_current_allocated_bytes()
224224
};
225-
let flamegraph_callstacks = allocations.combine_callstacks(peak, IdentityCleaner);
226-
(allocated_bytes, flamegraph_callstacks)
225+
let flamegraph_callstacks_factory = allocations.combine_callstacks(peak, IdentityCleaner);
226+
(allocated_bytes, flamegraph_callstacks_factory)
227227
};
228228

229+
let flamegraph_callstacks = flamegraph_callstacks_factory();
230+
229231
eprintln!("=fil-profile= Preparing to write to {}", path);
230232
let directory_path = Path::new(path);
231233

memapi/src/flamegraph.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use itertools::Itertools;
55

66
use crate::{
77
linecache::LineCacher,
8-
memorytracking::{Callstack, FunctionLocations},
8+
memorytracking::{Callstack, ReadFunctionLocations},
99
};
1010

1111
/// Filter down to top 99% of samples.
@@ -70,7 +70,7 @@ pub trait CallstackCleaner {
7070
}
7171

7272
/// The data needed to create a flamegraph.
73-
pub struct FlamegraphCallstacks<D, FL: FunctionLocations, UC> {
73+
pub struct FlamegraphCallstacks<D, FL: ReadFunctionLocations, UC> {
7474
data: D,
7575
functions: FL,
7676
callstack_cleaner: UC,
@@ -81,7 +81,7 @@ where
8181
&'a D: IntoIterator<Item = (&'a Callstack, &'a usize)>,
8282
<&'a D as IntoIterator>::IntoIter: ExactSizeIterator,
8383
D: 'a,
84-
FL: FunctionLocations,
84+
FL: ReadFunctionLocations,
8585
UC: CallstackCleaner,
8686
{
8787
pub fn new(data: D, functions: FL, callstack_cleaner: UC) -> Self {

memapi/src/memorytracking.rs

Lines changed: 87 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,23 @@ struct FunctionLocation {
4141
function_name: String,
4242
}
4343

44+
/// Basic usage: first clone, once any locks are released, convert to
45+
/// ReadFunctionLocations.
46+
///
4447
/// The clone should be cheap, ideally, so probably an immutable data structures
4548
/// would be good.
46-
pub trait FunctionLocations {
47-
fn get_function_and_filename(&self, id: FunctionId) -> (&str, &str);
49+
pub trait WriteFunctionLocations {
50+
type Reader: ReadFunctionLocations;
4851

49-
// Like Clone.clone(), but should be cheap.
52+
/// Like Clone.clone(), but should be cheap.
5053
fn cheap_clone(&self) -> Self;
54+
55+
/// Convert to ReadFunctionLocations.
56+
fn to_reader(self) -> Self::Reader;
57+
}
58+
59+
pub trait ReadFunctionLocations {
60+
fn get_function_and_filename_and_display_filename(&self, id: FunctionId) -> (&str, &str, &str);
5161
}
5262

5363
/// Stores FunctionLocations, returns a FunctionId
@@ -76,21 +86,34 @@ impl VecFunctionLocations {
7686
}
7787
}
7888

79-
impl FunctionLocations for VecFunctionLocations {
89+
impl ReadFunctionLocations for VecFunctionLocations {
8090
/// Get the function name and filename.
81-
fn get_function_and_filename(&self, id: FunctionId) -> (&str, &str) {
91+
fn get_function_and_filename_and_display_filename(&self, id: FunctionId) -> (&str, &str, &str) {
8292
if id == FunctionId::UNKNOWN {
83-
return ("UNKNOWN", "UNKNOWN DUE TO BUG");
93+
return ("UNKNOWN", "UNKNOWN", "UNKNOWN DUE TO BUG");
8494
}
8595
let location = &self.functions[id.0 as usize];
86-
(&location.function_name, &location.filename)
96+
(
97+
&location.function_name,
98+
&location.filename,
99+
// TODO on Jupyter you might want to make display filename different...
100+
&location.filename,
101+
)
87102
}
103+
}
104+
105+
impl WriteFunctionLocations for VecFunctionLocations {
106+
type Reader = Self;
88107

89108
fn cheap_clone(&self) -> Self {
90109
Self {
91110
functions: self.functions.clone(),
92111
}
93112
}
113+
114+
fn to_reader(self) -> Self::Reader {
115+
self
116+
}
94117
}
95118

96119
/// Either the line number, or the bytecode index needed to get it.
@@ -197,7 +220,7 @@ impl Callstack {
197220
callstack_id
198221
}
199222

200-
pub fn as_string<FL: FunctionLocations>(
223+
pub fn as_string<FL: ReadFunctionLocations>(
201224
&self,
202225
to_be_post_processed: bool,
203226
functions: &FL,
@@ -207,10 +230,15 @@ impl Callstack {
207230
if self.calls.is_empty() {
208231
return "[No Python stack]".to_string();
209232
}
210-
let calls: Vec<(CallSiteId, (&str, &str))> = self
233+
let calls: Vec<(CallSiteId, (&str, &str, &str))> = self
211234
.calls
212235
.iter()
213-
.map(|id| (*id, functions.get_function_and_filename(id.function)))
236+
.map(|id| {
237+
(
238+
*id,
239+
functions.get_function_and_filename_and_display_filename(id.function),
240+
)
241+
})
214242
.collect();
215243
let skip_prefix = if cfg!(feature = "fil4prod") {
216244
0
@@ -222,7 +250,7 @@ impl Callstack {
222250
calls
223251
.into_iter()
224252
.skip(skip_prefix)
225-
.map(|(id, (function, filename))| {
253+
.map(|(id, (function, filename, display_filename))| {
226254
if to_be_post_processed {
227255
// Get Python code.
228256
let code = linecache
@@ -250,16 +278,16 @@ impl Callstack {
250278
// and that whitespace doesn't get trimmed from start;
251279
// we'll get rid of this in post-processing.
252280
format!(
253-
"{filename}:{line} ({function});\u{2800}{code}",
254-
filename = filename,
281+
"{display_filename}:{line} ({function});\u{2800}{code}",
282+
display_filename = display_filename,
255283
line = id.line_number.get_line_number(),
256284
function = function,
257285
code = &code.trim_end(),
258286
)
259287
} else {
260288
format!(
261-
"{filename}:{line} ({function})",
262-
filename = filename,
289+
"{display_filename}:{line} ({function})",
290+
display_filename = display_filename,
263291
line = id.line_number.get_line_number(),
264292
function = function,
265293
)
@@ -269,10 +297,10 @@ impl Callstack {
269297
}
270298
}
271299

272-
fn runpy_prefix_length(calls: std::slice::Iter<(CallSiteId, (&str, &str))>) -> usize {
300+
fn runpy_prefix_length(calls: std::slice::Iter<(CallSiteId, (&str, &str, &str))>) -> usize {
273301
let mut length = 0;
274302
let runpy_path = get_runpy_path();
275-
for (_, (_, filename)) in calls {
303+
for (_, (_, filename, _)) in calls {
276304
// On Python 3.11 it uses <frozen runpy> for some reason.
277305
if *filename == runpy_path || *filename == "<frozen runpy>" {
278306
length += 1;
@@ -382,7 +410,7 @@ impl CallstackCleaner for IdentityCleaner {
382410
}
383411

384412
/// The main data structure tracking everything.
385-
pub struct AllocationTracker<FL: FunctionLocations> {
413+
pub struct AllocationTracker<FL: WriteFunctionLocations> {
386414
// malloc()/calloc():
387415
current_allocations: BTreeMap<ProcessUid, HashMap<usize, Allocation, ARandomState>>,
388416
// anonymous mmap(), i.e. not file backed:
@@ -411,7 +439,7 @@ pub struct AllocationTracker<FL: FunctionLocations> {
411439
failed_deallocations: usize,
412440
}
413441

414-
impl<FL: FunctionLocations> AllocationTracker<FL> {
442+
impl<FL: WriteFunctionLocations> AllocationTracker<FL> {
415443
pub fn new(default_path: String, functions: FL) -> AllocationTracker<FL> {
416444
AllocationTracker {
417445
current_allocations: BTreeMap::from([(PARENT_PROCESS, new_hashmap())]),
@@ -429,13 +457,21 @@ impl<FL: FunctionLocations> AllocationTracker<FL> {
429457
}
430458

431459
/// Print a traceback for the given CallstackId.
432-
pub fn print_traceback(&self, message: &'static str, callstack_id: CallstackId) {
460+
///
461+
/// Should only be used with VecFunctionLocations, may cause deadlocks with
462+
/// others...
463+
fn print_traceback(&self, message: &'static str, callstack_id: CallstackId) {
433464
let id_to_callstack = self.interner.get_reverse_map();
434465
let callstack = id_to_callstack[&callstack_id];
435466
eprintln!("=fil-profile= {}", message);
436467
eprintln!(
437468
"=| {}",
438-
callstack.as_string(false, &self.functions, "\n=| ", &mut LineCacher::default())
469+
callstack.as_string(
470+
false,
471+
&self.functions.cheap_clone().to_reader(),
472+
"\n=| ",
473+
&mut LineCacher::default()
474+
)
439475
);
440476
}
441477

@@ -618,12 +654,18 @@ impl<FL: FunctionLocations> AllocationTracker<FL> {
618654

619655
/// Combine Callstacks and make them human-readable. Duplicate callstacks
620656
/// have their allocated memory summed.
657+
///
658+
/// We don't return the FlamegraphCallstacks, but rather a factory, because
659+
/// we don't want to hold any locks while doing any potentially expensive
660+
/// WriteFunctionLocations->ReadFunctionLocations conversion; by returning a
661+
/// factory, that can be appropriately delayed by the caller.
621662
pub fn combine_callstacks<CC: CallstackCleaner>(
622663
&mut self,
623664
// If false, will do the current allocations:
624665
peak: bool,
625666
callstack_cleaner: CC,
626-
) -> FlamegraphCallstacks<HashMap<Callstack, usize, ARandomState>, FL, CC> {
667+
) -> impl FnOnce() -> FlamegraphCallstacks<HashMap<Callstack, usize, ARandomState>, FL::Reader, CC>
668+
{
627669
// Would be nice to validate if data is consistent. However, there are
628670
// edge cases that make it slightly inconsistent (e.g. see the
629671
// unexpected code path in add_allocation() above), and blowing up
@@ -644,18 +686,19 @@ impl<FL: FunctionLocations> AllocationTracker<FL> {
644686
};
645687
let sum = callstacks.iter().sum();
646688
let id_to_callstack = self.interner.get_reverse_map();
647-
FlamegraphCallstacks::new(
648-
filter_to_useful_callstacks(callstacks.iter().enumerate(), sum)
649-
.into_iter()
650-
.filter_map(|(k, v)| {
651-
id_to_callstack
652-
.get(&(k as CallstackId))
653-
.map(|cs| ((**cs).clone(), v))
654-
})
655-
.collect(),
656-
self.functions.cheap_clone(),
657-
callstack_cleaner,
658-
)
689+
let data = filter_to_useful_callstacks(callstacks.iter().enumerate(), sum)
690+
.into_iter()
691+
.filter_map(|(k, v)| {
692+
id_to_callstack
693+
.get(&(k as CallstackId))
694+
.map(|cs| ((**cs).clone(), v))
695+
})
696+
.collect();
697+
let functions_writer = self.functions.cheap_clone();
698+
699+
// Return a closure, so we can delay doing the ReadFunctionLocations
700+
// conversion if necessary:
701+
|| FlamegraphCallstacks::new(data, functions_writer.to_reader(), callstack_cleaner)
659702
}
660703

661704
/// Clear memory we won't be needing anymore, since we're going to exit out.
@@ -723,12 +766,14 @@ impl<FL: FunctionLocations> AllocationTracker<FL> {
723766

724767
#[cfg(test)]
725768
mod tests {
726-
use crate::memorytracking::{IdentityCleaner, ProcessUid, PARENT_PROCESS};
769+
use crate::memorytracking::{
770+
IdentityCleaner, ProcessUid, ReadFunctionLocations, WriteFunctionLocations, PARENT_PROCESS,
771+
};
727772

728773
use super::LineNumberInfo::LineNumber;
729774
use super::{
730775
Allocation, AllocationTracker, CallSiteId, Callstack, CallstackInterner, FunctionId,
731-
FunctionLocations, VecFunctionLocations, HIGH_32BIT, MIB,
776+
VecFunctionLocations, HIGH_32BIT, MIB,
732777
};
733778
use proptest::prelude::*;
734779
use std::borrow::Cow;
@@ -1183,8 +1228,7 @@ mod tests {
11831228
"c:3 (cf) 234",
11841229
"a:7 (af);b:2 (bf) 6000",
11851230
];
1186-
let mut result2: Vec<String> = tracker
1187-
.combine_callstacks(true, IdentityCleaner)
1231+
let mut result2: Vec<String> = tracker.combine_callstacks(true, IdentityCleaner)()
11881232
.to_lines(false)
11891233
.collect();
11901234
result2.sort();
@@ -1194,9 +1238,11 @@ mod tests {
11941238

11951239
#[test]
11961240
fn test_unknown_function_id() {
1197-
let func_locations = VecFunctionLocations::new();
1198-
let (function, filename) = func_locations.get_function_and_filename(FunctionId::UNKNOWN);
1199-
assert_eq!(filename, "UNKNOWN DUE TO BUG");
1241+
let func_locations = VecFunctionLocations::new().to_reader();
1242+
let (function, filename, display_filename) =
1243+
func_locations.get_function_and_filename_and_display_filename(FunctionId::UNKNOWN);
1244+
assert_eq!(display_filename, "UNKNOWN DUE TO BUG");
1245+
assert_eq!(filename, "UNKNOWN");
12001246
assert_eq!(function, "UNKNOWN");
12011247
}
12021248

0 commit comments

Comments
 (0)