diff --git a/.changelog/264.feature b/.changelog/264.feature new file mode 100644 index 00000000..648e6098 --- /dev/null +++ b/.changelog/264.feature @@ -0,0 +1 @@ +Paths to modules no longer include repetitive boilerplate prefixes like "/usr/lib/python3.9/". \ No newline at end of file diff --git a/Makefile b/Makefile index cf288346..6176ac3a 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ MAKEFLAGS += --no-builtin-rules .PHONY: build build: - pip install -e . + pip install . python setup.py install_data target/release/libfilpreload.so: Cargo.lock memapi/Cargo.toml memapi/src/*.rs filpreload/src/*.rs filpreload/src/*.c diff --git a/filpreload/src/lib.rs b/filpreload/src/lib.rs index 26a9fc2b..0626eb6c 100644 --- a/filpreload/src/lib.rs +++ b/filpreload/src/lib.rs @@ -313,10 +313,6 @@ extern "C" { // Return whether C code has initialized. fn is_initialized() -> c_int; - - // Increment/decrement reentrancy counter. - fn fil_increment_reentrancy(); - fn fil_decrement_reentrancy(); } struct FilMmapAPI; diff --git a/filprofiler/_script.py b/filprofiler/_script.py index ef253eec..b0bfea2a 100644 --- a/filprofiler/_script.py +++ b/filprofiler/_script.py @@ -241,8 +241,17 @@ def stage_2(): PARSER.print_help() sys.exit(2) script = rest[0] + + # Current directory might be added to sys.path as side-effect of how + # this code runs. We do NOT want that, it doesn't match normal Python + # behavior. + try: + sys.path.remove(os.getcwd()) + except ValueError: + pass # Make directory where script is importable: sys.path.insert(0, dirname(abspath(script))) + function = runpy.run_path func_args = (script,) func_kwargs = {"run_name": "__main__"} diff --git a/memapi/src/memorytracking.rs b/memapi/src/memorytracking.rs index 879c898e..ee90c3f5 100644 --- a/memapi/src/memorytracking.rs +++ b/memapi/src/memorytracking.rs @@ -2,6 +2,7 @@ use crate::flamegraph::filter_to_useful_callstacks; use crate::flamegraph::write_flamegraphs; use crate::linecache::LineCacher; use crate::python::get_runpy_path; +use crate::python::PrefixStripper; use super::rangemap::RangeMap; use super::util::new_hashmap; @@ -167,6 +168,7 @@ impl Callstack { pub fn as_string( &self, to_be_post_processed: bool, + prefix_stripper: Option<&PrefixStripper>, functions: &dyn FunctionLocations, separator: &'static str, linecache: &mut LineCacher, @@ -192,7 +194,13 @@ impl Callstack { .map(|(id, (function, filename))| { if to_be_post_processed { // Get Python code. + let code = linecache.get_source_line(filename, id.line_number as usize); + // TODO this is a bug, we should not be calling into Python! + let filename = prefix_stripper + .map(|ps| ps.strip_prefix(filename)) + .unwrap_or(filename); + // Leading whitespace is dropped by SVG, so we'd like to // replace it with non-breaking space. However, inferno // trims whitespace @@ -387,7 +395,13 @@ impl AllocationTracker { eprintln!("=fil-profile= {}", message); eprintln!( "=| {}", - callstack.as_string(false, &self.functions, "\n=| ", &mut LineCacher::default()) + callstack.as_string( + false, + None, + &self.functions, + "\n=| ", + &mut LineCacher::default() + ) ); } @@ -403,8 +417,7 @@ impl AllocationTracker { if let Some(allocation) = self .current_allocations .get(&process) - .map(|a| a.get(&address)) - .flatten() + .and_then(|a| a.get(&address)) { allocation.size() } else { @@ -612,12 +625,14 @@ impl AllocationTracker { ) -> impl ExactSizeIterator + '_ { let by_call = self.combine_callstacks(peak).into_iter(); let id_to_callstack = self.interner.get_reverse_map(); + let prefix_stripper = PrefixStripper::new(); let mut linecache = LineCacher::default(); by_call.map(move |(callstack_id, size)| { format!( "{} {}", id_to_callstack.get(&callstack_id).unwrap().as_string( to_be_post_processed, + Some(&prefix_stripper), &self.functions, ";", &mut linecache, diff --git a/memapi/src/python.rs b/memapi/src/python.rs index 448e3a40..2353ede4 100644 --- a/memapi/src/python.rs +++ b/memapi/src/python.rs @@ -13,3 +13,79 @@ pub fn get_runpy_path() -> &'static str { }); PATH.as_str() } + +/// Strip sys.path prefixes from Python modules' pathes. +pub struct PrefixStripper { + prefixes: Vec, +} + +impl PrefixStripper { + pub fn new() -> Self { + let prefixes = Python::with_gil(|py| { + let paths = py.eval( + // 1. Drop non-string values, they're not something we can understand. + // 2. Drop empty string, it's misleading. + // 3. Add '/' to end of all paths. + // 4. Sorted, so most specific (i.e. longest) ones are first. + "list(sorted([__import__('os').path.normpath(path) + '/' for path in __import__('sys').path if (isinstance(path, str) and path)], key=lambda i: -len(i)))", + None, + None, + ); + paths + .map(|p| p.extract::>().unwrap_or_else(|_| vec![])) + .unwrap_or_else(|_| vec![]) + }); + PrefixStripper { prefixes } + } + + /// Remove the sys.path prefix from a path to an imported module. + /// + /// E.g. if the input is "/usr/lib/python3.9/threading.py", the result will + /// probably be "threading.py". + pub fn strip_prefix<'a>(&self, path: &'a str) -> &'a str { + for prefix in &self.prefixes { + if path.starts_with(prefix) { + return &path[prefix.len()..path.len()]; + } + } + // No prefix found. + path + } +} + +#[cfg(test)] +mod tests { + use pyo3::Python; + + use crate::python::PrefixStripper; + + /// Get the filesystem path of a Python module. + fn get_module_path(module: &str) -> String { + Python::with_gil(|py| { + py.eval( + &("__import__('".to_owned() + module + "').__file__"), + None, + None, + ) + .unwrap() + .extract() + .unwrap() + }) + } + + #[test] + fn prefix_stripping() { + pyo3::prepare_freethreaded_python(); + let ps = PrefixStripper::new(); + // stdlib + assert_eq!( + ps.strip_prefix(&get_module_path("threading")), + "threading.py" + ); + // site-packages + assert_eq!(ps.strip_prefix(&get_module_path("pip")), "pip/__init__.py"); + // random paths + assert_eq!(ps.strip_prefix("/x/blah.py"), "/x/blah.py"); + assert_eq!(ps.strip_prefix("foo.py"), "foo.py"); + } +}