|
| 1 | +# Item 59: Use tracemalloc to understand memory usage and leaks |
| 2 | + |
| 3 | + |
| 4 | +# Memory management in the default implementation of Python, CPython, uses |
| 5 | +# reference counting. This ensures that as soon as all references to an |
| 6 | +# object have expired, the referenced object is also cleared. CPython also |
| 7 | +# has a built-in cycle detector to ensure that self-referencing objects are |
| 8 | +# eventually garbage collected. |
| 9 | + |
| 10 | +# In theory, this means that most Python programmers don't have to worry about |
| 11 | +# allocating or deallocating memory in their programs. It's taken care of |
| 12 | +# automatically by the language and the CPython runtime. However, in practice, |
| 13 | +# programs eventually do run out of memory due to held references. Figuring out |
| 14 | +# where your Python programs are using or leaking memory proves to be a |
| 15 | +# challenge. |
| 16 | + |
| 17 | +# The first way to debug memory usage is to ask the gc built-in module to list |
| 18 | +# every object currently known by the garbage collector. Although it's quite |
| 19 | +# a blunt tool, this approach does let you quickly get a sense of where your |
| 20 | +# program's memory is being used. |
| 21 | + |
| 22 | +# Here, I run a program that wastes memory by keeping references. It prints |
| 23 | +# out how many objects were created during execution and a small sample of |
| 24 | +# allocated objects. |
| 25 | + |
| 26 | +# item_59_use_tracemalloc_using_gc.py |
| 27 | +import item_59_use_tracemalloc_using_gc |
| 28 | +# 4944 objects before |
| 29 | +# 4955 objects after |
| 30 | +# {'_loaders': [('.cpython-35m-x86_64-linux-gnu.so', <class '_frozen_importlib_external.ExtensionFileL |
| 31 | +# set() |
| 32 | +# {'imageio', 'mujoco_py-0.5.7-py3.5.egg-info', 'pip', 'keras_tqdm', 'pyglet-1.2.4.dist-info', 'easy-i |
| 33 | + |
| 34 | +# The problem with gc.get_objects is that it doesn't tell you anything about |
| 35 | +# how the objects were allocated. In complicated programs, a specific class |
| 36 | +# of object could be allocated many different ways. The overall number of |
| 37 | +# objects isn't nearly as important as identifying the code responsible for |
| 38 | +# allocating the objects that are leaking memory. |
| 39 | + |
| 40 | +# Python 3.4 introduces a new tracemalloc built-in module for solving this |
| 41 | +# problem. tracemalloc makes it possible to connect an object back to where |
| 42 | +# it was allocated. Here, I print out the top three memory usage offenders in |
| 43 | +# a program using tracemalloc: |
| 44 | + |
| 45 | +# item_59_use_tracemalloc_top_n.py |
| 46 | +import item_59_use_tracemalloc_top_n |
| 47 | +# /home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_waste_memory.py:7: size=3539 KiB (+3539 KiB), count=100000 (+100000), average=36 B |
| 48 | +# /home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_top_n.py:6: size=1264 B (+1264 B), count=2 (+2), average=632 B |
| 49 | +# <frozen importlib._bootstrap_external>:476: size=485 B (+485 B), count=6 (+6), average=81 B |
| 50 | + |
| 51 | +# It's immediately clear which objects are dominating my program's memory |
| 52 | +# usage and where in the source code they were allocated. |
| 53 | + |
| 54 | +# The tracemalloc module can also print out the full stack trace of each |
| 55 | +# allocation (up to the number of frames passed to the start method). Here, I |
| 56 | +# print out the stack trace of the biggest source of memory usage in the |
| 57 | +# program: |
| 58 | + |
| 59 | +# item_59_use_tracemalloc_with_trace.py |
| 60 | +import item_59_use_tracemalloc_with_trace |
| 61 | +# File "/home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_waste_memory.py", line 7 |
| 62 | +# a.append(10 * 230 * i) |
| 63 | +# File "/home/robot/Documents/PycharmProjects/BetterPython59Ways/item_59_use_tracemalloc_with_trace.py", line 6 |
| 64 | +# x = waste_memory.run() |
| 65 | + |
| 66 | +# A stack trace like this is most valuable for figuring out which particular |
| 67 | +# usage of a common function is responsible for memory consumption in a |
| 68 | +# program. |
| 69 | + |
| 70 | +# Unfortunately, Python 2 doesn't provide the tracemalloc built-in module. |
| 71 | +# There are open source packages for tracking memory usage in Python 2 (such |
| 72 | +# as heapy), though they do not fully replicate the functionality of |
| 73 | +# tracemalloc. |
| 74 | + |
| 75 | + |
| 76 | +# Things to remember |
| 77 | + |
| 78 | +# 1. It can be difficult to understand how Python programs use and leak |
| 79 | +# memory. |
| 80 | +# 2. The gc module can help you understand which objects exist, but it has no |
| 81 | +# information about how they were allocated. |
| 82 | +# 3. The tracemalloc built-in module provides powerful tools for understanding |
| 83 | +# the source of memory usage. |
| 84 | +# 4. tracemalloc is only available in Python 3.4 and above. |
0 commit comments