@@ -12,86 +12,92 @@ use std::ffi::c_int;
1212use std:: ffi:: c_void;
1313
1414use bytes:: Buf ;
15- use bytes:: BytesMut ;
1615use hyperactor:: Named ;
1716use pyo3:: buffer:: PyBuffer ;
1817use pyo3:: prelude:: * ;
1918use pyo3:: types:: PyBytes ;
2019use pyo3:: types:: PyBytesMethods ;
2120use serde:: Deserialize ;
2221use serde:: Serialize ;
22+ use serde_multipart:: FragmentedPart ;
23+ use serde_multipart:: Part ;
24+
25+ /// Owns a `Py<PyBytes>` handle together with the cached address and length of its payload, so the bytes can be viewed as `&[u8]` without copying.
26+ struct PyBytesWrapper {
27+ _py_bytes : Py < PyBytes > , // never read; held only so the Python object outlives `ptr`
28+ ptr : * const u8 , // start of the bytes payload, captured in `new`
29+ len : usize , // payload length in bytes, captured in `new`
30+ }
31+
32+ impl PyBytesWrapper {
33+ fn new ( py_bytes : Py < PyBytes > ) -> Self { // takes ownership of the handle and records where its payload lives
34+ let ( ptr, len) = Python :: with_gil ( |py| { // a `py` token is required to borrow the payload from the handle
35+ let bytes_ref = py_bytes. as_bytes ( py) ;
36+ ( bytes_ref. as_ptr ( ) , bytes_ref. len ( ) ) // only the raw address and length leave the closure, not the borrow
37+ } ) ;
38+ Self {
39+ _py_bytes : py_bytes, // moved in so the object stays alive for as long as `ptr` is used
40+ ptr,
41+ len,
42+ }
43+ }
44+ }
45+
46+ impl AsRef < [ u8 ] > for PyBytesWrapper { // exposes the Python-owned payload as a plain byte slice
47+ fn as_ref ( & self ) -> & [ u8 ] {
48+ // SAFETY: `ptr`/`len` were read from `_py_bytes` in `new`, and this struct owns that `Py<PyBytes>` for its whole lifetime,
49+ // so the refcount cannot reach 0 (and Python cannot free the payload) while `&self` is borrowed. Assumes the `bytes` payload is never mutated or relocated.
50+ unsafe { std:: slice:: from_raw_parts ( self . ptr , self . len ) }
51+ }
52+ }
53+
54+ // SAFETY: only the raw `ptr` field blocks the auto trait; it is read-only and points into an immutable `bytes` payload kept alive by the `Py<PyBytes>` field, which is itself Send
55+ unsafe impl Send for PyBytesWrapper { }
56+ // SAFETY: shared access only ever reads through `ptr` (see `as_ref`), the target is an immutable `bytes` payload, and the owning `Py<PyBytes>` field is itself Sync
57+ unsafe impl Sync for PyBytesWrapper { }
2358
2459/// A mutable buffer for reading and writing bytes data.
2560///
26- /// The `Buffer` struct provides an interface for accumulating byte data that can be written to
27- /// and then frozen into an immutable `FrozenBuffer ` for reading. It uses the `bytes::BytesMut`
28- /// internally for efficient memory management .
61+ /// The `Buffer` struct provides an interface for accumulating byte data from Python `bytes` objects
62+ /// that can be converted into a `FragmentedPart` for zero-copy multipart message serialization.
63+ /// It accumulates references to Python bytes objects without copying.
2964///
3065/// # Examples
3166///
3267/// ```python
3368/// from monarch._rust_bindings.monarch_hyperactor.buffers import Buffer
3469///
35- /// # Create a new buffer with default capacity (4096 bytes)
70+ /// # Create a new buffer
3671/// buffer = Buffer()
3772///
3873/// # Write some data
3974/// data = b"Hello, World!"
4075/// bytes_written = buffer.write(data)
4176///
42- /// # Check length
43- /// print(len(buffer)) # 13
44- ///
45- /// # Freeze for reading
46- /// frozen = buffer.freeze()
47- /// content = frozen.read()
77+ /// # Use in multipart serialization
78+ /// # The buffer accumulates multiple writes as separate fragments
4879/// ```
4980#[ pyclass( subclass, module = "monarch._rust_bindings.monarch_hyperactor.buffers" ) ]
50- #[ derive( Clone , Serialize , Deserialize , Named , PartialEq , Default ) ]
81+ #[ derive( Clone , Default ) ]
5182pub struct Buffer {
52- pub ( crate ) inner : bytes:: BytesMut ,
53- }
54-
55- impl Buffer {
56- /// Consumes the Buffer and returns the underlying BytesMut.
57- /// This allows zero-copy access to the raw buffer data.
58- pub fn into_inner ( self ) -> bytes:: BytesMut {
59- self . inner
60- }
61- }
62-
63- impl < T > From < T > for Buffer
64- where
65- T : Into < BytesMut > ,
66- {
67- fn from ( value : T ) -> Self {
68- Self {
69- inner : value. into ( ) ,
70- }
71- }
83+ inner : Vec < Py < PyBytes > > , // one entry per `write` call, in write order; emptied by `into_fragmented_part`
7284}
7385
7486#[ pymethods]
7587impl Buffer {
7688 /// Creates a new empty buffer.
7789 ///
78- /// # Arguments
79- /// * `size` - Initial capacity in bytes (default: 4096)
8090 ///
8191 /// # Returns
8292 /// A new empty `Buffer` instance holding no fragments.
8393 #[ new]
84- #[ pyo3( signature=( size=4096 ) ) ]
85- fn new ( size : usize ) -> Self {
86- Self {
87- inner : bytes:: BytesMut :: with_capacity ( size) ,
88- }
94+ fn new ( ) -> Self {
95+ Self { inner : Vec :: new ( ) }
8996 }
9097
9198 /// Writes bytes data to the buffer.
9299 ///
93- /// Appends the provided bytes to the end of the buffer, extending its capacity
94- /// if necessary.
100+ /// Appends a new reference to the Python `bytes` object as one fragment; the payload itself is not copied.
95101 ///
96102 /// # Arguments
97103 /// * `buff` - The bytes object to write to the buffer
@@ -100,26 +106,49 @@ impl Buffer {
100106 /// The number of bytes written (always equal to the length of the input bytes)
101107 fn write < ' py > ( & mut self , buff : & Bound < ' py , PyBytes > ) -> usize {
102108 let bytes_written = buff. as_bytes ( ) . len ( ) ;
103- self . inner . extend_from_slice ( buff. as_bytes ( ) ) ;
109+ self . inner . push ( buff. clone ( ) . unbind ( ) ) ; // store an owned handle to the same Python object rather than its contents
104110 bytes_written
105111 }
106112
107- /// Freezes this buffer into an immutable `FrozenBuffer`.
113+ /// Freezes the buffer, converting it into an immutable `FrozenBuffer` for reading.
114+ ///
115+ /// This consumes all accumulated PyBytes and converts them into a contiguous bytes buffer.
116+ /// After freezing, the original buffer is cleared.
108117 ///
109- /// This operation consumes the mutable buffer's contents, transferring ownership
110- /// to a new `FrozenBuffer` that can only be read from. The original buffer
111- /// becomes empty after this operation.
118+ /// This operation should be avoided in hot paths as it creates a copy in order to concatenate
119+ /// bytes that are fragmented in memory into a single series of contiguous bytes.
112120 ///
113121 /// # Returns
114- /// A new `FrozenBuffer` containing all the data that was in this buffer
122+ /// A new `FrozenBuffer` containing all the bytes that were written to this buffer
115123 fn freeze ( & mut self ) -> FrozenBuffer {
116- let buff = std :: mem :: take ( & mut self . inner ) ;
124+ let fragmented_part = self . into_fragmented_part ( ) ; // drains `self.inner`, leaving this buffer empty
117125 FrozenBuffer {
118- inner : buff . freeze ( ) ,
126+ inner : fragmented_part . into_bytes ( ) , // NOTE(review): presumably where the fragments are copied into one contiguous buffer; confirm against `FragmentedPart::into_bytes`
119127 }
120128 }
121129}
122130
131+ impl Buffer {
132+ /// Moves the accumulated `PyBytes` objects out of this buffer and wraps them as a [`FragmentedPart`] for zero-copy multipart messages.
133+ ///
134+ /// Returns a `FragmentedPart::Fragmented` variant since the buffer accumulates multiple
135+ /// separate PyBytes objects that remain physically fragmented. Despite the `into_` name this takes `&mut self`; the buffer is left empty afterwards.
136+ pub fn into_fragmented_part ( & mut self ) -> FragmentedPart {
137+ let inner = std:: mem:: take ( & mut self . inner ) ; // swap in an empty Vec so the handles can be moved out
138+
139+ FragmentedPart :: Fragmented (
140+ inner
141+ . into_iter ( )
142+ . map ( |py_bytes| {
143+ let wrapper = PyBytesWrapper :: new ( py_bytes) ; // caches the payload pointer/length and keeps the object alive
144+ let bytes = bytes:: Bytes :: from_owner ( wrapper) ; // `Bytes` view over the wrapper's memory; the wrapper is dropped when the last `Bytes` handle goes away
145+ Part :: from ( bytes)
146+ } )
147+ . collect ( ) ,
148+ )
149+ }
150+ }
151+
123152/// An immutable buffer for reading bytes data.
124153///
125154/// The `FrozenBuffer` struct provides a read-only interface to byte data. Once created,
0 commit comments