Skip to content

Commit 705ee28

Browse files
committed
Improve performance of serialization.
(a) We reduce the number of times the data has to be copied into intermediate buffers. (b) We allow things like ListOf(int) to use a fast path that avoids varint-encoding every element (this increases the serialized size but is faster).
1 parent 29a4f34 commit 705ee28

13 files changed

+343
-95
lines changed

typed_python/NullSerializationContext.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ class NullSerializationContext : public SerializationContext {
3737
}
3838

3939

40+
// Reports whether POD lists should be serialized inline.
// The null context unconditionally answers true.
// NOTE(review): presumably this selects the ListOf(int)-style fast path that
// skips per-element varint encoding — confirm against SerializationContext.
virtual bool serializePodListsInline() const {
41+
return true;
42+
}
4043
virtual bool isCompressionEnabled() const {
4144
return false;
4245
}

typed_python/PythonSerializationContext.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@ void PythonSerializationContext::setFlags() {
3838

3939
mSerializeHashSequence = ((PyObject*)serializeHashSequence) == Py_True;
4040

41+
PyObjectStealer serializePodListsInline(PyObject_GetAttrString(mContextObj, "serializePodListsInline"));
42+
43+
if (!serializePodListsInline) {
44+
throw PythonExceptionSet();
45+
}
46+
47+
mSerializePodListsInline = ((PyObject*)serializePodListsInline) == Py_True;
48+
4149
PyObjectStealer encodeLineInformationForCode(PyObject_GetAttrString(mContextObj, "encodeLineInformationForCode"));
4250

4351
if (!encodeLineInformationForCode) {

typed_python/PythonSerializationContext.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ class PythonSerializationContext : public SerializationContext {
8383
PythonSerializationContext(PyObject* typeSetObj) :
8484
mContextObj(typeSetObj),
8585
mCompressionEnabled(false),
86+
mSerializePodListsInline(false),
8687
mSerializeHashSequence(false)
8788
{
8889
setFlags();
@@ -98,6 +99,10 @@ class PythonSerializationContext : public SerializationContext {
9899
return mSuppressLineInfo;
99100
}
100101

102+
// Returns the cached mSerializePodListsInline flag.
// setFlags() sets that flag to true only when the Python context object's
// `serializePodListsInline` attribute is exactly Py_True.
bool serializePodListsInline() const {
103+
return mSerializePodListsInline;
104+
}
105+
101106
// should we serialize an integer in the order of the
102107
// hash sequence rather than the hash itself?
103108
bool shouldSerializeHashSequence() const {
@@ -189,6 +194,8 @@ class PythonSerializationContext : public SerializationContext {
189194

190195
bool mCompressionEnabled;
191196

197+
bool mSerializePodListsInline;
198+
192199
bool mSuppressLineInfo;
193200

194201
bool mSerializeHashSequence;

typed_python/RegisterTypes.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,13 @@ class RegisterType : public Type {
263263
buffer.writeRegisterType(fieldNumber, *(T*)self);
264264
}
265265

266+
template<class buf_t>
267+
void serializeMultiConcrete(instance_ptr left, size_t count, size_t stride, buf_t& buffer, size_t fieldNumber) {
268+
for (long k = 0; k < count; k++) {
269+
buffer.writeRegisterType(fieldNumber, *(T*)(left + k * stride));
270+
}
271+
}
272+
266273
size_t deepBytecountConcrete(instance_ptr instance, std::unordered_set<void*>& alreadyVisited, std::set<Slab*>* outSlabs) {
267274
return 0;
268275
}

typed_python/SerializationBuffer.cpp

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ Bytes SerializationBuffer::serializeSingleBoolToBytes(bool value) {
2424
return Bytes((const char*)existsValue, 2);
2525
}
2626

27-
void SerializationBuffer::compress() {
28-
if (m_last_compression_point == m_size) {
27+
void SerializationBufferBlock::compress() {
28+
if (m_compressed) {
2929
return;
3030
}
3131

@@ -37,7 +37,7 @@ void SerializationBuffer::compress() {
3737

3838
//replace the data we have here with a block of 4 bytes of size of compressed data and
3939
//then the data stream
40-
size_t bytesRequired = LZ4F_compressFrameBound(m_size - m_last_compression_point, &lz4Prefs);
40+
size_t bytesRequired = LZ4F_compressFrameBound(m_size, &lz4Prefs);
4141

4242
void* compressedBytes = malloc(bytesRequired);
4343

@@ -49,26 +49,64 @@ void SerializationBuffer::compress() {
4949
compressedBytecount = LZ4F_compressFrame(
5050
compressedBytes,
5151
bytesRequired,
52-
m_buffer + m_last_compression_point,
53-
m_size - m_last_compression_point,
52+
m_buffer,
53+
m_size,
5454
&lz4Prefs
5555
);
5656

5757
if (LZ4F_isError(compressedBytecount)) {
58+
free(compressedBytes);
59+
5860
throw std::runtime_error(
5961
std::string("Error compressing data using LZ4: ")
6062
+ LZ4F_getErrorName(compressedBytecount)
6163
);
6264
}
6365
}
6466

65-
m_size = m_last_compression_point;
67+
m_size = 0;
6668

6769
write<uint32_t>(compressedBytecount);
6870

69-
write_bytes((uint8_t*)compressedBytes, compressedBytecount, false);
71+
write_bytes((uint8_t*)compressedBytes, compressedBytecount);
7072

7173
free(compressedBytes);
7274

73-
m_last_compression_point = m_size;
75+
m_compressed = true;
76+
}
77+
78+
void SerializationBuffer::consolidate() {
79+
if (m_wants_compress) {
80+
for (auto blockPtr: m_blocks) {
81+
blockPtr->compress();
82+
}
83+
}
84+
85+
if (m_blocks.size() == 1) {
86+
return;
87+
}
88+
89+
size_t totalSize = 0;
90+
91+
for (auto blockPtr: m_blocks) {
92+
totalSize += blockPtr->size();
93+
}
94+
95+
SerializationBufferBlock* block = new SerializationBufferBlock();
96+
block->ensure(totalSize);
97+
98+
for (auto blockPtr: m_blocks) {
99+
block->write_bytes(blockPtr->buffer(), blockPtr->size());
100+
}
101+
102+
if (m_wants_compress) {
103+
block->markCompressed();
104+
}
105+
106+
m_blocks.clear();
107+
m_blocks.push_back(
108+
std::shared_ptr<SerializationBufferBlock>(
109+
block
110+
)
111+
);
74112
}

0 commit comments

Comments
 (0)