Skip to content

Commit 8f82c6b

Browse files
Added support for nullable value, nested and empty collections in protobuf (#1430)
* Added support for nullable value, nested and empty collections in protobuf * Protobuf absent value reading speed-up - read marks for the first 64 fields are stored in a single Long value - read marks for the remaining fields are stored in a Long array - descriptor elements are analyzed only if the field wasn't read from bytes Co-authored-by: Leonid Startsev <sandwwraith@users.noreply.github.com>
1 parent c0976fd commit 8f82c6b

File tree

10 files changed

+1323
-76
lines changed

10 files changed

+1323
-76
lines changed
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
/*
2+
* Copyright 2017-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
3+
*/
4+
package kotlinx.benchmarks.protobuf
5+
6+
import kotlinx.serialization.Serializable
7+
import kotlinx.serialization.protobuf.*
8+
import org.openjdk.jmh.annotations.*
9+
import java.util.concurrent.*
10+
11+
/**
 * JMH benchmark for ProtoBuf encoding and decoding of a class with 130 `Int` properties.
 *
 * 130 elements is more than fit into a single 64-bit mask, so decoding this class exercises
 * descriptors with more than [Long.SIZE_BITS] elements.
 */
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 5, time = 1)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Fork(1)
open class ProtoHuge {

    /** Payload with 130 required `Int` properties named `field0` .. `field129`. */
    @Serializable
    data class Values130(
        val field0: Int, val field1: Int, val field2: Int, val field3: Int, val field4: Int,
        val field5: Int, val field6: Int, val field7: Int, val field8: Int, val field9: Int,

        val field10: Int, val field11: Int, val field12: Int, val field13: Int, val field14: Int,
        val field15: Int, val field16: Int, val field17: Int, val field18: Int, val field19: Int,

        val field20: Int, val field21: Int, val field22: Int, val field23: Int, val field24: Int,
        val field25: Int, val field26: Int, val field27: Int, val field28: Int, val field29: Int,

        val field30: Int, val field31: Int, val field32: Int, val field33: Int, val field34: Int,
        val field35: Int, val field36: Int, val field37: Int, val field38: Int, val field39: Int,

        val field40: Int, val field41: Int, val field42: Int, val field43: Int, val field44: Int,
        val field45: Int, val field46: Int, val field47: Int, val field48: Int, val field49: Int,

        val field50: Int, val field51: Int, val field52: Int, val field53: Int, val field54: Int,
        val field55: Int, val field56: Int, val field57: Int, val field58: Int, val field59: Int,

        val field60: Int, val field61: Int, val field62: Int, val field63: Int, val field64: Int,
        val field65: Int, val field66: Int, val field67: Int, val field68: Int, val field69: Int,

        val field70: Int, val field71: Int, val field72: Int, val field73: Int, val field74: Int,
        val field75: Int, val field76: Int, val field77: Int, val field78: Int, val field79: Int,

        val field80: Int, val field81: Int, val field82: Int, val field83: Int, val field84: Int,
        val field85: Int, val field86: Int, val field87: Int, val field88: Int, val field89: Int,

        val field90: Int, val field91: Int, val field92: Int, val field93: Int, val field94: Int,
        val field95: Int, val field96: Int, val field97: Int, val field98: Int, val field99: Int,

        val field100: Int, val field101: Int, val field102: Int, val field103: Int, val field104: Int,
        val field105: Int, val field106: Int, val field107: Int, val field108: Int, val field109: Int,

        val field110: Int, val field111: Int, val field112: Int, val field113: Int, val field114: Int,
        val field115: Int, val field116: Int, val field117: Int, val field118: Int, val field119: Int,

        val field120: Int, val field121: Int, val field122: Int, val field123: Int, val field124: Int,
        val field125: Int, val field126: Int, val field127: Int, val field128: Int, val field129: Int
    )

    // Fixture instance: every property holds its own ordinal (fieldN == N).
    private val sample = Values130(
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
        10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
        30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
        40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
        50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
        60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
        70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
        80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
        90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
        100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
        110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
        120, 121, 122, 123, 124, 125, 126, 127, 128, 129
    )

    // Pre-encoded form of the fixture, computed once and used as the decoding benchmark input.
    private val sampleBytes = ProtoBuf.encodeToByteArray(Values130.serializer(), sample)

    /** Measures encoding of the fixture to a ProtoBuf byte array. */
    @Benchmark
    fun toBytes130(): ByteArray = ProtoBuf.encodeToByteArray(Values130.serializer(), sample)

    /** Measures decoding of the pre-encoded fixture back into [Values130]. */
    @Benchmark
    fun fromBytes130(): Values130 = ProtoBuf.decodeFromByteArray(Values130.serializer(), sampleBytes)
}

formats/protobuf/commonMain/src/kotlinx/serialization/protobuf/internal/ProtobufDecoding.kt

Lines changed: 127 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,46 @@ internal open class ProtobufDecoder(
2828
private var indexCache: IntArray? = null
2929
private var sparseIndexCache: MutableMap<Int, Int>? = null
3030

31+
/*
32+
Marks recording which elements have already been decoded from the given bytes.
33+
The element number is the same as the bit position.
34+
Marks for the lowest 64 elements are always stored in a single Long value; marks for higher elements are stored in a LongArray.
35+
*/
36+
private var lowerReadMark: Long = 0
37+
private val highReadMarks: LongArray?
38+
39+
private var valueIsNull: Boolean = false
40+
3141
init {
42+
highReadMarks = prepareReadMarks(descriptor)
3243
populateCache(descriptor)
3344
}
3445

46+
/**
 * Sets up the read marks for [descriptor] so that every bit without a corresponding element
 * is pre-set to 1 and every bit that corresponds to an element starts at 0.
 *
 * For descriptors with at most [Long.SIZE_BITS] elements this assigns [lowerReadMark] as a
 * side effect and returns `null`. For larger descriptors [lowerReadMark] is left untouched
 * and an array holding the marks for elements 64 and above is returned.
 */
private fun prepareReadMarks(descriptor: SerialDescriptor): LongArray? {
    val fieldCount = descriptor.elementsCount

    if (fieldCount <= Long.SIZE_BITS) {
        lowerReadMark = when (fieldCount) {
            // every bit of the mark corresponds to a field, so nothing is pre-set
            // (also avoids `shl 64`, which only uses the low six bits of the shift count)
            Long.SIZE_BITS -> 0L
            // the upper (64 - fieldCount) bits have no field behind them and are always 1
            else -> -1L shl fieldCount
        }
        return null
    }

    // (fieldCount - 1) so that exact multiples of 64 do not allocate an extra, unused slot;
    // the first 64 fields live in lowerReadMark and are not counted here
    val marks = LongArray((fieldCount - 1) / Long.SIZE_BITS)
    val usedBitsInLastSlot = fieldCount % Long.SIZE_BITS
    // a remainder of 0 means the fields occupy every bit of the last slot
    if (usedBitsInLastSlot != 0) {
        // only the last slot can have unused bits; all other slots start fully cleared
        marks[marks.lastIndex] = -1L shl usedBitsInLastSlot
    }
    return marks
}
70+
3571
public fun populateCache(descriptor: SerialDescriptor) {
3672
val elements = descriptor.elementsCount
3773
if (elements < 32) {
@@ -97,7 +133,18 @@ internal open class ProtobufDecoder(
97133

98134
override fun beginStructure(descriptor: SerialDescriptor): CompositeDecoder {
99135
return when (descriptor.kind) {
100-
StructureKind.LIST -> RepeatedDecoder(proto, reader, currentTagOrDefault, descriptor)
136+
StructureKind.LIST -> {
137+
val tag = currentTagOrDefault
138+
return if (this.descriptor.kind == StructureKind.LIST && tag != MISSING_TAG && this.descriptor != descriptor) {
139+
val reader = makeDelimited(reader, tag)
140+
// repeated decoder expects the first tag to be read already
141+
reader.readTag()
142+
// all elements always have id = 1
143+
RepeatedDecoder(proto, reader, ProtoDesc(1, ProtoIntegerType.DEFAULT), descriptor)
144+
} else {
145+
RepeatedDecoder(proto, reader, tag, descriptor)
146+
}
147+
}
101148
StructureKind.CLASS, StructureKind.OBJECT, is PolymorphicKind -> {
102149
val tag = currentTagOrDefault
103150
// Do not create redundant copy
@@ -200,20 +247,98 @@ internal open class ProtobufDecoder(
200247

201248
override fun SerialDescriptor.getTag(index: Int) = extractParameters(index)
202249

250+
/**
 * Scans the read marks for elements that were not marked as read and returns the index of
 * the first one that should still be reported to the caller, or `-1` if there is none.
 *
 * Every inspected element is marked as read, so repeated calls continue where the previous
 * one stopped. See [shouldReportAbsentElement] for which elements are reported.
 */
private fun findUnreadElementIndex(): Int {
    // elements 0..63 are tracked in the single Long mark
    while (lowerReadMark != -1L) {
        // position of the lowest zero bit == index of the first unread element
        val index = lowerReadMark.inv().countTrailingZeroBits()
        lowerReadMark = lowerReadMark or (1L shl index)
        if (shouldReportAbsentElement(index)) return index
    }

    if (descriptor.elementsCount <= Long.SIZE_BITS) return -1

    val marks = highReadMarks!!
    for (slot in marks.indices) {
        // (slot + 1) because the first element tracked in the array has index 64
        val firstIndexInSlot = (slot + 1) * Long.SIZE_BITS
        // work on a local copy and write it back once per exit path
        var pending = marks[slot]

        while (pending != -1L) {
            val bit = pending.inv().countTrailingZeroBits()
            pending = pending or (1L shl bit)

            val index = firstIndexInSlot + bit
            if (shouldReportAbsentElement(index)) {
                marks[slot] = pending
                return index
            }
        }

        marks[slot] = pending
    }
    return -1
}

/**
 * Decides whether the absent element at [index] must be returned from [findUnreadElementIndex].
 *
 * Optional elements are never reported. Non-optional elements are reported when their
 * descriptor kind is `MAP` or `LIST`, or when their descriptor is nullable; in the nullable
 * case [valueIsNull] is additionally set to `true`.
 */
private fun shouldReportAbsentElement(index: Int): Boolean {
    if (descriptor.isElementOptional(index)) return false

    val element = descriptor.getElementDescriptor(index)
    val kind = element.kind
    if (kind == StructureKind.MAP || kind == StructureKind.LIST) return true
    if (element.isNullable) {
        valueIsNull = true
        return true
    }
    return false
}
302+
303+
/**
 * Sets the read-mark bit for the element at [index].
 *
 * Indices below [Long.SIZE_BITS] are stored in [lowerReadMark]; higher indices are stored in
 * [highReadMarks], whose first slot holds the marks of elements 64..127.
 */
private fun markElementAsRead(index: Int) {
    if (index >= Long.SIZE_BITS) {
        val marks = highReadMarks!!
        // -1 because the first 64 elements are not stored in the array
        val slot = index / Long.SIZE_BITS - 1
        val bit = 1L shl (index % Long.SIZE_BITS)
        marks[slot] = marks[slot] or bit
        return
    }
    lowerReadMark = lowerReadMark or (1L shl index)
}
312+
203313
/**
 * Returns the index of the next element to decode.
 *
 * Tags are read until one maps to a known element; elements with unknown tags are skipped.
 * A known element is marked as read before its index is returned. When the input is
 * exhausted, unread elements found by [findUnreadElementIndex] are returned one per call,
 * and [CompositeDecoder.DECODE_DONE] is returned once there are none left.
 */
override fun decodeElementIndex(descriptor: SerialDescriptor): Int {
    while (true) {
        val protoId = reader.readTag()
        if (protoId == -1) { // EOF
            val absentIndex = findUnreadElementIndex()
            return if (absentIndex != -1) absentIndex else CompositeDecoder.DECODE_DONE
        }

        when (val index = getIndexByTag(protoId)) {
            // tag is not present in the descriptor
            -1 -> reader.skipElement()
            else -> {
                markElementAsRead(index)
                return index
            }
        }
    }
}
333+
334+
/**
 * Returns `false` exactly once after [valueIsNull] has been set, resetting the flag;
 * returns `true` otherwise.
 */
override fun decodeNotNullMark(): Boolean {
    val isNotNull = !valueIsNull
    // consume the flag so that it applies to a single element only
    valueIsNull = false
    return isNotNull
}
217342
}
218343

219344
private class RepeatedDecoder(

formats/protobuf/commonMain/src/kotlinx/serialization/protobuf/internal/ProtobufEncoding.kt

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ internal open class ProtobufEncoder(
3232
if (tag == MISSING_TAG) {
3333
writer.writeInt(collectionSize)
3434
}
35-
RepeatedEncoder(proto, writer, tag, descriptor)
35+
if (this.descriptor.kind == StructureKind.LIST && tag != MISSING_TAG && this.descriptor != descriptor) {
36+
NestedRepeatedEncoder(proto, writer, tag, descriptor)
37+
} else {
38+
RepeatedEncoder(proto, writer, tag, descriptor)
39+
}
3640
}
3741
StructureKind.MAP -> {
3842
// Size and missing tag are managed by the implementation that delegated to the list
@@ -177,3 +181,18 @@ private class RepeatedEncoder(
177181
) : ProtobufEncoder(proto, writer, descriptor) {
178182
override fun SerialDescriptor.getTag(index: Int) = curTag
179183
}
184+
185+
/**
 * Encoder that buffers its output instead of writing directly to the outer [writer].
 *
 * Everything encoded through this instance goes to a [ProtobufWriter] backed by [stream].
 * When encoding ends, the buffered bytes are written to [writer] under the proto id of
 * [curTag].
 */
private class NestedRepeatedEncoder(
    proto: ProtoBuf,
    @JvmField val writer: ProtobufWriter,
    @JvmField val curTag: ProtoDesc,
    descriptor: SerialDescriptor,
    @JvmField val stream: ByteArrayOutput = ByteArrayOutput()
) : ProtobufEncoder(proto, ProtobufWriter(stream), descriptor) {

    // every element is written with id = 1, regardless of its index
    override fun SerialDescriptor.getTag(index: Int): ProtoDesc =
        ProtoDesc(1, ProtoIntegerType.DEFAULT)

    // flush the buffered elements to the outer writer under this encoder's own tag
    override fun endEncode(descriptor: SerialDescriptor) {
        writer.writeOutput(stream, curTag.protoId)
    }
}

formats/protobuf/commonMain/src/kotlinx/serialization/protobuf/internal/ProtobufTaggedDecoder.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ internal abstract class ProtobufTaggedDecoder : ProtobufTaggedBase(), Decoder, C
2626

2727
protected open fun decodeTaggedInline(tag: ProtoDesc, inlineDescriptor: SerialDescriptor): Decoder = this.apply { pushTag(tag) }
2828

29-
final override fun decodeNotNullMark(): Boolean = true
29+
override fun decodeNotNullMark(): Boolean = true
3030
final override fun decodeNull(): Nothing? = null
3131
final override fun decodeBoolean(): Boolean = decodeTaggedBoolean(popTagOrDefault())
3232
final override fun decodeByte(): Byte = decodeTaggedByte(popTagOrDefault())

0 commit comments

Comments
 (0)