Skip to content

Commit bf66cdd

Browse files
authored
fix: [Geneva Uploader] Eliminate hash collisions and redundant MD5 computation in schema deduplication - Perf improvement 5-10% (#486)
1 parent c955356 commit bf66cdd

File tree

4 files changed

+108
-113
lines changed

4 files changed

+108
-113
lines changed

opentelemetry-exporter-geneva/geneva-uploader/src/payload_encoder/bond_encoder.rs

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -107,14 +107,14 @@ pub(crate) struct FieldDef {
107107

108108
/// Schema definition that can be built dynamically
109109
#[derive(Clone)]
110-
pub(crate) struct DynamicSchema {
110+
pub(crate) struct DynamicSchema<'a> {
111111
pub struct_name: String,
112112
pub qualified_name: String,
113-
pub fields: Vec<FieldDef>,
113+
pub fields: &'a [FieldDef],
114114
}
115115

116-
impl DynamicSchema {
117-
pub(crate) fn new(name: &str, namespace: &str, fields: Vec<FieldDef>) -> Self {
116+
impl<'a> DynamicSchema<'a> {
117+
pub(crate) fn new(name: &str, namespace: &str, fields: &'a [FieldDef]) -> Self {
118118
Self {
119119
struct_name: name.to_string(),
120120
qualified_name: format!("{namespace}.{name}"),
@@ -380,7 +380,7 @@ pub(crate) struct BondEncodedSchema {
380380
}
381381

382382
impl BondEncodedSchema {
383-
pub(crate) fn from_fields(name: &str, namespace: &str, fields: Vec<FieldDef>) -> Self {
383+
pub(crate) fn from_fields(name: &str, namespace: &str, fields: &[FieldDef]) -> Self {
384384
let schema = DynamicSchema::new(name, namespace, fields);
385385
let encoded_bytes = schema.encode().expect("Schema encoding failed");
386386

@@ -428,7 +428,7 @@ mod tests {
428428
},
429429
];
430430

431-
let schema = DynamicSchema::new("TestStruct", "test.namespace", fields);
431+
let schema = DynamicSchema::new("TestStruct", "test.namespace", &fields);
432432
let encoded = schema.encode().unwrap();
433433
assert!(!encoded.is_empty());
434434
}
@@ -453,7 +453,7 @@ mod tests {
453453
},
454454
];
455455

456-
let schema = BondEncodedSchema::from_fields("OtlpLogRecord", "telemetry", fields);
456+
let schema = BondEncodedSchema::from_fields("OtlpLogRecord", "telemetry", &fields);
457457
let bytes = schema.as_bytes();
458458
assert!(!bytes.is_empty());
459459
}
@@ -475,7 +475,7 @@ mod tests {
475475
},
476476
];
477477

478-
let schema = BondEncodedSchema::from_fields("TestStruct", "test.namespace", fields);
478+
let schema = BondEncodedSchema::from_fields("TestStruct", "test.namespace", &fields);
479479
let bytes = schema.as_bytes();
480480
assert!(!bytes.is_empty());
481481
}
@@ -504,7 +504,7 @@ mod tests {
504504
})
505505
.collect();
506506

507-
let schema = DynamicSchema::new("TestStruct", "test.namespace", fields);
507+
let schema = DynamicSchema::new("TestStruct", "test.namespace", &fields);
508508
let exact_size = schema.calculate_exact_encoded_size();
509509
let encoded = schema.encode().unwrap();
510510

opentelemetry-exporter-geneva/geneva-uploader/src/payload_encoder/central_blob.rs

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
//use md5;
22

3-
use crate::payload_encoder::bond_encoder::BondEncodedSchema;
3+
use crate::payload_encoder::bond_encoder::{BondEncodedSchema, FieldDef};
44
use chrono::{DateTime, Datelike, Timelike, Utc};
55
use std::sync::Arc;
66

@@ -72,6 +72,10 @@ pub(crate) struct CentralSchemaEntry {
7272
pub id: u64,
7373
pub md5: [u8; 16],
7474
pub schema: BondEncodedSchema,
75+
// Store fields for deduplication
76+
// TODO: It might be faster to encode schema per event and compare encoded bytes with stored schemas
77+
// instead of checking each field. Needs benchmarking to determine which approach is faster.
78+
pub fields: Vec<FieldDef>,
7579
}
7680

7781
/// Event/row entry for central blob
@@ -250,7 +254,7 @@ mod tests {
250254
field_id: 2u16,
251255
},
252256
];
253-
let schema_obj = BondEncodedSchema::from_fields("TestStruct", "test.namespace", fields);
257+
let schema_obj = BondEncodedSchema::from_fields("TestStruct", "test.namespace", &fields);
254258
let schema_bytes = schema_obj.as_bytes().to_vec();
255259
let schema_md5 = md5_bytes(&schema_bytes);
256260
let schema_id = 1234u64;
@@ -259,6 +263,7 @@ mod tests {
259263
id: schema_id,
260264
md5: schema_md5,
261265
schema: schema_obj,
266+
fields,
262267
};
263268

264269
// Prepare a row

opentelemetry-exporter-geneva/geneva-uploader/src/payload_encoder/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ mod tests {
1313
use crate::payload_encoder::bond_encoder::FieldDef;
1414

1515
fn create_payload(fields: Vec<FieldDef>, row_data: Vec<u8>) -> Vec<u8> {
16-
let schema_obj = BondEncodedSchema::from_fields("MdsContainer", "testNamespace", fields);
16+
let schema_obj = BondEncodedSchema::from_fields("MdsContainer", "testNamespace", &fields);
1717
let schema_bytes = schema_obj.as_bytes();
1818
let schema_md5 = md5::compute(schema_bytes).0;
1919
let schema_id = 1u64;
@@ -22,6 +22,7 @@ mod tests {
2222
id: schema_id,
2323
md5: schema_md5,
2424
schema: schema_obj,
25+
fields,
2526
};
2627

2728
let event = CentralEventEntry {

0 commit comments

Comments
 (0)