Skip to content

Commit ba8d6e9

Browse files
committed
Skip label name sanitization if utf8 label names enabled
1 parent c26f585 commit ba8d6e9

File tree

4 files changed

+212
-17
lines changed

4 files changed

+212
-17
lines changed

pkg/distributor/distributor.go

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ import (
1717
"connectrpc.com/connect"
1818
"go.uber.org/atomic"
1919

20+
"github.com/grafana/pyroscope/pkg/featureflags"
21+
2022
"github.com/dustin/go-humanize"
2123
"github.com/go-kit/log"
2224
"github.com/go-kit/log/level"
@@ -651,7 +653,7 @@ func visitSampleSeriesForIngester(profile *profilev1.Profile, labels []*typesv1.
651653
}
652654

653655
func (d *Distributor) sendRequestsToIngester(ctx context.Context, req *distributormodel.ProfileSeries) (resp *connect.Response[pushv1.PushResponse], err error) {
654-
sampleSeries, err := d.visitSampleSeries(req, visitSampleSeriesForIngester)
656+
sampleSeries, err := d.visitSampleSeries(ctx, req, visitSampleSeriesForIngester)
655657
if err != nil {
656658
return nil, err
657659
}
@@ -740,7 +742,7 @@ func (d *Distributor) sendRequestsToSegmentWriter(ctx context.Context, req *dist
740742
// NOTE(kolesnikovae): if we return early, e.g., due to a validation error,
741743
// or if there are no series, the write path router has already seen the
742744
// request, and could have already accounted for the size, latency, etc.
743-
serviceSeries, err := d.visitSampleSeries(req, visitSampleSeriesForSegmentWriter)
745+
serviceSeries, err := d.visitSampleSeries(ctx, req, visitSampleSeriesForSegmentWriter)
744746
if err != nil {
745747
return nil, err
746748
}
@@ -1126,15 +1128,23 @@ func injectMappingVersions(s *distributormodel.ProfileSeries) error {
11261128

11271129
type visitFunc func(*profilev1.Profile, []*typesv1.LabelPair, []*relabel.Config, *sampleSeriesVisitor) error
11281130

1129-
func (d *Distributor) visitSampleSeries(s *distributormodel.ProfileSeries, visit visitFunc) ([]*distributormodel.ProfileSeries, error) {
1131+
func (d *Distributor) visitSampleSeries(ctx context.Context, s *distributormodel.ProfileSeries, visit visitFunc) ([]*distributormodel.ProfileSeries, error) {
11301132
relabelingRules := d.limits.IngestionRelabelingRules(s.TenantID)
11311133
usageConfig := d.limits.DistributorUsageGroups(s.TenantID)
11321134
var result []*distributormodel.ProfileSeries
11331135
usageGroups := d.usageGroupEvaluator.GetMatch(s.TenantID, usageConfig, s.Labels)
1136+
1137+
// Extract client capabilities from context
1138+
var clientCapabilities *featureflags.ClientCapabilities
1139+
if capabilities, ok := featureflags.GetClientCapabilities(ctx); ok {
1140+
clientCapabilities = &capabilities
1141+
}
1142+
11341143
visitor := &sampleSeriesVisitor{
1135-
tenantID: s.TenantID,
1136-
limits: d.limits,
1137-
profile: s.Profile,
1144+
tenantID: s.TenantID,
1145+
limits: d.limits,
1146+
profile: s.Profile,
1147+
clientCapabilities: clientCapabilities,
11381148
}
11391149
if err := visit(s.Profile.Profile, s.Labels, relabelingRules, visitor); err != nil {
11401150
validation.DiscardedProfiles.WithLabelValues(string(validation.ReasonOf(err)), s.TenantID).Add(float64(s.TotalProfiles))
@@ -1164,18 +1174,19 @@ func (d *Distributor) visitSampleSeries(s *distributormodel.ProfileSeries, visit
11641174
}
11651175

11661176
type sampleSeriesVisitor struct {
1167-
tenantID string
1168-
limits Limits
1169-
profile *pprof.Profile
1170-
exp *pprof.SampleExporter
1171-
series []*distributormodel.ProfileSeries
1177+
tenantID string
1178+
limits Limits
1179+
profile *pprof.Profile
1180+
exp *pprof.SampleExporter
1181+
clientCapabilities *featureflags.ClientCapabilities
1182+
series []*distributormodel.ProfileSeries
11721183

11731184
discardedBytes int
11741185
discardedProfiles int
11751186
}
11761187

11771188
func (v *sampleSeriesVisitor) ValidateLabels(labels phlaremodel.Labels) error {
1178-
return validation.ValidateLabels(v.limits, v.tenantID, labels)
1189+
return validation.ValidateLabels(v.clientCapabilities, v.limits, v.tenantID, labels)
11791190
}
11801191

11811192
func (v *sampleSeriesVisitor) VisitProfile(labels phlaremodel.Labels) {

pkg/distributor/distributor_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,7 +1228,7 @@ func Test_SampleLabels_Ingester(t *testing.T) {
12281228
}}, overrides, nil, log.NewLogfmtLogger(os.Stdout), nil)
12291229
require.NoError(t, err)
12301230
var series []*distributormodel.ProfileSeries
1231-
series, err = d.visitSampleSeries(tc.pushReq, visitSampleSeriesForIngester)
1231+
series, err = d.visitSampleSeries(context.Background(), tc.pushReq, visitSampleSeriesForIngester)
12321232
assert.Equal(t, tc.expectBytesDropped, float64(tc.pushReq.DiscardedBytesRelabeling))
12331233
assert.Equal(t, tc.expectProfilesDropped, float64(tc.pushReq.DiscardedProfilesRelabeling))
12341234

@@ -1786,7 +1786,7 @@ func Test_SampleLabels_SegmentWriter(t *testing.T) {
17861786

17871787
require.NoError(t, err)
17881788
var series []*distributormodel.ProfileSeries
1789-
series, err = d.visitSampleSeries(tc.pushReq, visitSampleSeriesForSegmentWriter)
1789+
series, err = d.visitSampleSeries(context.Background(), tc.pushReq, visitSampleSeriesForSegmentWriter)
17901790
assert.Equal(t, tc.expectBytesDropped, float64(tc.pushReq.DiscardedBytesRelabeling))
17911791
assert.Equal(t, tc.expectProfilesDropped, float64(tc.pushReq.DiscardedProfilesRelabeling))
17921792

pkg/validation/validate.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"time"
1010
"unicode/utf8"
1111

12+
"github.com/grafana/pyroscope/pkg/featureflags"
1213
"github.com/grafana/pyroscope/pkg/pprof"
1314

1415
"github.com/go-kit/log/level"
@@ -115,7 +116,7 @@ type LabelValidationLimits interface {
115116
}
116117

117118
// ValidateLabels validates the labels of a profile.
118-
func ValidateLabels(limits LabelValidationLimits, tenantID string, ls []*typesv1.LabelPair) error {
119+
func ValidateLabels(clientCapabilities *featureflags.ClientCapabilities, limits LabelValidationLimits, tenantID string, ls []*typesv1.LabelPair) error {
119120
if len(ls) == 0 {
120121
return NewErrorf(MissingLabels, MissingLabelsErrorMsg)
121122
}
@@ -146,7 +147,12 @@ func ValidateLabels(limits LabelValidationLimits, tenantID string, ls []*typesv1
146147
if len(l.Value) > limits.MaxLabelValueLength(tenantID) {
147148
return NewErrorf(LabelValueTooLong, LabelValueTooLongErrorMsg, phlaremodel.LabelPairsString(ls), l.Value)
148149
}
149-
if origName, newName, ok := SanitizeLegacyLabelName(l.Name); ok && origName != newName {
150+
if clientCapabilities != nil && clientCapabilities.AllowUtf8LabelNames {
151+
// Using Prometheus utf-8 label name validation
152+
if !model.LabelName(l.Name).IsValid() {
153+
return NewErrorf(InvalidLabels, InvalidLabelsErrorMsg, phlaremodel.LabelPairsString(ls), "invalid label name '"+l.Name+"'")
154+
}
155+
} else if origName, newName, ok := SanitizeLegacyLabelName(l.Name); ok && origName != newName {
150156
var err error
151157
ls, idx, err = handleSanitizedLabel(ls, idx, origName, newName)
152158
if err != nil {

pkg/validation/validate_test.go

Lines changed: 179 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
1111
typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
12+
"github.com/grafana/pyroscope/pkg/featureflags"
1213
phlaremodel "github.com/grafana/pyroscope/pkg/model"
1314
"github.com/grafana/pyroscope/pkg/pprof"
1415
)
@@ -136,7 +137,7 @@ func TestValidateLabels(t *testing.T) {
136137
},
137138
} {
138139
t.Run(tt.name, func(t *testing.T) {
139-
err := ValidateLabels(MockLimits{
140+
err := ValidateLabels(nil, MockLimits{
140141
MaxLabelNamesPerSeriesValue: 4,
141142
MaxLabelNameLengthValue: 12,
142143
MaxLabelValueLengthValue: 10,
@@ -152,6 +153,183 @@ func TestValidateLabels(t *testing.T) {
152153
}
153154
}
154155

156+
func TestValidateLabels_WithClientCapabilities(t *testing.T) {
157+
for _, tt := range []struct {
158+
name string
159+
clientCapabilities *featureflags.ClientCapabilities
160+
lbs []*typesv1.LabelPair
161+
expectedErr string
162+
expectedReason Reason
163+
expectedLabels []*typesv1.LabelPair // Expected labels after validation/sanitization
164+
}{
165+
{
166+
name: "UTF-8 labels allowed when capabilities enabled",
167+
clientCapabilities: &featureflags.ClientCapabilities{
168+
AllowUtf8LabelNames: true,
169+
},
170+
lbs: []*typesv1.LabelPair{
171+
{Name: model.MetricNameLabel, Value: "cpu"},
172+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
173+
{Name: "日本語", Value: "test"},
174+
{Name: "emoji_🚀", Value: "rocket"},
175+
},
176+
// Labels get sorted alphabetically
177+
expectedLabels: []*typesv1.LabelPair{
178+
{Name: model.MetricNameLabel, Value: "cpu"},
179+
{Name: "emoji_🚀", Value: "rocket"},
180+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
181+
{Name: "日本語", Value: "test"},
182+
},
183+
},
184+
{
185+
name: "UTF-8 labels rejected when capabilities disabled",
186+
clientCapabilities: nil,
187+
lbs: []*typesv1.LabelPair{
188+
{Name: model.MetricNameLabel, Value: "cpu"},
189+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
190+
{Name: "日本語", Value: "test"},
191+
},
192+
expectedErr: `invalid labels '{__name__="cpu", service_name="svc", 日本語="test"}' with error: invalid label name '日本語'`,
193+
expectedReason: InvalidLabels,
194+
},
195+
{
196+
name: "UTF-8 labels rejected when AllowUtf8LabelNames false",
197+
clientCapabilities: &featureflags.ClientCapabilities{
198+
AllowUtf8LabelNames: false,
199+
},
200+
lbs: []*typesv1.LabelPair{
201+
{Name: model.MetricNameLabel, Value: "cpu"},
202+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
203+
{Name: "café", Value: "test"},
204+
},
205+
expectedErr: `invalid labels '{__name__="cpu", café="test", service_name="svc"}' with error: invalid label name 'café'`,
206+
expectedReason: InvalidLabels,
207+
},
208+
{
209+
name: "valid underscore and hyphen labels with UTF-8 enabled",
210+
clientCapabilities: &featureflags.ClientCapabilities{
211+
AllowUtf8LabelNames: true,
212+
},
213+
lbs: []*typesv1.LabelPair{
214+
{Name: model.MetricNameLabel, Value: "cpu"},
215+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
216+
{Name: "label_with_underscore", Value: "test"},
217+
{Name: "label-with-hyphen", Value: "test2"},
218+
},
219+
expectedLabels: []*typesv1.LabelPair{
220+
{Name: model.MetricNameLabel, Value: "cpu"},
221+
{Name: "label-with-hyphen", Value: "test2"},
222+
{Name: "label_with_underscore", Value: "test"},
223+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
224+
},
225+
},
226+
{
227+
name: "legacy invalid label names rejected when capabilities disabled",
228+
clientCapabilities: nil,
229+
lbs: []*typesv1.LabelPair{
230+
{Name: model.MetricNameLabel, Value: "cpu"},
231+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
232+
{Name: "123invalid", Value: "test"},
233+
},
234+
expectedErr: `invalid labels '{123invalid="test", __name__="cpu", service_name="svc"}' with error: invalid label name '123invalid'`,
235+
expectedReason: InvalidLabels,
236+
},
237+
{
238+
name: "dots allowed as-is with UTF-8 enabled",
239+
clientCapabilities: &featureflags.ClientCapabilities{
240+
AllowUtf8LabelNames: true,
241+
},
242+
lbs: []*typesv1.LabelPair{
243+
{Name: model.MetricNameLabel, Value: "cpu"},
244+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
245+
{Name: "app.kubernetes.io/name", Value: "test"},
246+
},
247+
// With UTF-8 enabled, dots are allowed (Prometheus UTF-8 validation)
248+
expectedLabels: []*typesv1.LabelPair{
249+
{Name: model.MetricNameLabel, Value: "cpu"},
250+
{Name: "app.kubernetes.io/name", Value: "test"},
251+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
252+
},
253+
},
254+
{
255+
name: "dots rejected without UTF-8",
256+
clientCapabilities: nil,
257+
lbs: []*typesv1.LabelPair{
258+
{Name: model.MetricNameLabel, Value: "cpu"},
259+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
260+
{Name: "app.kubernetes.io/name", Value: "test"},
261+
},
262+
expectedErr: `invalid labels '{__name__="cpu", app.kubernetes.io/name="test", service_name="svc"}' with error: invalid label name 'app.kubernetes.io/name'`,
263+
expectedReason: InvalidLabels,
264+
},
265+
{
266+
name: "hyphens allowed with UTF-8 enabled",
267+
clientCapabilities: &featureflags.ClientCapabilities{
268+
AllowUtf8LabelNames: true,
269+
},
270+
lbs: []*typesv1.LabelPair{
271+
{Name: model.MetricNameLabel, Value: "cpu"},
272+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
273+
{Name: "some-label-name", Value: "test"},
274+
},
275+
expectedLabels: []*typesv1.LabelPair{
276+
{Name: model.MetricNameLabel, Value: "cpu"},
277+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
278+
{Name: "some-label-name", Value: "test"},
279+
},
280+
},
281+
{
282+
name: "mixed ASCII and UTF-8 labels with capabilities enabled",
283+
clientCapabilities: &featureflags.ClientCapabilities{
284+
AllowUtf8LabelNames: true,
285+
},
286+
lbs: []*typesv1.LabelPair{
287+
{Name: model.MetricNameLabel, Value: "cpu"},
288+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
289+
{Name: "region", Value: "us-east-1"},
290+
{Name: "地域", Value: "東京"},
291+
{Name: "environment", Value: "prod"},
292+
},
293+
// Labels get sorted alphabetically
294+
expectedLabels: []*typesv1.LabelPair{
295+
{Name: model.MetricNameLabel, Value: "cpu"},
296+
{Name: "environment", Value: "prod"},
297+
{Name: "region", Value: "us-east-1"},
298+
{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
299+
{Name: "地域", Value: "東京"},
300+
},
301+
},
302+
} {
303+
t.Run(tt.name, func(t *testing.T) {
304+
lbsCopy := make([]*typesv1.LabelPair, len(tt.lbs))
305+
for i, lb := range tt.lbs {
306+
lbsCopy[i] = &typesv1.LabelPair{Name: lb.Name, Value: lb.Value}
307+
}
308+
309+
err := ValidateLabels(tt.clientCapabilities, MockLimits{
310+
MaxLabelNamesPerSeriesValue: 10,
311+
MaxLabelNameLengthValue: 1024,
312+
MaxLabelValueLengthValue: 2048,
313+
}, "test-tenant", lbsCopy)
314+
315+
if tt.expectedErr != "" {
316+
require.Error(t, err)
317+
require.Equal(t, tt.expectedErr, err.Error())
318+
require.Equal(t, tt.expectedReason, ReasonOf(err))
319+
} else {
320+
require.NoError(t, err)
321+
if tt.expectedLabels != nil {
322+
require.Equal(t, len(tt.expectedLabels), len(lbsCopy), "label count mismatch")
323+
for i, expected := range tt.expectedLabels {
324+
require.Equal(t, expected.Name, lbsCopy[i].Name, "label name mismatch at index %d", i)
325+
require.Equal(t, expected.Value, lbsCopy[i].Value, "label value mismatch at index %d", i)
326+
}
327+
}
328+
}
329+
})
330+
}
331+
}
332+
155333
func Test_ValidateRangeRequest(t *testing.T) {
156334
now := model.Now()
157335
for _, tt := range []struct {

0 commit comments

Comments
 (0)