Skip to content

Commit c29d3da

Browse files
authored
feat(lambda): Add widget and alarms on init duration (#646)
Fixes #462. Adds `init_duration` to `LambdaFunctionMonitoring`. Monitoring the INIT phase has become increasingly important because: 1. Lambda now bills for the INIT phase. [Blog](https://aws.amazon.com/blogs/compute/aws-lambda-standardizes-billing-for-init-phase/). 2. With the growing adoption of Provisioned Concurrency and SnapStart, people want visibility into INIT durations. While making changes for this PR, I noticed an issue with how the function cost is calculated (#645). I’ll fix that next and also include INIT as part of the cost calculation. <img width="1458" alt="image" src="https://github.com/user-attachments/assets/5e0b53ce-304c-4ae6-94ff-e2d1e30b05ad" /> --- _By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license_
1 parent cf1aade commit c29d3da

File tree

6 files changed

+739
-37
lines changed

6 files changed

+739
-37
lines changed

API.md

Lines changed: 193 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/common/monitoring/alarms/LatencyAlarmFactory.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,41 @@ export class LatencyAlarmFactory {
246246
});
247247
}
248248

249+
/**
 * Adds a duration alarm for an arbitrary, caller-named duration metric.
 *
 * The alarm name suffix is built by joining `durationName`, `latencyType` and
 * (when defined) `additionalAlarmNameSuffix` with "-". The alarm is then
 * created through `this.alarmFactory.addAlarm`.
 *
 * @param metric metric the alarm is created on
 * @param latencyType used in the alarm name suffix and in the alarm description
 * @param props threshold settings; `maxDuration` (converted to milliseconds)
 * becomes the alarm threshold, and the whole object is also spread into the
 * alarm properties
 * @param durationName used in the alarm name suffix, the default dedupe
 * suffix (`Any<durationName>`) and the alarm description
 * @param disambiguator passed through unchanged to `addAlarm`
 * @param additionalAlarmNameSuffix optional extra part appended to the alarm
 * name suffix; omitted from the suffix when undefined
 * @returns whatever `this.alarmFactory.addAlarm` returns
 */
addCustomDurationAlarm(
250+
metric: MetricWithAlarmSupport,
251+
latencyType: LatencyType,
252+
props: DurationThreshold,
253+
durationName: string,
254+
disambiguator?: string,
255+
additionalAlarmNameSuffix: string | undefined = undefined,
256+
) {
257+
const alarmNameSuffix = [
258+
durationName,
259+
latencyType,
260+
additionalAlarmNameSuffix,
261+
]
262+
// drop the optional extra suffix when it was not supplied
.filter((i) => i !== undefined)
263+
.join("-");
264+
265+
return this.alarmFactory.addAlarm(metric, {
266+
// defaults: missing data is treated as not breaching unless overridden
treatMissingData:
267+
props.treatMissingDataOverride ?? TreatMissingData.NOT_BREACHING,
268+
// defaults: alarm when the metric is strictly greater than the threshold
comparisonOperator:
269+
props.comparisonOperatorOverride ??
270+
ComparisonOperator.GREATER_THAN_THRESHOLD,
271+
// NOTE(review): props is spread after the two defaults above, so any
// same-named keys on props would win; the keys listed below win over props
...props,
272+
disambiguator,
273+
threshold: props.maxDuration.toMilliseconds({ integral: false }),
274+
alarmNameSuffix,
275+
// we will dedupe any kind of latency issue to the same ticket
276+
alarmDedupeStringSuffix: this.alarmFactory
277+
.shouldUseDefaultDedupeForLatency
278+
? `Any${durationName}`
279+
: alarmNameSuffix,
280+
alarmDescription: `${latencyType} ${durationName} is too long.`,
281+
});
282+
}
283+
249284
addJvmGarbageCollectionDurationAlarm(
250285
metric: MetricWithAlarmSupport,
251286
latencyType: LatencyType,

lib/monitoring/aws-lambda/LambdaFunctionEnhancedMetricFactory.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,30 @@ export class LambdaFunctionEnhancedMetricFactory extends BaseMetricFactory<Lambd
7474
);
7575
}
7676

77+
/**
 * Returns the enhanced metric "init_duration" with the MAX statistic.
 * "InitDuration.Max" is passed as the third argument to `enhancedMetric`
 * (presumably the metric label; confirm against `enhancedMetric`).
 */
enhancedMetricMaxInitDuration() {
78+
return this.enhancedMetric(
79+
"init_duration",
80+
MetricStatistic.MAX,
81+
"InitDuration.Max",
82+
);
83+
}
84+
85+
/**
 * Returns the enhanced metric "init_duration" with the P90 statistic.
 * "InitDuration.P90" is passed as the third argument to `enhancedMetric`
 * (presumably the metric label; confirm against `enhancedMetric`).
 */
enhancedMetricP90InitDuration() {
86+
return this.enhancedMetric(
87+
"init_duration",
88+
MetricStatistic.P90,
89+
"InitDuration.P90",
90+
);
91+
}
92+
93+
/**
 * Returns the enhanced metric "init_duration" with the AVERAGE statistic.
 * "InitDuration.Avg" is passed as the third argument to `enhancedMetric`
 * (presumably the metric label; confirm against `enhancedMetric`).
 */
enhancedMetricAvgInitDuration() {
94+
return this.enhancedMetric(
95+
"init_duration",
96+
MetricStatistic.AVERAGE,
97+
"InitDuration.Avg",
98+
);
99+
}
100+
77101
enhancedMetricFunctionCost() {
78102
return this.metricFactory.createMetricMath(
79103
"memory_utilization * duration",

lib/monitoring/aws-lambda/LambdaFunctionMonitoring.ts

Lines changed: 94 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,20 @@ export interface LambdaFunctionMonitoringOptions extends BaseMonitoringProps {
144144
string,
145145
UsageThreshold
146146
>;
147+
148+
// Enhanced init duration metrics that are time-based
149+
readonly addEnhancedMonitoringMaxInitDurationAlarm?: Record<
150+
string,
151+
DurationThreshold
152+
>;
153+
readonly addEnhancedMonitoringP90InitDurationAlarm?: Record<
154+
string,
155+
DurationThreshold
156+
>;
157+
readonly addEnhancedMonitoringAvgInitDurationAlarm?: Record<
158+
string,
159+
DurationThreshold
160+
>;
147161
}
148162

149163
export interface LambdaFunctionMonitoringProps
@@ -174,6 +188,7 @@ export class LambdaFunctionMonitoring extends Monitoring {
174188
readonly memoryUsageAnnotations: HorizontalAnnotation[];
175189
readonly maxIteratorAgeAnnotations: HorizontalAnnotation[];
176190
readonly maxOffsetLagAnnotations: HorizontalAnnotation[];
191+
readonly initDurationAnnotations: HorizontalAnnotation[];
177192

178193
readonly tpsMetric: MetricWithAlarmSupport;
179194
readonly p50LatencyMetric: MetricWithAlarmSupport;
@@ -202,6 +217,9 @@ export class LambdaFunctionMonitoring extends Monitoring {
202217
readonly enhancedMonitoringMaxMemoryUtilizationMetric?: MetricWithAlarmSupport;
203218
readonly enhancedMonitoringP90MemoryUtilizationMetric?: MetricWithAlarmSupport;
204219
readonly enhancedMonitoringAvgMemoryUtilizationMetric?: MetricWithAlarmSupport;
220+
readonly enhancedMonitoringMaxInitDurationMetric?: MetricWithAlarmSupport;
221+
readonly enhancedMonitoringP90InitDurationMetric?: MetricWithAlarmSupport;
222+
readonly enhancedMonitoringAvgInitDurationMetric?: MetricWithAlarmSupport;
205223
readonly enhancedMetricFunctionCostMetric?: MetricWithAlarmSupport;
206224

207225
constructor(scope: MonitoringScope, props: LambdaFunctionMonitoringProps) {
@@ -238,6 +256,7 @@ export class LambdaFunctionMonitoring extends Monitoring {
238256
this.memoryUsageAnnotations = [];
239257
this.maxIteratorAgeAnnotations = [];
240258
this.maxOffsetLagAnnotations = [];
259+
this.initDurationAnnotations = [];
241260

242261
this.metricFactory = new LambdaFunctionMetricFactory(
243262
scope.createMetricFactory(),
@@ -295,6 +314,12 @@ export class LambdaFunctionMonitoring extends Monitoring {
295314
this.enhancedMetricFactory.enhancedMetricP90MemoryUtilization();
296315
this.enhancedMonitoringAvgMemoryUtilizationMetric =
297316
this.enhancedMetricFactory.enhancedMetricAvgMemoryUtilization();
317+
this.enhancedMonitoringMaxInitDurationMetric =
318+
this.enhancedMetricFactory.enhancedMetricMaxInitDuration();
319+
this.enhancedMonitoringP90InitDurationMetric =
320+
this.enhancedMetricFactory.enhancedMetricP90InitDuration();
321+
this.enhancedMonitoringAvgInitDurationMetric =
322+
this.enhancedMetricFactory.enhancedMetricAvgInitDuration();
298323
this.enhancedMetricFunctionCostMetric =
299324
this.enhancedMetricFactory.enhancedMetricFunctionCost();
300325

@@ -382,6 +407,51 @@ export class LambdaFunctionMonitoring extends Monitoring {
382407
this.memoryUsageAnnotations.push(createdAlarm.annotation);
383408
this.addAlarm(createdAlarm);
384409
}
410+
for (const disambiguator in props.addEnhancedMonitoringMaxInitDurationAlarm) {
411+
const alarmProps =
412+
props.addEnhancedMonitoringMaxInitDurationAlarm[disambiguator];
413+
const createdAlarm = this.latencyAlarmFactory.addCustomDurationAlarm(
414+
/* eslint-disable @typescript-eslint/no-non-null-assertion */
415+
this.enhancedMonitoringMaxInitDurationMetric!,
416+
/* eslint-enable @typescript-eslint/no-non-null-assertion */
417+
LatencyType.MAX,
418+
alarmProps,
419+
"InitDuration",
420+
disambiguator,
421+
);
422+
this.initDurationAnnotations.push(createdAlarm.annotation);
423+
this.addAlarm(createdAlarm);
424+
}
425+
for (const disambiguator in props.addEnhancedMonitoringP90InitDurationAlarm) {
426+
const alarmProps =
427+
props.addEnhancedMonitoringP90InitDurationAlarm[disambiguator];
428+
const createdAlarm = this.latencyAlarmFactory.addCustomDurationAlarm(
429+
/* eslint-disable @typescript-eslint/no-non-null-assertion */
430+
this.enhancedMonitoringP90InitDurationMetric!,
431+
/* eslint-enable @typescript-eslint/no-non-null-assertion */
432+
LatencyType.P90,
433+
alarmProps,
434+
"InitDuration",
435+
disambiguator,
436+
);
437+
this.initDurationAnnotations.push(createdAlarm.annotation);
438+
this.addAlarm(createdAlarm);
439+
}
440+
for (const disambiguator in props.addEnhancedMonitoringAvgInitDurationAlarm) {
441+
const alarmProps =
442+
props.addEnhancedMonitoringAvgInitDurationAlarm[disambiguator];
443+
const createdAlarm = this.latencyAlarmFactory.addCustomDurationAlarm(
444+
/* eslint-disable @typescript-eslint/no-non-null-assertion */
445+
this.enhancedMonitoringAvgInitDurationMetric!,
446+
/* eslint-enable @typescript-eslint/no-non-null-assertion */
447+
LatencyType.AVERAGE,
448+
alarmProps,
449+
"InitDuration",
450+
disambiguator,
451+
);
452+
this.initDurationAnnotations.push(createdAlarm.annotation);
453+
this.addAlarm(createdAlarm);
454+
}
385455
}
386456
for (const disambiguator in props.addLatencyP50Alarm) {
387457
const alarmProps = props.addLatencyP50Alarm[disambiguator];
@@ -653,15 +723,19 @@ export class LambdaFunctionMonitoring extends Monitoring {
653723
widgets.push(
654724
new Row(
655725
this.createLambdaInsightsCpuWidget(
656-
ThirdWidth,
726+
QuarterWidth,
657727
DefaultGraphWidgetHeight,
658728
),
659729
this.createLambdaInsightsMemoryWidget(
660-
ThirdWidth,
730+
QuarterWidth,
731+
DefaultGraphWidgetHeight,
732+
),
733+
this.createLambdaInsightsInitDurationWidget(
734+
QuarterWidth,
661735
DefaultGraphWidgetHeight,
662736
),
663737
this.createLambdaInsightsFunctionCostWidget(
664-
ThirdWidth,
738+
QuarterWidth,
665739
DefaultGraphWidgetHeight,
666740
),
667741
),
@@ -813,6 +887,23 @@ export class LambdaFunctionMonitoring extends Monitoring {
813887
});
814888
}
815889

890+
/**
 * Creates the "Init Duration" graph widget.
 *
 * The left axis plots the max, P90 and average enhanced init duration
 * metrics on `TimeAxisMillisFromZero`, with `this.initDurationAnnotations`
 * as annotations.
 *
 * The three metric fields are dereferenced with non-null assertions, so they
 * must already be assigned when this is called.
 * NOTE(review): presumably they are only set when Lambda Insights (enhanced
 * monitoring) is enabled; confirm that callers guard on that.
 *
 * @param width widget width passed to `GraphWidget`
 * @param height widget height passed to `GraphWidget`
 * @returns the new `GraphWidget`
 */
createLambdaInsightsInitDurationWidget(width: number, height: number) {
891+
return new GraphWidget({
892+
width,
893+
height,
894+
title: "Init Duration",
895+
left: [
896+
/* eslint-disable @typescript-eslint/no-non-null-assertion */
897+
this.enhancedMonitoringMaxInitDurationMetric!,
898+
this.enhancedMonitoringP90InitDurationMetric!,
899+
this.enhancedMonitoringAvgInitDurationMetric!,
900+
/* eslint-enable @typescript-eslint/no-non-null-assertion */
901+
],
902+
leftYAxis: TimeAxisMillisFromZero,
903+
leftAnnotations: this.initDurationAnnotations,
904+
});
905+
}
906+
816907
createLambdaInsightsFunctionCostWidget(width: number, height: number) {
817908
return new GraphWidget({
818909
width,

test/monitoring/aws-lambda/LambdaFunctionMonitoring.test.ts

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,21 @@ test("snapshot test: all alarms, alarmPrefix on error dedupeString", () => {
357357
maxUsagePercent: 50,
358358
},
359359
},
360+
addEnhancedMonitoringMaxInitDurationAlarm: {
361+
Warning: {
362+
maxDuration: Duration.millis(200),
363+
},
364+
},
365+
addEnhancedMonitoringP90InitDurationAlarm: {
366+
Warning: {
367+
maxDuration: Duration.millis(150),
368+
},
369+
},
370+
addEnhancedMonitoringAvgInitDurationAlarm: {
371+
Warning: {
372+
maxDuration: Duration.millis(100),
373+
},
374+
},
360375
useCreatedAlarms: {
361376
consume(alarms: AlarmWithAnnotation[]) {
362377
numAlarmsCreated = alarms.length;
@@ -365,7 +380,7 @@ test("snapshot test: all alarms, alarmPrefix on error dedupeString", () => {
365380
});
366381

367382
addMonitoringDashboardsToStack(stack, monitoring);
368-
expect(numAlarmsCreated).toStrictEqual(20);
383+
expect(numAlarmsCreated).toStrictEqual(23);
369384
expect(Template.fromStack(stack)).toMatchSnapshot();
370385
});
371386

@@ -500,6 +515,21 @@ test("snapshot test: all alarms, alarmPrefix on latency dedupeString", () => {
500515
maxUsagePercent: 50,
501516
},
502517
},
518+
addEnhancedMonitoringMaxInitDurationAlarm: {
519+
Warning: {
520+
maxDuration: Duration.millis(200),
521+
},
522+
},
523+
addEnhancedMonitoringP90InitDurationAlarm: {
524+
Warning: {
525+
maxDuration: Duration.millis(150),
526+
},
527+
},
528+
addEnhancedMonitoringAvgInitDurationAlarm: {
529+
Warning: {
530+
maxDuration: Duration.millis(100),
531+
},
532+
},
503533
useCreatedAlarms: {
504534
consume(alarms: AlarmWithAnnotation[]) {
505535
numAlarmsCreated = alarms.length;
@@ -508,7 +538,7 @@ test("snapshot test: all alarms, alarmPrefix on latency dedupeString", () => {
508538
});
509539

510540
addMonitoringDashboardsToStack(stack, monitoring);
511-
expect(numAlarmsCreated).toStrictEqual(20);
541+
expect(numAlarmsCreated).toStrictEqual(23);
512542
expect(Template.fromStack(stack)).toMatchSnapshot();
513543
});
514544

@@ -620,6 +650,21 @@ test("doesn't create alarms for enhanced Lambda Insights metrics if not enabled"
620650
maxUsagePercent: 50,
621651
},
622652
},
653+
addEnhancedMonitoringMaxInitDurationAlarm: {
654+
Warning: {
655+
maxDuration: Duration.millis(200),
656+
},
657+
},
658+
addEnhancedMonitoringP90InitDurationAlarm: {
659+
Warning: {
660+
maxDuration: Duration.millis(150),
661+
},
662+
},
663+
addEnhancedMonitoringAvgInitDurationAlarm: {
664+
Warning: {
665+
maxDuration: Duration.millis(100),
666+
},
667+
},
623668
useCreatedAlarms: {
624669
consume(alarms: AlarmWithAnnotation[]) {
625670
numAlarmsCreated = alarms.length;

0 commit comments

Comments
 (0)