Skip to content

Commit dcc8aef

Browse files
authored
Update autoscaling defaults (#909)
1 parent 83623a2 commit dcc8aef

File tree

3 files changed

+25
-25
lines changed

3 files changed

+25
-25
lines changed

docs/deployments/api-configuration.md

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ Reference the section below which corresponds to your Predictor type: [Python](#
3434
max_replica_concurrency: <int> # the maximum number of in-flight requests per replica before requests are rejected with error code 503 (default: 1024)
3535
window: <duration> # the time over which to average the API's concurrency (default: 60s)
3636
downscale_stabilization_period: <duration> # the API will not scale below the highest recommendation made during this period (default: 5m)
37-
upscale_stabilization_period: <duration> # the API will not scale above the lowest recommendation made during this period (default: 0m)
38-
max_downscale_factor: <float> # the maximum factor by which to scale down the API on a single scaling event (default: 0.5)
39-
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 10)
40-
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.1)
41-
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.1)
37+
upscale_stabilization_period: <duration> # the API will not scale above the lowest recommendation made during this period (default: 1m)
38+
max_downscale_factor: <float> # the maximum factor by which to scale down the API on a single scaling event (default: 0.75)
39+
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
40+
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
41+
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
4242
update_strategy:
4343
max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
4444
max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
@@ -76,11 +76,11 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
7676
max_replica_concurrency: <int> # the maximum number of in-flight requests per replica before requests are rejected with error code 503 (default: 1024)
7777
window: <duration> # the time over which to average the API's concurrency (default: 60s)
7878
downscale_stabilization_period: <duration> # the API will not scale below the highest recommendation made during this period (default: 5m)
79-
upscale_stabilization_period: <duration> # the API will not scale above the lowest recommendation made during this period (default: 0m)
80-
max_downscale_factor: <float> # the maximum factor by which to scale down the API on a single scaling event (default: 0.5)
81-
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 10)
82-
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.1)
83-
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.1)
79+
upscale_stabilization_period: <duration> # the API will not scale above the lowest recommendation made during this period (default: 1m)
80+
max_downscale_factor: <float> # the maximum factor by which to scale down the API on a single scaling event (default: 0.75)
81+
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
82+
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
83+
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
8484
update_strategy:
8585
max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
8686
max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
@@ -117,11 +117,11 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
117117
max_replica_concurrency: <int> # the maximum number of in-flight requests per replica before requests are rejected with error code 503 (default: 1024)
118118
window: <duration> # the time over which to average the API's concurrency (default: 60s)
119119
downscale_stabilization_period: <duration> # the API will not scale below the highest recommendation made during this period (default: 5m)
120-
upscale_stabilization_period: <duration> # the API will not scale above the lowest recommendation made during this period (default: 0m)
121-
max_downscale_factor: <float> # the maximum factor by which to scale down the API on a single scaling event (default: 0.5)
122-
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 10)
123-
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.1)
124-
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.1)
120+
upscale_stabilization_period: <duration> # the API will not scale above the lowest recommendation made during this period (default: 1m)
121+
max_downscale_factor: <float> # the maximum factor by which to scale down the API on a single scaling event (default: 0.75)
122+
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
123+
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
124+
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
125125
update_strategy:
126126
max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
127127
max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)

docs/deployments/autoscaling.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,15 @@ Cortex autoscales your web services based on your configuration.
3636

3737
* `downscale_stabilization_period` (default: 5m): The API will not scale below the highest recommendation made during this period. Every 10 seconds, the autoscaler makes a recommendation based on all of the other configuration parameters described here. It will then take the max of the current recommendation and all recommendations made during the `downscale_stabilization_period`, and use that to determine the final number of replicas to scale to. Increasing this value will cause the cluster to react more slowly to decreased traffic, and will reduce thrashing.
3838

39-
* `upscale_stabilization_period` (default: 0m): The API will not scale above the lowest recommendation made during this period. Every 10 seconds, the autoscaler makes a recommendation based on all of the other configuration parameters described here. It will then take the min of the current recommendation and all recommendations made during the `upscale_stabilization_period`, and use that to determine the final number of replicas to scale to. Increasing this value will cause the cluster to react more slowly to increased traffic, and will reduce thrashing. The default is 0 minutes, which means that the cluster will react quickly to increased traffic.
39+
* `upscale_stabilization_period` (default: 1m): The API will not scale above the lowest recommendation made during this period. Every 10 seconds, the autoscaler makes a recommendation based on all of the other configuration parameters described here. It will then take the min of the current recommendation and all recommendations made during the `upscale_stabilization_period`, and use that to determine the final number of replicas to scale to. Increasing this value will cause the cluster to react more slowly to increased traffic, and will reduce thrashing. The default is 1 minute, which means that the cluster will still react fairly quickly to increased traffic while ignoring very short-lived spikes.
4040

41-
* `max_downscale_factor` (default: 0.5): The maximum factor by which to scale down the API on a single scaling event. For example, if `max_downscale_factor` is 0.5 and there are 10 running replicas, the autoscaler will not recommend fewer than 5 replicas. Increasing this number will allow the cluster to shrink more quickly in response to dramatic dips in traffic.
41+
* `max_downscale_factor` (default: 0.75): The maximum factor by which to scale down the API on a single scaling event. For example, if `max_downscale_factor` is 0.5 and there are 10 running replicas, the autoscaler will not recommend fewer than 5 replicas. Decreasing this number will allow the cluster to shrink more quickly in response to dramatic dips in traffic.
4242

43-
* `max_upscale_factor` (default: 10): The maximum factor by which to scale up the API on a single scaling event. For example, if `max_upscale_factor` is 10 and there are 5 running replicas, the autoscaler will not recommend more than 50 replicas. Increasing this number will allow the cluster to grow more quickly in response to dramatic spikes in traffic.
43+
* `max_upscale_factor` (default: 1.5): The maximum factor by which to scale up the API on a single scaling event. For example, if `max_upscale_factor` is 10 and there are 5 running replicas, the autoscaler will not recommend more than 50 replicas. Increasing this number will allow the cluster to grow more quickly in response to dramatic spikes in traffic.
4444

45-
* `downscale_tolerance` (default: 0.1): Any recommendation falling within this factor below the current number of replicas will not trigger a scale down event. For example, if `downscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 18 or 19 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining it's optimal size.
45+
* `downscale_tolerance` (default: 0.05): Any recommendation falling within this factor below the current number of replicas will not trigger a scale down event. For example, if `downscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 18 or 19 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining its optimal size.
4646

47-
* `upscale_tolerance` (default: 0.1): Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event. For example, if `upscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 21 or 22 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining it's optimal size.
47+
* `upscale_tolerance` (default: 0.05): Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event. For example, if `upscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 21 or 22 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining its optimal size.
4848

4949
## Autoscaling Nodes
5050

pkg/operator/operator/validations.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ var _autoscalingValidation = &cr.StructFieldValidation{
255255
{
256256
StructField: "UpscaleStabilizationPeriod",
257257
StringValidation: &cr.StringValidation{
258-
Default: "0s",
258+
Default: "1m",
259259
},
260260
Parser: cr.DurationParser(&cr.DurationValidation{
261261
GreaterThanOrEqualTo: pointer.Duration(libtime.MustParseDuration("0s")),
@@ -264,30 +264,30 @@ var _autoscalingValidation = &cr.StructFieldValidation{
264264
{
265265
StructField: "MaxDownscaleFactor",
266266
Float64Validation: &cr.Float64Validation{
267-
Default: 0.5,
267+
Default: 0.75,
268268
GreaterThanOrEqualTo: pointer.Float64(0),
269269
LessThan: pointer.Float64(1),
270270
},
271271
},
272272
{
273273
StructField: "MaxUpscaleFactor",
274274
Float64Validation: &cr.Float64Validation{
275-
Default: 10,
275+
Default: 1.5,
276276
GreaterThan: pointer.Float64(1),
277277
},
278278
},
279279
{
280280
StructField: "DownscaleTolerance",
281281
Float64Validation: &cr.Float64Validation{
282-
Default: 0.1,
282+
Default: 0.05,
283283
GreaterThanOrEqualTo: pointer.Float64(0),
284284
LessThan: pointer.Float64(1),
285285
},
286286
},
287287
{
288288
StructField: "UpscaleTolerance",
289289
Float64Validation: &cr.Float64Validation{
290-
Default: 0.1,
290+
Default: 0.05,
291291
GreaterThanOrEqualTo: pointer.Float64(0),
292292
},
293293
},

0 commit comments

Comments
 (0)