Skip to content

Commit 7dae9c2

Browse files
authored
CLOUDP-349087: Block simultaneous TLS disabling and scaling for ReplicaSets (#549)
# Summary We have code in the replica set controller dedicated to handling a scenario where the user disable TLS, and ascale their cluster at the same time. The test for this behaviour was broken. Because there were two functions with the name `test_tls_is_disabled_and_scaled_up`, so pytest would run only one of them. When I fixed the test, it failed: https://spruce.mongodb.com/task/mongodb_kubernetes_e2e_mdb_kind_ubi_cloudqa_e2e_disable_tls_scale_up_patch_dfb9424e9b34ddd048a725a9988114ca4032f9bf_68de79b2fccf070007a2c51e_25_10_02_13_10_12/tests?execution=4&sorts=STATUS%3AASC Which means the code to handle it was incorrect. My opinion is that it is better to block this change, rather than introducing complexity to handle it. It is not a common use case. A changelog entry was added. Question: should it be explicitly mentioned in our public documentation ? Old related PR (2021): 10gen/ops-manager-kubernetes#1444 And ticket: https://jira.mongodb.org/browse/CLOUDP-80768 ## Proof of Work Unit and e2e test for blocking the operation should pass. No regression due to the change in `updateOmDeploymentRs` should be observed. ## Checklist - [X] Have you linked a jira ticket and/or is the ticket in the title? - [X] Have you checked whether your jira ticket required DOCSP changes? - [X] Have you added changelog file? - use `skip-changelog` label if not needed - refer to [Changelog files and Release Notes](https://github.com/mongodb/mongodb-kubernetes/blob/master/CONTRIBUTING.md#changelog-files-and-release-notes) section in CONTRIBUTING.md for more details
1 parent 0617759 commit 7dae9c2

11 files changed

+197
-182
lines changed

.evergreen-tasks.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ tasks:
502502
commands:
503503
- func: "e2e_test"
504504

505-
- name: e2e_disable_tls_scale_up
505+
- name: e2e_disable_tls_and_scale
506506
tags: [ "patch-run" ]
507507
commands:
508508
- func: "e2e_test"

.evergreen.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,7 @@ task_groups:
679679
- e2e_tls_rs_external_access
680680
- e2e_replica_set_tls_require
681681
- e2e_replica_set_tls_certs_secret_prefix
682-
- e2e_disable_tls_scale_up
682+
- e2e_disable_tls_and_scale
683683
- e2e_replica_set_tls_require_and_disable
684684
# e2e_x509_task_group
685685
- e2e_configure_tls_and_x509_simultaneously_st

api/v1/mdb/mongodb_validation.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,25 @@ func noTopologyMigration(newObj, oldObj MongoDbSpec) v1.ValidationResult {
369369
return v1.ValidationSuccess()
370370
}
371371

372+
func noSimultaneousTLSDisablingAndScaling(newObj, oldObj MongoDbSpec) v1.ValidationResult {
373+
if newObj.ResourceType != ReplicaSet {
374+
return v1.ValidationSuccess()
375+
}
376+
377+
// Check if TLS is being disabled (was enabled, now disabled)
378+
tlsWasEnabled := oldObj.IsSecurityTLSConfigEnabled()
379+
tlsWillBeDisabled := !newObj.IsSecurityTLSConfigEnabled()
380+
381+
// Check if members count is changing
382+
membersChanging := oldObj.Members != newObj.Members
383+
384+
if tlsWasEnabled && tlsWillBeDisabled && membersChanging {
385+
return v1.ValidationError("Cannot disable TLS and change member count simultaneously. Please apply these changes separately.")
386+
}
387+
388+
return v1.ValidationSuccess()
389+
}
390+
372391
// specWithExactlyOneSchema checks that exactly one among "Project/OpsManagerConfig/CloudManagerConfig"
373392
// is configured, doing the "oneOf" validation in the webhook.
374393
func specWithExactlyOneSchema(d DbCommonSpec) v1.ValidationResult {
@@ -437,6 +456,7 @@ func (m *MongoDB) RunValidations(old *MongoDB) []v1.ValidationResult {
437456
updateValidators := []func(newObj MongoDbSpec, oldObj MongoDbSpec) v1.ValidationResult{
438457
resourceTypeImmutable,
439458
noTopologyMigration,
459+
noSimultaneousTLSDisablingAndScaling,
440460
}
441461

442462
var validationResults []v1.ValidationResult

api/v1/mdb/mongodb_validation_test.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,88 @@ func TestMongoDB_ResourceTypeImmutable(t *testing.T) {
8080
assert.Errorf(t, err, "'resourceType' cannot be changed once created")
8181
}
8282

83+
func TestMongoDB_NoSimultaneousTLSDisablingAndScaling(t *testing.T) {
84+
tests := []struct {
85+
name string
86+
oldTLSEnabled bool
87+
oldMembers int
88+
newTLSEnabled bool
89+
newMembers int
90+
expectError bool
91+
expectedErrorMessage string
92+
}{
93+
{
94+
name: "Simultaneous TLS disabling and scaling is blocked",
95+
oldTLSEnabled: true,
96+
oldMembers: 3,
97+
newTLSEnabled: false,
98+
newMembers: 5,
99+
expectError: true,
100+
expectedErrorMessage: "Cannot disable TLS and change member count simultaneously. Please apply these changes separately.",
101+
},
102+
{
103+
name: "TLS disabling without scaling is allowed",
104+
oldTLSEnabled: true,
105+
oldMembers: 3,
106+
newTLSEnabled: false,
107+
newMembers: 3,
108+
expectError: false,
109+
},
110+
{
111+
name: "Scaling without TLS changes is allowed",
112+
oldTLSEnabled: true,
113+
oldMembers: 3,
114+
newTLSEnabled: true,
115+
newMembers: 5,
116+
expectError: false,
117+
},
118+
{
119+
name: "TLS enabling with scaling is allowed",
120+
oldTLSEnabled: false,
121+
oldMembers: 3,
122+
newTLSEnabled: true,
123+
newMembers: 5,
124+
expectError: false,
125+
},
126+
}
127+
128+
for _, tt := range tests {
129+
t.Run(tt.name, func(t *testing.T) {
130+
// Build old ReplicaSet
131+
oldBuilder := NewReplicaSetBuilder()
132+
if tt.oldTLSEnabled {
133+
oldBuilder = oldBuilder.SetSecurityTLSEnabled()
134+
}
135+
oldRs := oldBuilder.Build()
136+
oldRs.Spec.CloudManagerConfig = &PrivateCloudConfig{
137+
ConfigMapRef: ConfigMapRef{Name: "cloud-manager"},
138+
}
139+
oldRs.Spec.Members = tt.oldMembers
140+
141+
// Build new ReplicaSet
142+
newBuilder := NewReplicaSetBuilder()
143+
if tt.newTLSEnabled {
144+
newBuilder = newBuilder.SetSecurityTLSEnabled()
145+
}
146+
newRs := newBuilder.Build()
147+
newRs.Spec.CloudManagerConfig = &PrivateCloudConfig{
148+
ConfigMapRef: ConfigMapRef{Name: "cloud-manager"},
149+
}
150+
newRs.Spec.Members = tt.newMembers
151+
152+
// Validate
153+
_, err := newRs.ValidateUpdate(oldRs)
154+
155+
if tt.expectError {
156+
require.Error(t, err)
157+
assert.Equal(t, tt.expectedErrorMessage, err.Error())
158+
} else {
159+
assert.NoError(t, err)
160+
}
161+
})
162+
}
163+
}
164+
83165
func TestSpecProjectOnlyOneValue(t *testing.T) {
84166
rs := NewReplicaSetBuilder().Build()
85167
rs.Spec.CloudManagerConfig = &PrivateCloudConfig{
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
kind: fix
3+
date: 2025-10-24
4+
---
5+
6+
* **ReplicaSet**: Blocked disabling TLS and changing member count simultaneously. These operations must now be applied separately to prevent configuration inconsistencies.

controllers/om/deployment.go

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -100,24 +100,6 @@ func NewDeployment() Deployment {
100100
return ans
101101
}
102102

103-
// TLSConfigurationWillBeDisabled checks that if applying this security configuration the Deployment
104-
// TLS configuration will go from Enabled -> Disabled.
105-
func (d Deployment) TLSConfigurationWillBeDisabled(security *mdbv1.Security) bool {
106-
tlsIsCurrentlyEnabled := false
107-
108-
// To detect that TLS is enabled, it is sufficient to check for the
109-
// d["tls"]["CAFilePath"] attribute to have a value different from nil.
110-
if tlsMap, ok := d["tls"]; ok {
111-
if caFilePath, ok := tlsMap.(map[string]interface{})["CAFilePath"]; ok {
112-
if caFilePath != nil {
113-
tlsIsCurrentlyEnabled = true
114-
}
115-
}
116-
}
117-
118-
return tlsIsCurrentlyEnabled && !security.IsTLSEnabled()
119-
}
120-
121103
// ConfigureTLS configures the deployment's TLS settings from the security
122104
// specification provided by the user in the mongodb resource.
123105
func (d Deployment) ConfigureTLS(security *mdbv1.Security, caFilePath string) {

controllers/om/deployment_test.go

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -152,20 +152,6 @@ func TestConfigureSSL_Deployment(t *testing.T) {
152152
assert.Equal(t, d["tls"], map[string]any{"clientCertificateMode": string(automationconfig.ClientCertificateModeOptional)})
153153
}
154154

155-
func TestTLSConfigurationWillBeDisabled(t *testing.T) {
156-
d := Deployment{}
157-
d.ConfigureTLS(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: false}}, util.CAFilePathInContainer)
158-
159-
assert.False(t, d.TLSConfigurationWillBeDisabled(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: false}}))
160-
assert.False(t, d.TLSConfigurationWillBeDisabled(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: true}}))
161-
162-
d = Deployment{}
163-
d.ConfigureTLS(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: true}}, util.CAFilePathInContainer)
164-
165-
assert.False(t, d.TLSConfigurationWillBeDisabled(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: true}}))
166-
assert.True(t, d.TLSConfigurationWillBeDisabled(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: false}}))
167-
}
168-
169155
// TestMergeDeployment_BigReplicaset ensures that adding a big replica set (> 7 members) works correctly and no more than
170156
// 7 voting members are added
171157
func TestMergeDeployment_BigReplicaset(t *testing.T) {

controllers/operator/mongodbreplicaset_controller.go

Lines changed: 1 addition & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -578,32 +578,8 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c
578578
}
579579

580580
caFilePath := fmt.Sprintf("%s/ca-pem", util.TLSCaMountPath)
581-
// If current operation is to Disable TLS, then we should the current members of the Replica Set,
582-
// this is, do not scale them up or down util TLS disabling has completed.
583-
shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(conn, r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, membersNumberBefore, rs, log, caFilePath, tlsCertPath)
584-
if err != nil && !isRecovering {
585-
return workflow.Failed(err)
586-
}
587-
588-
var updatedMembers int
589-
// This lock member logic will be removed soon, we should rather block possibility to disable tls + scale
590-
// Tracked in CLOUDP-349087
591-
if shouldLockMembers {
592-
// We should not add or remove members during this run, we'll wait for
593-
// TLS to be completely disabled first.
594-
// However, on first reconciliation (membersNumberBefore=0), we need to use replicasTarget
595-
// because the OM deployment is initialized with TLS enabled by default.
596-
log.Debugf("locking members for this reconciliation because TLS was disabled")
597-
if membersNumberBefore == 0 {
598-
updatedMembers = replicasTarget
599-
} else {
600-
updatedMembers = membersNumberBefore
601-
}
602-
} else {
603-
updatedMembers = replicasTarget
604-
}
605581

606-
replicaSet := replicaset.BuildFromMongoDBWithReplicas(r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, rs, updatedMembers, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)
582+
replicaSet := replicaset.BuildFromMongoDBWithReplicas(r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, rs, replicasTarget, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)
607583
processNames := replicaSet.GetProcessNames()
608584

609585
status, additionalReconciliationRequired := r.updateOmAuthentication(ctx, conn, processNames, rs, deploymentOptionsRS.agentCertPath, caFilePath, internalClusterCertPath, isRecovering, log)
@@ -668,40 +644,6 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c
668644
return workflow.OK()
669645
}
670646

671-
// updateOmDeploymentDisableTLSConfiguration checks if TLS configuration needs
672-
// to be disabled. In which case it will disable it and inform to the calling
673-
// function.
674-
func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage string, forceEnterprise bool, membersNumberBefore int, rs *mdbv1.MongoDB, log *zap.SugaredLogger, caFilePath, tlsCertPath string) (bool, error) {
675-
tlsConfigWasDisabled := false
676-
677-
err := conn.ReadUpdateDeployment(
678-
func(d om.Deployment) error {
679-
if !d.TLSConfigurationWillBeDisabled(rs.Spec.GetSecurity()) {
680-
return nil
681-
}
682-
683-
tlsConfigWasDisabled = true
684-
d.ConfigureTLS(rs.Spec.GetSecurity(), caFilePath)
685-
686-
// configure as many agents/Pods as we currently have, no more (in case
687-
// there's a scale up change at the same time).
688-
replicaSet := replicaset.BuildFromMongoDBWithReplicas(mongoDBImage, forceEnterprise, rs, membersNumberBefore, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath)
689-
690-
lastConfig, err := rs.GetLastAdditionalMongodConfigByType(mdbv1.ReplicaSetConfig)
691-
if err != nil {
692-
return err
693-
}
694-
695-
d.MergeReplicaSet(replicaSet, rs.Spec.AdditionalMongodConfig.ToMap(), lastConfig.ToMap(), log)
696-
697-
return nil
698-
},
699-
log,
700-
)
701-
702-
return tlsConfigWasDisabled, err
703-
}
704-
705647
func (r *ReconcileMongoDbReplicaSet) OnDelete(ctx context.Context, obj runtime.Object, log *zap.SugaredLogger) error {
706648
rs := obj.(*mdbv1.MongoDB)
707649

controllers/operator/mongodbreplicaset_controller_test.go

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func TestCreateReplicaSet(t *testing.T) {
6868

6969
connection := omConnectionFactory.GetConnection()
7070
connection.(*om.MockedOmConnection).CheckDeployment(t, deployment.CreateFromReplicaSet("fake-mongoDBImage", false, rs), "auth", "ssl")
71-
connection.(*om.MockedOmConnection).CheckNumberOfUpdateRequests(t, 2)
71+
connection.(*om.MockedOmConnection).CheckNumberOfUpdateRequests(t, 1)
7272
}
7373

7474
func TestReplicaSetRace(t *testing.T) {
@@ -385,39 +385,6 @@ func TestCreateReplicaSet_TLS(t *testing.T) {
385385
assert.Equal(t, "OPTIONAL", sslConfig["clientCertificateMode"])
386386
}
387387

388-
// TestUpdateDeploymentTLSConfiguration a combination of tests checking that:
389-
//
390-
// TLS Disabled -> TLS Disabled: should not lock members
391-
// TLS Disabled -> TLS Enabled: should not lock members
392-
// TLS Enabled -> TLS Enabled: should not lock members
393-
// TLS Enabled -> TLS Disabled: *should lock members*
394-
func TestUpdateDeploymentTLSConfiguration(t *testing.T) {
395-
rsWithTLS := mdbv1.NewReplicaSetBuilder().SetSecurityTLSEnabled().Build()
396-
rsNoTLS := mdbv1.NewReplicaSetBuilder().Build()
397-
deploymentWithTLS := deployment.CreateFromReplicaSet("fake-mongoDBImage", false, rsWithTLS)
398-
deploymentNoTLS := deployment.CreateFromReplicaSet("fake-mongoDBImage", false, rsNoTLS)
399-
400-
// TLS Disabled -> TLS Disabled
401-
shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsNoTLS, zap.S(), util.CAFilePathInContainer, "")
402-
assert.NoError(t, err)
403-
assert.False(t, shouldLockMembers)
404-
405-
// TLS Disabled -> TLS Enabled
406-
shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsWithTLS, zap.S(), util.CAFilePathInContainer, "")
407-
assert.NoError(t, err)
408-
assert.False(t, shouldLockMembers)
409-
410-
// TLS Enabled -> TLS Enabled
411-
shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsWithTLS, zap.S(), util.CAFilePathInContainer, "")
412-
assert.NoError(t, err)
413-
assert.False(t, shouldLockMembers)
414-
415-
// TLS Enabled -> TLS Disabled
416-
shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsNoTLS, zap.S(), util.CAFilePathInContainer, "")
417-
assert.NoError(t, err)
418-
assert.True(t, shouldLockMembers)
419-
}
420-
421388
// TestCreateDeleteReplicaSet checks that no state is left in OpsManager on removal of the replicaset
422389
func TestCreateDeleteReplicaSet(t *testing.T) {
423390
ctx := context.Background()

0 commit comments

Comments
 (0)