@@ -23,7 +23,6 @@ package resources
2323import (
2424 "context"
2525 "fmt"
26- "time"
2726
2827 policyv1 "k8s.io/api/policy/v1"
2928 policyv1beta1 "k8s.io/api/policy/v1beta1"
@@ -35,7 +34,6 @@ import (
3534 "github.com/arangodb/kube-arangodb/pkg/util/globals"
3635 "github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
3736 "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/kerrors"
38- "github.com/arangodb/kube-arangodb/pkg/util/timer"
3937)
4038
4139func min (a int , b int ) int {
@@ -50,36 +48,45 @@ func (r *Resources) EnsurePDBs(ctx context.Context) error {
5048
5149 // Only in Cluster and Production Mode
5250 spec := r .context .GetSpec ()
51+ status := r .context .GetStatus ()
5352 if spec .IsProduction () && spec .GetMode ().IsCluster () {
5453
5554 // We want to lose at most one agent and dbserver.
5655 // Coordinators are not that critical. To keep the service available two should be enough
5756 minAgents := spec .GetServerGroupSpec (api .ServerGroupAgents ).GetCount () - 1
57+ currAgents := status .Members .Agents .MembersReady ()
58+
5859 minDBServers := spec .GetServerGroupSpec (api .ServerGroupDBServers ).GetCount () - 1
60+ currDBServers := status .Members .DBServers .MembersReady ()
61+
5962 minCoordinators := min (spec .GetServerGroupSpec (api .ServerGroupCoordinators ).GetCount ()- 1 , 2 )
63+ currCoordinators := status .Members .Coordinators .MembersReady ()
6064
6165 // Setting those to zero triggers removal of the PDB
62- minSyncMaster := 0
63- minSyncWorker := 0
66+ minSyncMaster , currSyncMaster := 0 , 0
67+ minSyncWorker , currSyncWorker := 0 , 0
6468 if r .context .IsSyncEnabled () {
6569 minSyncMaster = spec .GetServerGroupSpec (api .ServerGroupSyncMasters ).GetCount () - 1
70+ currSyncMaster = status .Members .SyncMasters .MembersReady ()
71+
6672 minSyncWorker = spec .GetServerGroupSpec (api .ServerGroupSyncWorkers ).GetCount () - 1
73+ currSyncWorker = status .Members .SyncWorkers .MembersReady ()
6774 }
6875
6976 // Ensure all PDBs as calculated
70- if err := r .ensurePDBForGroup (ctx , api .ServerGroupAgents , minAgents ); err != nil {
77+ if err := r .ensurePDBForGroup (ctx , api .ServerGroupAgents , minAgents , currAgents ); err != nil {
7178 return err
7279 }
73- if err := r .ensurePDBForGroup (ctx , api .ServerGroupDBServers , minDBServers ); err != nil {
80+ if err := r .ensurePDBForGroup (ctx , api .ServerGroupDBServers , minDBServers , currDBServers ); err != nil {
7481 return err
7582 }
76- if err := r .ensurePDBForGroup (ctx , api .ServerGroupCoordinators , minCoordinators ); err != nil {
83+ if err := r .ensurePDBForGroup (ctx , api .ServerGroupCoordinators , minCoordinators , currCoordinators ); err != nil {
7784 return err
7885 }
79- if err := r .ensurePDBForGroup (ctx , api .ServerGroupSyncMasters , minSyncMaster ); err != nil {
86+ if err := r .ensurePDBForGroup (ctx , api .ServerGroupSyncMasters , minSyncMaster , currSyncMaster ); err != nil {
8087 return err
8188 }
82- if err := r .ensurePDBForGroup (ctx , api .ServerGroupSyncWorkers , minSyncWorker ); err != nil {
89+ if err := r .ensurePDBForGroup (ctx , api .ServerGroupSyncWorkers , minSyncWorker , currSyncWorker ); err != nil {
8390 return err
8491 }
8592 }
@@ -122,7 +129,7 @@ func newPDBV1(minAvail int, deplname string, group api.ServerGroup, owner meta.O
122129}
123130
124131// ensurePDBForGroup ensures the PDB for a specific server group; if wantedMinAvail is zero or less, the PDB is removed and not recreated
125- func (r * Resources ) ensurePDBForGroup (ctx context.Context , group api.ServerGroup , wantedMinAvail int ) error {
132+ func (r * Resources ) ensurePDBForGroup (ctx context.Context , group api.ServerGroup , wantedMinAvail , current int ) error {
126133 if wantedMinAvail < 0 {
127134 // Enforce removal
128135 wantedMinAvail = 0
@@ -134,101 +141,90 @@ func (r *Resources) ensurePDBForGroup(ctx context.Context, group api.ServerGroup
134141 cache := r .context .ACS ().CurrentClusterCache ()
135142 pdbMod := cache .PodDisruptionBudgetsModInterface ()
136143
137- for {
138- var minAvailable * intstr.IntOrString
139- var deletionTimestamp * meta.Time
144+ var minAvailable * intstr.IntOrString
145+ var deletionTimestamp * meta.Time
140146
141- err := globals .GetGlobalTimeouts ().Kubernetes ().RunWithTimeout (ctx , func (ctxChild context.Context ) error {
142- if inspector , err := cache .PodDisruptionBudget ().V1 (); err == nil {
143- if pdb , err := inspector .Read ().Get (ctxChild , pdbName , meta.GetOptions {}); err != nil {
144- return err
145- } else {
146- minAvailable = pdb .Spec .MinAvailable
147- deletionTimestamp = pdb .GetDeletionTimestamp ()
148- }
149- } else if inspector , err := cache .PodDisruptionBudget ().V1Beta1 (); err == nil {
150- if pdb , err := inspector .Read ().Get (ctxChild , pdbName , meta.GetOptions {}); err != nil {
151- return err
152- } else {
153- minAvailable = pdb .Spec .MinAvailable
154- deletionTimestamp = pdb .GetDeletionTimestamp ()
155- }
147+ err := globals .GetGlobalTimeouts ().Kubernetes ().RunWithTimeout (ctx , func (ctxChild context.Context ) error {
148+ if inspector , err := cache .PodDisruptionBudget ().V1 (); err == nil {
149+ if pdb , err := inspector .Read ().Get (ctxChild , pdbName , meta.GetOptions {}); err != nil {
150+ return err
156151 } else {
157- return errors .WithStack (err )
152+ minAvailable = pdb .Spec .MinAvailable
153+ deletionTimestamp = pdb .GetDeletionTimestamp ()
158154 }
159-
160- return nil
161- })
162-
163- if kerrors .IsNotFound (err ) {
164- if wantedMinAvail != 0 {
165- // No PDB found - create new.
166- log .Debug ("Creating new PDB" )
167- err = globals .GetGlobalTimeouts ().Kubernetes ().RunWithTimeout (ctx , func (ctxChild context.Context ) error {
168- var errInternal error
169-
170- if cache .PodDisruptionBudget ().Version ().IsV1 () {
171- pdb := newPDBV1 (wantedMinAvail , deplName , group , r .context .GetAPIObject ().AsOwner ())
172- _ , errInternal = pdbMod .V1 ().Create (ctxChild , pdb , meta.CreateOptions {})
173- } else {
174- pdb := newPDBV1Beta1 (wantedMinAvail , deplName , group , r .context .GetAPIObject ().AsOwner ())
175- _ , errInternal = pdbMod .V1Beta1 ().Create (ctxChild , pdb , meta.CreateOptions {})
176- }
177-
178- return errInternal
179- })
180-
181- if err != nil {
182- log .Err (err ).Error ("failed to create PDB" )
183- return errors .WithStack (err )
184- }
155+ } else if inspector , err := cache .PodDisruptionBudget ().V1Beta1 (); err == nil {
156+ if pdb , err := inspector .Read ().Get (ctxChild , pdbName , meta.GetOptions {}); err != nil {
157+ return err
158+ } else {
159+ minAvailable = pdb .Spec .MinAvailable
160+ deletionTimestamp = pdb .GetDeletionTimestamp ()
185161 }
186-
187- return nil
188- } else if err != nil {
189- // Some other error than not found.
162+ } else {
190163 return errors .WithStack (err )
191164 }
192165
193- // PDB v1 or v1beta1 is here.
194- if minAvailable .IntValue () == wantedMinAvail && wantedMinAvail != 0 {
195- return nil
196- }
197- // Update for PDBs is forbidden, thus one has to delete it and then create it again
198- // Otherwise delete it if wantedMinAvail is zero
199- log .Int ("wanted-min-avail" , wantedMinAvail ).
200- Int ("current-min-avail" , minAvailable .IntValue ()).
201- Debug ("Recreating PDB" )
202-
203- // Trigger deletion only if not already deleted.
204- if deletionTimestamp == nil {
205- // Update the PDB.
206- err := globals .GetGlobalTimeouts ().Kubernetes ().RunWithTimeout (ctx , func (ctxChild context.Context ) error {
166+ return nil
167+ })
168+
169+ if kerrors .IsNotFound (err ) {
170+ if wantedMinAvail != 0 && wantedMinAvail < current {
171+ // No PDB found - create new.
172+ log .Debug ("Creating new PDB" )
173+ err = globals .GetGlobalTimeouts ().Kubernetes ().RunWithTimeout (ctx , func (ctxChild context.Context ) error {
174+ var errInternal error
175+
207176 if cache .PodDisruptionBudget ().Version ().IsV1 () {
208- return pdbMod .V1 ().Delete (ctxChild , pdbName , meta.DeleteOptions {})
177+ pdb := newPDBV1 (wantedMinAvail , deplName , group , r .context .GetAPIObject ().AsOwner ())
178+ _ , errInternal = pdbMod .V1 ().Create (ctxChild , pdb , meta.CreateOptions {})
179+ } else {
180+ pdb := newPDBV1Beta1 (wantedMinAvail , deplName , group , r .context .GetAPIObject ().AsOwner ())
181+ _ , errInternal = pdbMod .V1Beta1 ().Create (ctxChild , pdb , meta.CreateOptions {})
209182 }
210183
211- return pdbMod . V1Beta1 (). Delete ( ctxChild , pdbName , meta. DeleteOptions {})
184+ return errInternal
212185 })
213- if err != nil && ! kerrors .IsNotFound (err ) {
214- log .Err (err ).Error ("PDB deletion failed" )
186+
187+ if err != nil {
188+ log .Err (err ).Error ("failed to create PDB" )
215189 return errors .WithStack (err )
216190 }
217- } else {
218- log .Debug ("PDB already deleted" )
219- }
220- // Exit here if deletion was intended
221- if wantedMinAvail == 0 {
222- return nil
223191 }
224192
225- log .Debug ("Retry loop for PDB" )
226- select {
227- case <- ctx .Done ():
228- return ctx .Err ()
229- case <- timer .After (time .Second ):
193+ return nil
194+ } else if err != nil {
195+ // Some other error than not found.
196+ return errors .WithStack (err )
197+ }
198+
199+ // PDB v1 or v1beta1 is here.
200+ if minAvailable .IntValue () == wantedMinAvail && wantedMinAvail != 0 {
201+ return nil
202+ }
203+ // Update for PDBs is forbidden, thus one has to delete it and then create it again
204+ // Otherwise delete it if wantedMinAvail is zero
205+ log .Int ("wanted-min-avail" , wantedMinAvail ).
206+ Int ("current-min-avail" , minAvailable .IntValue ()).
207+ Debug ("Recreating PDB" )
208+
209+ // Trigger deletion only if not already deleted.
210+ if deletionTimestamp == nil {
211+ // Delete the PDB: updates are forbidden, so a changed value needs a delete followed by a re-create.
212+ err := globals .GetGlobalTimeouts ().Kubernetes ().RunWithTimeout (ctx , func (ctxChild context.Context ) error {
213+ if cache .PodDisruptionBudget ().Version ().IsV1 () {
214+ return pdbMod .V1 ().Delete (ctxChild , pdbName , meta.DeleteOptions {})
215+ }
216+
217+ return pdbMod .V1Beta1 ().Delete (ctxChild , pdbName , meta.DeleteOptions {})
218+ })
219+ if err != nil && ! kerrors .IsNotFound (err ) {
220+ log .Err (err ).Error ("PDB deletion failed" )
221+ return errors .WithStack (err )
230222 }
223+ } else {
224+ log .Debug ("PDB already deleted" )
231225 }
226+
227+ return nil
232228}
233229
234230func newFromInt (v int ) * intstr.IntOrString {
0 commit comments