Skip to content

Commit d1a3bee

Browse files
authored
Merge pull request #20896 from k8s-infra-cherrypick-robot/cherry-pick-20894-to-release-3.6
[release-3.6] Fix the issue that `--force-new-cluster` can't clean up learner after creating v2 snapshot
2 parents 52b2948 + 523100b commit d1a3bee

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

server/storage/util.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ func GetEffectiveNodeIDsFromWALEntries(lg *zap.Logger, snap *raftpb.Snapshot, en
131131
for _, id := range snap.Metadata.ConfState.Voters {
132132
ids[id] = true
133133
}
134+
for _, id := range snap.Metadata.ConfState.Learners {
135+
ids[id] = true
136+
}
134137
}
135138
for _, e := range ents {
136139
if e.Type != raftpb.EntryConfChange {

tests/e2e/force_new_cluster_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package e2e
1919
import (
2020
"context"
2121
"encoding/json"
22+
"strings"
2223
"testing"
2324
"time"
2425

@@ -30,6 +31,7 @@ import (
3031
"go.etcd.io/etcd/server/v3/storage/schema"
3132
"go.etcd.io/etcd/tests/v3/framework/config"
3233
"go.etcd.io/etcd/tests/v3/framework/e2e"
34+
"go.etcd.io/etcd/tests/v3/framework/testutils"
3335
)
3436

3537
// TestForceNewCluster verifies that etcd works as expected when started with --force-new-cluster.
@@ -115,6 +117,71 @@ func TestForceNewCluster_MemberCount(t *testing.T) {
115117
require.Len(t, members, 1)
116118
}
117119

120+
// TestForceNewCluster_AddLearner_MemberCount verifies that `--force-new-cluster`
121+
// should always be able to clean up all other members, including learners.
122+
func TestForceNewCluster_AddLearner_MemberCount(t *testing.T) {
123+
e2e.BeforeTest(t)
124+
125+
testCases := []struct {
126+
name string
127+
snapcount int
128+
}{
129+
{
130+
name: "no snapshot after adding learner",
131+
snapcount: 0,
132+
},
133+
{
134+
name: "create a snapshot after adding learner",
135+
snapcount: 5,
136+
},
137+
}
138+
139+
for _, tc := range testCases {
140+
t.Run(tc.name, func(t *testing.T) {
141+
cfg := e2e.NewConfig(e2e.WithClusterSize(3))
142+
epc, err := e2e.NewEtcdProcessCluster(t.Context(), t, e2e.WithConfig(cfg), e2e.WithSnapshotCount(uint64(tc.snapcount)), e2e.WithKeepDataDir(true))
143+
require.NoError(t, err)
144+
145+
t.Log("Adding a learner member")
146+
testutils.ExecuteWithTimeout(t, 1*time.Minute, func() {
147+
for {
148+
_, aerr := epc.StartNewProc(t.Context(), nil, t, true)
149+
if aerr != nil {
150+
if strings.Contains(aerr.Error(), "etcdserver: unhealthy cluster") {
151+
time.Sleep(1 * time.Second)
152+
continue
153+
}
154+
}
155+
break
156+
}
157+
})
158+
159+
for i := 0; i < tc.snapcount; i++ {
160+
werr := epc.Etcdctl().Put(t.Context(), "foo", "bar", config.PutOptions{})
161+
require.NoError(t, werr)
162+
}
163+
require.NoError(t, epc.Close())
164+
165+
m := epc.Procs[0]
166+
t.Logf("Forcibly create a one-member cluster with member: %s", m.Config().Name)
167+
m.Config().Args = append(m.Config().Args, "--force-new-cluster")
168+
require.NoError(t, m.Start(t.Context()))
169+
170+
t.Log("Restarting the member")
171+
require.NoError(t, m.Restart(t.Context()))
172+
defer func() {
173+
t.Log("Closing the member")
174+
require.NoError(t, m.Close())
175+
}()
176+
177+
t.Log("Checking member count")
178+
resp, merr := m.Etcdctl().MemberList(t.Context(), false)
179+
require.NoError(t, merr)
180+
require.Len(t, resp.Members, 1)
181+
})
182+
}
183+
}
184+
118185
func mustReadMembersFromBoltDB(t *testing.T, dataDir string) []*membership.Member {
119186
dbPath := datadir.ToBackendFileName(dataDir)
120187
db, err := bbolt.Open(dbPath, 0o400, &bbolt.Options{ReadOnly: true})

0 commit comments

Comments
 (0)