From f6bc88f348dc796423f4b4aae42672c86bd9368c Mon Sep 17 00:00:00 2001 From: ziggie Date: Wed, 19 Nov 2025 22:08:31 +0100 Subject: [PATCH 1/2] routing: allow misson control manager to startup despite errors We now allow the mission control manager to skip over deserializable errors. We cannot repair this these results but we just skip over it so we can startup properly. When fetchAll() encounters entries that fail to deserialize, in addition to skipping them, now also: - Delete the corrupted entries from the database - Remove them from the in-memory keysMap and keys tracking structures This prevents corrupted entries from: - Being counted toward maxRecords, which would cause valid entries to be pruned prematurely - Persisting in the database indefinitely - Causing inaccurate entry counts in startup logs --- routing/missioncontrol_store.go | 74 ++++++++++++++++++- routing/missioncontrol_store_test.go | 105 +++++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 2 deletions(-) diff --git a/routing/missioncontrol_store.go b/routing/missioncontrol_store.go index 7398ca0dcca..373bc300360 100644 --- a/routing/missioncontrol_store.go +++ b/routing/missioncontrol_store.go @@ -133,30 +133,100 @@ func (b *missionControlStore) clear() error { } // fetchAll returns all results currently stored in the database. +// It also removes any corrupted entries that fail to deserialize from both +// the database and the in-memory tracking structures. func (b *missionControlStore) fetchAll() ([]*paymentResult, error) { var results []*paymentResult + var corruptedKeys [][]byte + // Read all results and identify corrupted entries. err := b.db.view(func(resultBucket kvdb.RBucket) error { results = make([]*paymentResult, 0) + corruptedKeys = make([][]byte, 0) - return resultBucket.ForEach(func(k, v []byte) error { + err := resultBucket.ForEach(func(k, v []byte) error { result, err := deserializeResult(k, v) + + // In case of an error, track the key for removal. if err != nil { - return err + log.Warnf("Failed to deserialize mission "+ + "control entry (key=%x): %v", k, err) + + // Make a copy of the key since ForEach reuses + // the slice. + keyCopy := make([]byte, len(k)) + copy(keyCopy, k) + corruptedKeys = append(corruptedKeys, keyCopy) + + return nil } results = append(results, result) return nil }) + if err != nil { + return err + } + return nil }, func() { results = nil + corruptedKeys = nil }) if err != nil { return nil, err } + // Delete corrupted entries from the database which were identified + // when loading the results from the database. + // + // TODO: This code part should eventually be removed once we move the + // mission control store to a native sql database and have to do a + // full migration of the data. + if len(corruptedKeys) > 0 { + err = b.db.update(func(resultBucket kvdb.RwBucket) error { + for _, key := range corruptedKeys { + if err := resultBucket.Delete(key); err != nil { + return fmt.Errorf("failed to delete "+ + "corrupted entry: %w", err) + } + } + + return nil + }, func() {}) + if err != nil { + return nil, err + } + + // Build a set of corrupted keys. + corruptedSet := make(map[string]struct{}, len(corruptedKeys)) + for _, key := range corruptedKeys { + corruptedSet[string(key)] = struct{}{} + } + + // Remove corrupted keys from in-memory map. + for keyStr := range corruptedSet { + delete(b.keysMap, keyStr) + } + + // Remove from the keys list in a single pass. + for e := b.keys.Front(); e != nil; { + next := e.Next() + keyVal, ok := e.Value.(string) + if ok { + _, isCorrupted := corruptedSet[keyVal] + if isCorrupted { + b.keys.Remove(e) + } + } + e = next + } + + log.Infof("Removed %d corrupted mission control entries", + len(corruptedKeys)) + } + return results, nil } diff --git a/routing/missioncontrol_store_test.go b/routing/missioncontrol_store_test.go index b020fcbb465..889dca07135 100644 --- a/routing/missioncontrol_store_test.go +++ b/routing/missioncontrol_store_test.go @@ -332,3 +332,108 @@ func BenchmarkMissionControlStoreFlushing(b *testing.B) { }) } } + +// TestMissionControlStoreDeletesCorruptedEntries tests that fetchAll() skips +// entries that fail to deserialize, deletes them from the database, and +// removes them from the in-memory tracking structures. +func TestMissionControlStoreDeletesCorruptedEntries(t *testing.T) { + h := newMCStoreTestHarness(t, testMaxRecords, time.Second) + store := h.store + + failureSourceIdx := 1 + + // Create two valid results. + result1 := newPaymentResult( + 1, mcStoreTestRoute, testTime, testTime, + fn.Some(newPaymentFailure( + &failureSourceIdx, + lnwire.NewFailIncorrectDetails(100, 1000), + )), + ) + + result2 := newPaymentResult( + 2, mcStoreTestRoute, testTime.Add(time.Hour), + testTime.Add(time.Hour), + fn.Some(newPaymentFailure( + &failureSourceIdx, + lnwire.NewFailIncorrectDetails(100, 1000), + )), + ) + + // Store both results. + store.AddResult(result1) + store.AddResult(result2) + require.NoError(t, store.storeResults()) + + // Insert a corrupted entry into the database. + var corruptedKey [8 + 8 + 33]byte + byteOrder.PutUint64(corruptedKey[:], uint64(testTime.Add( + 30*time.Minute).UnixNano()), + ) + byteOrder.PutUint64(corruptedKey[8:], 99) // Unique ID. + copy(corruptedKey[16:], result1.route.Val.sourcePubKey.Val[:]) + + err := store.db.update(func(bucket kvdb.RwBucket) error { + // Insert corrupted/invalid TLV data that will fail to + // deserialize. + corruptedValue := []byte{0xFF, 0xFF, 0xFF, 0xFF} + + return bucket.Put(corruptedKey[:], corruptedValue) + }, func() {}) + require.NoError(t, err) + + // Add the corrupted key to in-memory tracking to simulate it being + // loaded at startup (newMissionControlStore populates keysMap from + // all DB keys). + corruptedKeyStr := string(corruptedKey[:]) + store.keysMap[corruptedKeyStr] = struct{}{} + store.keys.PushBack(corruptedKeyStr) + + // Verify the corrupted key is in the in-memory tracking. + _, exists := store.keysMap[corruptedKeyStr] + require.True(t, exists, "corrupted key should be in keysMap") + + // Verify we have 3 entries in the database before fetchAll. + var dbEntryCountBefore int + err = store.db.view(func(bucket kvdb.RBucket) error { + return bucket.ForEach(func(k, v []byte) error { + dbEntryCountBefore++ + return nil + }) + }, func() { + dbEntryCountBefore = 0 + }) + require.NoError(t, err) + require.Equal(t, 3, dbEntryCountBefore, "should have 3 entries "+ + "in the database before cleanup") + + // Now fetch all results. The corrupted entry should be skipped, + // deleted from the DB, and removed from in-memory tracking. + results, err := store.fetchAll() + require.NoError(t, err, "fetchAll should not return an error "+ + "even when encountering corrupted entries") + require.Len(t, results, 2, "should skip the corrupted entry and "+ + "return only valid results") + + // Verify we still have the correct results. + require.Equal(t, result1, results[0]) + require.Equal(t, result2, results[1]) + + // Verify the corrupted entry was removed from in-memory tracking. + _, exists = store.keysMap[corruptedKeyStr] + require.False(t, exists, "corrupted key should not exist in keysMap") + + // Verify the corrupted entry was deleted from the database. + var dbEntryCountAfter int + err = store.db.view(func(bucket kvdb.RBucket) error { + return bucket.ForEach(func(k, v []byte) error { + dbEntryCountAfter++ + return nil + }) + }, func() { + dbEntryCountAfter = 0 + }) + require.NoError(t, err) + require.Equal(t, 2, dbEntryCountAfter, "corrupted entry should be "+ + "deleted from the database") +} From 17b77b64a3672f1d0fe6f5bd01afbc5e9a8e5bd9 Mon Sep 17 00:00:00 2001 From: ziggie Date: Sat, 22 Nov 2025 02:13:08 +0100 Subject: [PATCH 2/2] docs: add release-notes for LND 20.1 --- docs/release-notes/release-notes-0.20.1.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/release-notes/release-notes-0.20.1.md b/docs/release-notes/release-notes-0.20.1.md index 4cc4556fb0a..844ae3089ec 100644 --- a/docs/release-notes/release-notes-0.20.1.md +++ b/docs/release-notes/release-notes-0.20.1.md @@ -31,6 +31,11 @@ * Fix a bug where [repeated network addresses](https://github.com/lightningnetwork/lnd/pull/10341) were added to the node announcement and `getinfo` output. + +* [Fix a startup issue in LND when encountering a + deserialization issue](https://github.com/lightningnetwork/lnd/pull/10383) + in the mission control store. Now we skip over potential errors and also + delete them from the store. # New Features