routing: allow misson control manager to startup despite errors

ziggie1984 · ziggie1984 · commit a333b1a19d0a · 2025-11-22T02:15:55.000+01:00
We now allow the mission control manager to skip over deserializable
errors. We cannot repair this these results but we just skip over
it so we can startup properly.

When fetchAll() encounters entries that fail to deserialize, in
addition to skipping them, now also:

- Delete the corrupted entries from the database
- Remove them from the in-memory keysMap and keys tracking structures

This prevents corrupted entries from:
- Being counted toward maxRecords, which would cause valid entries
  to be pruned prematurely
- Persisting in the database indefinitely
- Causing inaccurate entry counts in startup logs
diff --git a/routing/missioncontrol_store.go b/routing/missioncontrol_store.go
@@ -133,30 +133,81 @@ func (b *missionControlStore) clear() error {
 }
 
 // fetchAll returns all results currently stored in the database.
+// It also removes any corrupted entries that fail to deserialize from both
+// the database and the in-memory tracking structures.
 func (b *missionControlStore) fetchAll() ([]*paymentResult, error) {
 	var results []*paymentResult
+	var corruptedKeys [][]byte
 
-	err := b.db.view(func(resultBucket kvdb.RBucket) error {
+	err := b.db.update(func(resultBucket kvdb.RwBucket) error {
 		results = make([]*paymentResult, 0)
+		corruptedKeys = make([][]byte, 0)
 
-		return resultBucket.ForEach(func(k, v []byte) error {
+		err := resultBucket.ForEach(func(k, v []byte) error {
 			result, err := deserializeResult(k, v)
+
+			// In case of an error, track the key for removal.
 			if err != nil {
-				return err
+				log.Warnf("Failed to deserialize mission "+
+					"control entry (key=%x): %v", k, err)
+
+				// Make a copy of the key since ForEach reuses
+				// the slice.
+				keyCopy := make([]byte, len(k))
+				copy(keyCopy, k)
+				corruptedKeys = append(corruptedKeys, keyCopy)
+
+				return nil
 			}
 
 			results = append(results, result)
 
 			return nil
 		})
+		if err != nil {
+			return err
+		}
+
+		// Delete corrupted entries from the database.
+		for _, key := range corruptedKeys {
+			if err := resultBucket.Delete(key); err != nil {
+				return fmt.Errorf("failed to delete corrupted "+
+					"entry: %w", err)
+			}
+		}
 
+		return nil
 	}, func() {
 		results = nil
+		corruptedKeys = nil
 	})
 	if err != nil {
 		return nil, err
 	}
 
+	// Remove corrupted keys from in-memory tracking.
+	for _, key := range corruptedKeys {
+		keyStr := string(key)
+		delete(b.keysMap, keyStr)
+
+		// Remove from the keys list.
+		for e := b.keys.Front(); e != nil; e = e.Next() {
+			keyVal, ok := e.Value.(string)
+			if !ok {
+				continue
+			}
+			if keyVal == keyStr {
+				b.keys.Remove(e)
+				break
+			}
+		}
+	}
+
+	if len(corruptedKeys) > 0 {
+		log.Infof("Removed %d corrupted mission control entries",
+			len(corruptedKeys))
+	}
+
 	return results, nil
 }
 
diff --git a/routing/missioncontrol_store_test.go b/routing/missioncontrol_store_test.go
@@ -332,3 +332,108 @@ func BenchmarkMissionControlStoreFlushing(b *testing.B) {
 		})
 	}
 }
+
+// TestMissionControlStoreDeletesCorruptedEntries tests that fetchAll() skips
+// entries that fail to deserialize, deletes them from the database, and
+// removes them from the in-memory tracking structures.
+func TestMissionControlStoreDeletesCorruptedEntries(t *testing.T) {
+	h := newMCStoreTestHarness(t, testMaxRecords, time.Second)
+	store := h.store
+
+	failureSourceIdx := 1
+
+	// Create two valid results.
+	result1 := newPaymentResult(
+		1, mcStoreTestRoute, testTime, testTime,
+		fn.Some(newPaymentFailure(
+			&failureSourceIdx,
+			lnwire.NewFailIncorrectDetails(100, 1000),
+		)),
+	)
+
+	result2 := newPaymentResult(
+		2, mcStoreTestRoute, testTime.Add(time.Hour),
+		testTime.Add(time.Hour),
+		fn.Some(newPaymentFailure(
+			&failureSourceIdx,
+			lnwire.NewFailIncorrectDetails(100, 1000),
+		)),
+	)
+
+	// Store both results.
+	store.AddResult(result1)
+	store.AddResult(result2)
+	require.NoError(t, store.storeResults())
+
+	// Insert a corrupted entry into the database.
+	var corruptedKey [8 + 8 + 33]byte
+	byteOrder.PutUint64(corruptedKey[:], uint64(testTime.Add(
+		30*time.Minute).UnixNano()),
+	)
+	byteOrder.PutUint64(corruptedKey[8:], 99) // Unique ID.
+	copy(corruptedKey[16:], result1.route.Val.sourcePubKey.Val[:])
+
+	err := store.db.update(func(bucket kvdb.RwBucket) error {
+		// Insert corrupted/invalid TLV data that will fail to
+		// deserialize.
+		corruptedValue := []byte{0xFF, 0xFF, 0xFF, 0xFF}
+
+		return bucket.Put(corruptedKey[:], corruptedValue)
+	}, func() {})
+	require.NoError(t, err)
+
+	// Add the corrupted key to in-memory tracking to simulate it being
+	// loaded at startup (newMissionControlStore populates keysMap from
+	// all DB keys).
+	corruptedKeyStr := string(corruptedKey[:])
+	store.keysMap[corruptedKeyStr] = struct{}{}
+	store.keys.PushBack(corruptedKeyStr)
+
+	// Verify the corrupted key is in the in-memory tracking.
+	_, exists := store.keysMap[corruptedKeyStr]
+	require.True(t, exists, "corrupted key should be in keysMap")
+
+	// Verify we have 3 entries in the database before fetchAll.
+	var dbEntryCountBefore int
+	err = store.db.view(func(bucket kvdb.RBucket) error {
+		return bucket.ForEach(func(k, v []byte) error {
+			dbEntryCountBefore++
+			return nil
+		})
+	}, func() {
+		dbEntryCountBefore = 0
+	})
+	require.NoError(t, err)
+	require.Equal(t, 3, dbEntryCountBefore, "should have 3 entries "+
+		"in the database before cleanup")
+
+	// Now fetch all results. The corrupted entry should be skipped,
+	// deleted from the DB, and removed from in-memory tracking.
+	results, err := store.fetchAll()
+	require.NoError(t, err, "fetchAll should not return an error "+
+		"even when encountering corrupted entries")
+	require.Len(t, results, 2, "should skip the corrupted entry and "+
+		"return only valid results")
+
+	// Verify we still have the correct results.
+	require.Equal(t, result1, results[0])
+	require.Equal(t, result2, results[1])
+
+	// Verify the corrupted entry was removed from in-memory tracking.
+	_, exists = store.keysMap[corruptedKeyStr]
+	require.False(t, exists, "corrupted key should not exist in keysMap")
+
+	// Verify the corrupted entry was deleted from the database.
+	var dbEntryCountAfter int
+	err = store.db.view(func(bucket kvdb.RBucket) error {
+		return bucket.ForEach(func(k, v []byte) error {
+			dbEntryCountAfter++
+			return nil
+		})
+	}, func() {
+		dbEntryCountAfter = 0
+	})
+	require.NoError(t, err)
+	require.Equal(t, 2, dbEntryCountAfter, "corrupted entry should be "+
+		"deleted from the database")
+}