Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
6b39f59
feat: no error return on connection close
Aug 17, 2025
c42b242
feat: no error return on connection close
Aug 17, 2025
829e00e
feat: no error return on connection close
Aug 17, 2025
b255d66
feat: no error return on connection close
Aug 19, 2025
3afdaa0
feat: no error return on connection close
Aug 19, 2025
4de8be1
feat: no error return on connection close
Aug 19, 2025
a43fdd1
feat: no error return on connection close
Aug 31, 2025
762277d
feat: addressed review comments
Sep 20, 2025
b01fcbe
feat: addressed review comments
Oct 20, 2025
3e78cb5
feat: addressed review comments
Oct 20, 2025
29a9bd1
feat:tests are still timing out
Oct 26, 2025
728d9b1
feat: attempt to fix test
Oct 28, 2025
6163d32
feat: addressed review comments
Nov 24, 2025
f6b9134
Merge branch 'main' into feat/no-error-return
jkawan Nov 24, 2025
7c2d72a
feat: resolved merge conflict
Nov 24, 2025
aca6320
feat: test failure fix attempt
Nov 24, 2025
9e80115
Merge branch 'main' into feat/no-error-return
jkawan Nov 25, 2025
c2e7504
feat: test failure fix attempt
Nov 30, 2025
4807820
feat: test failure fix attempt
Nov 30, 2025
197286b
feat: test failure fix attempt
Nov 30, 2025
b37a245
feat: test failure fix attempt
Nov 30, 2025
42f7142
feat: test failure fix attempt
Nov 30, 2025
fdaceaf
feat: test failure fix attempt
Nov 30, 2025
990cca9
feat: test failure fix attempt
Nov 30, 2025
90450f2
feat: test failure fix attempt
Nov 30, 2025
37c6ad9
feat: addressed review comments
Dec 2, 2025
26c42c7
feat: attempt to fix failing tests
Dec 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 59 additions & 9 deletions connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,52 @@ func (c *Connection) shutdown() {
close(c.errorChan)
}

// handleConnectionError handles connection-level errors centrally
func (c *Connection) handleConnectionError(err error) error {
if err == nil {
return nil
}

// Only propagate EOF errors when acting as a client with active server-side protocols
if errors.Is(err, io.EOF) {
// Check if we have any active server-side protocols
if c.server {
return err
}

// For clients, only propagate EOF if we have active server protocols
hasActiveServerProtocols := false
if c.chainSync != nil && c.chainSync.Server != nil && !c.chainSync.Server.IsDone() {
hasActiveServerProtocols = true
}
if c.blockFetch != nil && c.blockFetch.Server != nil && !c.blockFetch.Server.IsDone() {
hasActiveServerProtocols = true
}
if c.txSubmission != nil && c.txSubmission.Server != nil && !c.txSubmission.Server.IsDone() {
hasActiveServerProtocols = true
}
if c.localStateQuery != nil && c.localStateQuery.Server != nil && !c.localStateQuery.Server.IsDone() {
hasActiveServerProtocols = true
}
if c.localTxMonitor != nil && c.localTxMonitor.Server != nil && !c.localTxMonitor.Server.IsDone() {
hasActiveServerProtocols = true
}
if c.localTxSubmission != nil && c.localTxSubmission.Server != nil && !c.localTxSubmission.Server.IsDone() {
hasActiveServerProtocols = true
}

if hasActiveServerProtocols {
return err
}

// EOF with no active server protocols is normal connection closure
return nil
}

// For non-EOF errors, always propagate
return err
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

EOF handling logic inverted for server/client; also consider ErrUnexpectedEOF

  • As server, we should NOT surface EOF if all server-side protocols (chainsync, block-fetch, tx-submission; plus local protocols in NtC) are done or never started. Current code always returns err when c.server is true.
  • As client, we should check our client-side protocols, not server-side, to decide whether EOF is unexpected.
  • Also handle io.ErrUnexpectedEOF (or ensure the muxer remaps it to io.EOF).

Proposed fix:

 func (c *Connection) handleConnectionError(err error) error {
   if err == nil {
     return nil
   }
-  // Only propagate EOF errors when acting as a client with active server-side protocols
-  if errors.Is(err, io.EOF) {
-    // Check if we have any active server-side protocols
-    if c.server {
-      return err
-    }
-
-    // For clients, only propagate EOF if we have active server protocols
-    hasActiveServerProtocols := false
-    if c.chainSync != nil && c.chainSync.Server != nil && !c.chainSync.Server.IsDone() {
-      hasActiveServerProtocols = true
-    }
-    if c.blockFetch != nil && c.blockFetch.Server != nil && !c.blockFetch.Server.IsDone() {
-      hasActiveServerProtocols = true
-    }
-    if c.txSubmission != nil && c.txSubmission.Server != nil && !c.txSubmission.Server.IsDone() {
-      hasActiveServerProtocols = true
-    }
-    if c.localStateQuery != nil && c.localStateQuery.Server != nil && !c.localStateQuery.Server.IsDone() {
-      hasActiveServerProtocols = true
-    }
-    if c.localTxMonitor != nil && c.localTxMonitor.Server != nil && !c.localTxMonitor.Server.IsDone() {
-      hasActiveServerProtocols = true
-    }
-    if c.localTxSubmission != nil && c.localTxSubmission.Server != nil && !c.localTxSubmission.Server.IsDone() {
-      hasActiveServerProtocols = true
-    }
-
-    if hasActiveServerProtocols {
-      return err
-    }
-
-    // EOF with no active server protocols is normal connection closure
-    return nil
-  }
+  // Treat EOF/UnexpectedEOF as connection closed, decide based on active protocols for our role
+  if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
+    hasActive := false
+    if c.server {
+      // Server: check server-side protocols
+      if c.chainSync != nil && c.chainSync.Server != nil && !c.chainSync.Server.IsDone() {
+        hasActive = true
+      }
+      if c.blockFetch != nil && c.blockFetch.Server != nil && !c.blockFetch.Server.IsDone() {
+        hasActive = true
+      }
+      if c.txSubmission != nil && c.txSubmission.Server != nil && !c.txSubmission.Server.IsDone() {
+        hasActive = true
+      }
+      if c.localStateQuery != nil && c.localStateQuery.Server != nil && !c.localStateQuery.Server.IsDone() {
+        hasActive = true
+      }
+      if c.localTxMonitor != nil && c.localTxMonitor.Server != nil && !c.localTxMonitor.Server.IsDone() {
+        hasActive = true
+      }
+      if c.localTxSubmission != nil && c.localTxSubmission.Server != nil && !c.localTxSubmission.Server.IsDone() {
+        hasActive = true
+      }
+    } else {
+      // Client: check client-side protocols
+      if c.chainSync != nil && c.chainSync.Client != nil && !c.chainSync.Client.IsDone() {
+        hasActive = true
+      }
+      if c.blockFetch != nil && c.blockFetch.Client != nil && !c.blockFetch.Client.IsDone() {
+        hasActive = true
+      }
+      if c.txSubmission != nil && c.txSubmission.Client != nil && !c.txSubmission.Client.IsDone() {
+        hasActive = true
+      }
+      if c.localStateQuery != nil && c.localStateQuery.Client != nil && !c.localStateQuery.Client.IsDone() {
+        hasActive = true
+      }
+      if c.localTxMonitor != nil && c.localTxMonitor.Client != nil && !c.localTxMonitor.Client.IsDone() {
+        hasActive = true
+      }
+      if c.localTxSubmission != nil && c.localTxSubmission.Client != nil && !c.localTxSubmission.Client.IsDone() {
+        hasActive = true
+      }
+    }
+    // Propagate only if a protocol is still active; otherwise swallow as normal closure
+    if hasActive {
+      return err
+    }
+    return nil
+  }
   // For non-EOF errors, always propagate
   return err
 }

To confirm muxer behavior for UnexpectedEOF remapping, run:

Also applies to: 335-347


🏁 Script executed:

#!/bin/bash
# Check if muxer remaps io.ErrUnexpectedEOF to io.EOF
rg -nP 'ErrUnexpectedEOF|UnexpectedEOF' muxer/ --context 2 || true

Length of output: 0






🏁 Script executed:

#!/bin/bash
# 1. Check if muxer directory exists and list structure
echo "=== Checking muxer directory ==="
fd --type d 'muxer' . || echo "No muxer directory found"

# 2. Search for EOF handling patterns more broadly in codebase
echo -e "\n=== Searching for EOF handling patterns ==="
rg -n 'io\.EOF|ErrUnexpectedEOF' --type go -B2 -A2 || echo "No EOF patterns found"

# 3. Verify current connection.go exists and show lines 253-297
echo -e "\n=== Verifying connection.go current state ==="
if [ -f connection.go ]; then
  sed -n '253,297p' connection.go
else
  echo "connection.go not found at root; searching for it..."
  fd --type f 'connection.go' . --exec head -20 {}
fi

Length of output: 5839


🌐 Web query:

Go io.ErrUnexpectedEOF error type documentation

💡 Result:

io.ErrUnexpectedEOF

  • What it is: a package-level error variable defined as
    var ErrUnexpectedEOF = errors.New("unexpected EOF")
    meaning EOF was encountered in the middle of reading a fixed-size block or structured data. [1]

  • Where it’s declared: package io in the Go standard library. [1]

  • When it’s returned: commonly by helpers that require reading an exact number of bytes (e.g. io.ReadFull, io.ReadAtLeast) and by packages that read fixed-size binary structures (e.g. encoding/binary) when EOF occurs after some but not all bytes were read. If no bytes were read, functions typically return io.EOF instead. [2][3]

  • How to check it: since Go 1.13 errors may be wrapped, use errors.Is(err, io.ErrUnexpectedEOF) to detect it robustly. (Older code sometimes used err == io.ErrUnexpectedEOF.) [4]

Sources:
[1] io package docs (ErrUnexpectedEOF definition).
[2] io.ReadFull / ReadAtLeast behavior (returns ErrUnexpectedEOF when partial read then EOF).
[3] encoding/binary.Read behavior (returns io.ErrUnexpectedEOF if EOF after some bytes).
[4] Go error-handling FAQ (use errors.Is for comparisons).



🏁 Script executed:

#!/bin/bash
# Get current state of connection.go lines 250-350 for full context
echo "=== git/connection.go lines 250-350 ==="
sed -n '250,350p' git/connection.go

Length of output: 100



🏁 Script executed:

#!/bin/bash
# Find connection.go and show lines 250-350
echo "=== Locating connection.go ==="
CONN_FILE=$(fd -t f 'connection\.go$' 2>/dev/null | head -1)
echo "Found at: $CONN_FILE"

if [ -n "$CONN_FILE" ]; then
  echo -e "\n=== $CONN_FILE lines 250-350 ==="
  sed -n '250,350p' "$CONN_FILE"
else
  echo "connection.go not found"
fi

Length of output: 3089



🏁 Script executed:

#!/bin/bash
# Check lines 335-347 for the "also applies to" section
CONN_FILE=$(fd -t f 'connection\.go$' 2>/dev/null | head -1)
if [ -n "$CONN_FILE" ]; then
  echo "=== $CONN_FILE lines 330-355 (context for 335-347) ==="
  sed -n '330,355p' "$CONN_FILE"
fi

Length of output: 802


Fix EOF handling logic: server/client role checks are inverted

The current code has the logic backwards:

  • Server (line 266): returns EOF unconditionally without checking active server-side protocols
  • Client (lines 271–287): checks server-side protocol status instead of client-side

This means servers always propagate EOF (even when protocols are active), and clients check the wrong role's protocols. The proposed fix correctly swaps the logic: server checks Server fields, client checks Client fields. Additionally, handle io.ErrUnexpectedEOF alongside io.EOF, as the protocol layer already uses it for partial reads.

Apply the proposed diff and ensure the muxer behavior aligns with the corrected logic.

🤖 Prompt for AI Agents
In connection.go around lines 253 to 297, the EOF handling logic is inverted and
misses io.ErrUnexpectedEOF; change the EOF check to treat both io.EOF and
io.ErrUnexpectedEOF the same, and swap the role-specific checks so that when
c.server is true you inspect the protocol .Server fields for active server-side
protocols, and when c.server is false (client) you inspect the protocol .Client
fields for active client-side protocols; if any active protocols are found
return the error, otherwise treat EOF/ErrUnexpectedEOF as normal closure and
return nil.


// setupConnection establishes the muxer, configures and starts the handshake process, and initializes
// the appropriate mini-protocols
func (c *Connection) setupConnection() error {
Expand Down Expand Up @@ -285,16 +331,20 @@ func (c *Connection) setupConnection() error {
if !ok {
return
}
var connErr *muxer.ConnectionClosedError
if errors.As(err, &connErr) {
// Pass through ConnectionClosedError from muxer
c.errorChan <- err
} else {
// Wrap error message to denote it comes from the muxer
c.errorChan <- fmt.Errorf("muxer error: %w", err)

// Use centralized connection error handling
if handledErr := c.handleConnectionError(err); handledErr != nil {
var connErr *muxer.ConnectionClosedError
if errors.As(handledErr, &connErr) {
// Pass through ConnectionClosedError from muxer
c.errorChan <- handledErr
} else {
// Wrap error message to denote it comes from the muxer
c.errorChan <- fmt.Errorf("muxer error: %w", handledErr)
}
// Close connection on muxer errors
c.Close()
}
// Close connection on muxer errors
c.Close()
}
}()
protoOptions := protocol.ProtocolOptions{
Expand Down
244 changes: 205 additions & 39 deletions connection_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2023 Blink Labs Software
// Copyright 2025 Blink Labs Software
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -20,65 +20,231 @@ import (
"time"

ouroboros "github.com/blinklabs-io/gouroboros"
"github.com/blinklabs-io/ouroboros-mock"
"github.com/blinklabs-io/gouroboros/protocol/chainsync"
"github.com/blinklabs-io/gouroboros/protocol/common"
ouroboros_mock "github.com/blinklabs-io/ouroboros-mock"
"go.uber.org/goleak"
)

// Ensure that we don't panic when closing the Connection object after a failed Dial() call
func TestDialFailClose(t *testing.T) {
// TestErrorHandlingWithActiveProtocols tests that connection errors are propagated
// when protocols are active, and ignored when protocols are stopped
func TestErrorHandlingWithActiveProtocols(t *testing.T) {
defer goleak.VerifyNone(t)
oConn, err := ouroboros.New()
if err != nil {
t.Fatalf("unexpected error when creating Connection object: %s", err)
}
err = oConn.Dial("unix", "/path/does/not/exist")
if err == nil {
t.Fatalf("did not get expected failure on Dial()")
}
// Close connection
oConn.Close()

t.Run("ErrorsPropagatedWhenProtocolsActive", func(t *testing.T) {
// Create a mock connection that will complete handshake and start the chainsync protocol
mockConn := ouroboros_mock.NewConnection(
ouroboros_mock.ProtocolRoleClient,
[]ouroboros_mock.ConversationEntry{
ouroboros_mock.ConversationEntryHandshakeRequestGeneric,
ouroboros_mock.ConversationEntryHandshakeNtCResponse,
// ChainSync messages
ouroboros_mock.ConversationEntryInput{
// FindIntersect
ProtocolId: chainsync.ProtocolIdNtC,
Message: chainsync.NewMsgFindIntersect(
[]common.Point{
{
Slot: 21600,
Hash: []byte("19297addad3da631einos029"),
},
},
),
},
},
)

oConn, err := ouroboros.New(
ouroboros.WithConnection(mockConn),
ouroboros.WithNetworkMagic(ouroboros_mock.MockNetworkMagic),
ouroboros.WithServer(true),
ouroboros.WithChainSyncConfig(
chainsync.NewConfig(
chainsync.WithFindIntersectFunc(
func(ctx chainsync.CallbackContext, points []common.Point) (common.Point, chainsync.Tip, error) {
// We need to block here to keep the protocol active
time.Sleep(5 * time.Second)
return common.Point{}, chainsync.Tip{}, fmt.Errorf("context cancelled")
},
),
),
),
)
if err != nil {
t.Fatalf("unexpected error when creating Connection object: %s", err)
}

// Wait for handshake to complete by checking if protocols are initialized
var chainSyncProtocol *chainsync.ChainSync
for i := 0; i < 100; i++ {
chainSyncProtocol = oConn.ChainSync()
if chainSyncProtocol != nil && chainSyncProtocol.Server != nil {
break
}
time.Sleep(10 * time.Millisecond)
}

if chainSyncProtocol == nil || chainSyncProtocol.Server == nil {
oConn.Close()
t.Fatal("chain sync protocol not initialized")
}

// Wait a bit for protocol to start
time.Sleep(100 * time.Millisecond)

// Close the mock connection to generate a connection error
mockConn.Close()

// We should receive a connection error since protocols are active
select {
case err := <-oConn.ErrorChan():
if err == nil {
t.Fatal("expected connection error, got nil")
}
t.Logf("Received connection error (expected with active protocols): %s", err)
case <-time.After(2 * time.Second):
t.Error("timed out waiting for connection error - error should be propagated when protocols are active")
}

oConn.Close()
})

t.Run("ErrorsIgnoredWhenProtocolsStopped", func(t *testing.T) {
// Create a mock connection that will send a Done message to stop the protocol
mockConn := ouroboros_mock.NewConnection(
ouroboros_mock.ProtocolRoleClient,
[]ouroboros_mock.ConversationEntry{
ouroboros_mock.ConversationEntryHandshakeRequestGeneric,
ouroboros_mock.ConversationEntryHandshakeNtCResponse,
// Send Done message to stop the protocol
ouroboros_mock.ConversationEntryInput{
ProtocolId: chainsync.ProtocolIdNtC,
},
},
)

oConn, err := ouroboros.New(
ouroboros.WithConnection(mockConn),
ouroboros.WithNetworkMagic(ouroboros_mock.MockNetworkMagic),
ouroboros.WithServer(true),
)
if err != nil {
t.Fatalf("unexpected error when creating Connection object: %s", err)
}

// Wait for handshake to complete
var chainSyncProtocol *chainsync.ChainSync
for i := 0; i < 100; i++ {
chainSyncProtocol = oConn.ChainSync()
if chainSyncProtocol != nil && chainSyncProtocol.Server != nil {
break
}
time.Sleep(10 * time.Millisecond)
}

if chainSyncProtocol == nil || chainSyncProtocol.Server == nil {
oConn.Close()
t.Fatal("chain sync protocol not initialized")
}

// Wait for protocol to be done (Done message from mock should trigger this)
select {
case <-chainSyncProtocol.Server.DoneChan():
// Protocol is stoppeds
case <-time.After(1 * time.Second):
t.Fatal("timed out waiting for protocol to stop")
}

// Now close the mock connection to generate an error
mockConn.Close()
select {
case err := <-oConn.ErrorChan():
t.Logf("Received error during shutdown: %s", err)
case <-time.After(500 * time.Millisecond):
t.Log("No connection error received (expected when protocols are stopped)")
}

oConn.Close()
})
}

func TestDoubleClose(t *testing.T) {
// TestErrorHandlingWithMultipleProtocols tests error handling with multiple active protocols
func TestErrorHandlingWithMultipleProtocols(t *testing.T) {
defer goleak.VerifyNone(t)

mockConn := ouroboros_mock.NewConnection(
ouroboros_mock.ProtocolRoleClient,
[]ouroboros_mock.ConversationEntry{
ouroboros_mock.ConversationEntryHandshakeRequestGeneric,
ouroboros_mock.ConversationEntryHandshakeNtCResponse,
ouroboros_mock.ConversationEntryHandshakeNtNResponse,
},
)

oConn, err := ouroboros.New(
ouroboros.WithConnection(mockConn),
ouroboros.WithNetworkMagic(ouroboros_mock.MockNetworkMagic),
ouroboros.WithNodeToNode(true),
)
if err != nil {
t.Fatalf("unexpected error when creating Connection object: %s", err)
}
// Async error handler
go func() {
err, ok := <-oConn.ErrorChan()
if !ok {
return

// Wait for handshake to complete
time.Sleep(100 * time.Millisecond)

// Close connection to generate error
mockConn.Close()

// Should receive error since protocols are active
select {
case err := <-oConn.ErrorChan():
if err == nil {
t.Fatal("expected connection error, got nil")
}
// We can't call t.Fatalf() from a different Goroutine, so we panic instead
panic(fmt.Sprintf("unexpected Ouroboros connection error: %s", err))
}()
// Close connection
if err := oConn.Close(); err != nil {
t.Fatalf("unexpected error when closing Connection object: %s", err)
t.Logf("Received connection error with multiple active protocols: %s", err)
case <-time.After(2 * time.Second):
t.Error("timed out waiting for connection error")
}
// Close connection again
if err := oConn.Close(); err != nil {
t.Fatalf(
"unexpected error when closing Connection object again: %s",
err,

oConn.Close()
}

// TestBasicErrorHandling tests basic error handling scenarios
func TestBasicErrorHandling(t *testing.T) {
defer goleak.VerifyNone(t)

t.Run("DialFailure", func(t *testing.T) {
oConn, err := ouroboros.New(
ouroboros.WithNetworkMagic(764824073),
)
}
// Wait for connection shutdown
select {
case <-oConn.ErrorChan():
case <-time.After(10 * time.Second):
t.Errorf("did not shutdown within timeout")
}
if err != nil {
t.Fatalf("unexpected error when creating Connection object: %s", err)
}

err = oConn.Dial("tcp", "invalid-hostname:9999")
if err == nil {
t.Fatal("expected dial error, got nil")
}

oConn.Close()
})

t.Run("DoubleClose", func(t *testing.T) {
oConn, err := ouroboros.New(
ouroboros.WithNetworkMagic(764824073),
)
if err != nil {
t.Fatalf("unexpected error when creating Connection object: %s", err)
}

// First close
if err := oConn.Close(); err != nil {
t.Fatalf("unexpected error on first close: %s", err)
}

// Second close should also work
if err := oConn.Close(); err != nil {
t.Fatalf("unexpected error on second close: %s", err)
}
})
}
Loading
Loading