Skip to content

Commit e60e495

Browse files
lovetodream, gwynne, fabianfett
authored
Fix crash in PoolStateMachine+ConnectionGroup when closing connection while keepAlive is running (#444)
Fixes #443. Co-authored-by: Gwynne Raskind <gwynne@vapor.codes> Co-authored-by: Fabian Fett <fabianfett@apple.com>
1 parent 54f491c commit e60e495

File tree

8 files changed

+278
-6
lines changed

8 files changed

+278
-6
lines changed

.github/workflows/test.yml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222
- swift:5.8-jammy
2323
- swift:5.9-jammy
2424
- swiftlang/swift:nightly-5.10-jammy
25-
#- swiftlang/swift:nightly-main-jammy
25+
- swiftlang/swift:nightly-main-jammy
2626
include:
2727
- swift-image: swift:5.9-jammy
2828
code-coverage: true
@@ -133,7 +133,7 @@ jobs:
133133
matrix:
134134
postgres-formula:
135135
# Only test one version on macOS, let Linux do the rest
136-
- postgresql@15
136+
- postgresql@16
137137
postgres-auth:
138138
# Only test one auth method on macOS, Linux tests will cover the others
139139
- scram-sha-256
@@ -157,10 +157,16 @@ jobs:
157157
- name: Install Postgres, setup DB and auth, and wait for server start
158158
run: |
159159
export PATH="$(brew --prefix)/opt/${POSTGRES_FORMULA}/bin:$PATH" PGDATA=/tmp/vapor-postgres-test
160-
(brew unlink postgresql || true) && brew install "${POSTGRES_FORMULA}" && brew link --force "${POSTGRES_FORMULA}"
160+
# ** BEGIN ** Work around bug in both Homebrew and GHA
161+
(brew upgrade python@3.11 || true) && (brew link --force --overwrite python@3.11 || true)
162+
(brew upgrade python@3.12 || true) && (brew link --force --overwrite python@3.12 || true)
163+
brew upgrade
164+
# ** END ** Work around bug in both Homebrew and GHA
165+
brew install --overwrite "${POSTGRES_FORMULA}"
166+
brew link --overwrite --force "${POSTGRES_FORMULA}"
161167
initdb --locale=C --auth-host "${POSTGRES_AUTH_METHOD}" -U "${POSTGRES_USER}" --pwfile=<(echo "${POSTGRES_PASSWORD}")
162168
pg_ctl start --wait
163-
timeout-minutes: 2
169+
timeout-minutes: 15
164170
- name: Checkout code
165171
uses: actions/checkout@v4
166172
- name: Run all tests
@@ -183,7 +189,7 @@ jobs:
183189
184190
gh-codeql:
185191
runs-on: ubuntu-latest
186-
container: swift:5.8-jammy # CodeQL currently broken with 5.9
192+
container: swift:5.9-jammy
187193
permissions: { actions: write, contents: read, security-events: write }
188194
steps:
189195
- name: Check out code

Sources/ConnectionPoolModule/ConnectionPool.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ public final class ConnectionPool<
481481
self.observabilityDelegate.keepAliveFailed(id: connection.id, error: error)
482482

483483
self.modifyStateAndRunActions { state in
484-
state.stateMachine.connectionClosed(connection)
484+
state.stateMachine.connectionKeepAliveFailed(connection.id)
485485
}
486486
}
487487
}

Sources/ConnectionPoolModule/PoolStateMachine+ConnectionGroup.swift

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,30 @@ extension PoolStateMachine {
449449
return (index, context)
450450
}
451451

452+
/// Handles a failed keep-alive for the connection with the given ID by closing it
/// and updating the group's statistics.
///
/// - Parameter connectionID: ID of the connection whose keep-alive failed.
/// - Returns: The connection to close together with the timers to cancel, or `nil`
///   when no connection with this ID is tracked anymore or when the connection's
///   state did not produce a close action.
@inlinable
mutating func keepAliveFailed(_ connectionID: Connection.ID) -> CloseAction? {
    // A missing ID means the connection has already been closed.
    guard
        let position = self.connections.firstIndex(where: { $0.id == connectionID }),
        let stateCloseAction = self.connections[position].keepAliveFailed()
    else {
        return nil
    }

    // Streams of this connection that were not in use at the time of closing.
    let unusedStreams = stateCloseAction.maxStreams - stateCloseAction.usedStreams

    self.stats.idle -= 1
    self.stats.closing += 1
    if stateCloseAction.runningKeepAlive {
        self.stats.runningKeepAlive -= 1
    }
    self.stats.availableStreams -= unusedStreams

    // The force unwrap is fine: a close action caused by a failed keep-alive
    // cannot happen without a connection.
    return CloseAction(
        connection: stateCloseAction.connection!,
        timersToCancel: stateCloseAction.cancelTimers
    )
}
475+
452476
// MARK: Connection close/removal
453477

454478
@usableFromInline

Sources/ConnectionPoolModule/PoolStateMachine+ConnectionState.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,11 @@ extension PoolStateMachine {
455455
}
456456
}
457457

458+
/// Reacts to a failed keep-alive by closing this connection.
///
/// - Returns: Whatever `close()` returns for the connection's current state.
@inlinable
mutating func keepAliveFailed() -> CloseAction? {
    self.close()
}
462+
458463
@inlinable
459464
mutating func timerScheduled(
460465
_ timer: ConnectionTimer,

Sources/ConnectionPoolModule/PoolStateMachine.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,15 @@ struct PoolStateMachine<
374374
return self.handleAvailableConnection(index: index, availableContext: context)
375375
}
376376

377+
/// Informs the state machine that the keep-alive of the given connection failed.
///
/// - Parameter connectionID: ID of the connection whose keep-alive failed.
/// - Returns: An action asking to close the connection and cancel its timers, or the
///   empty action when the connection group reports nothing to close.
@inlinable
mutating func connectionKeepAliveFailed(_ connectionID: ConnectionID) -> Action {
    if let toClose = self.connections.keepAliveFailed(connectionID) {
        return Action(
            request: .none,
            connection: .closeConnection(toClose.connection, toClose.timersToCancel)
        )
    }

    return .none()
}
385+
377386
@inlinable
378387
mutating func connectionIdleTimerTriggered(_ connectionID: ConnectionID) -> Action {
379388
precondition(self.requestQueue.isEmpty)

Tests/ConnectionPoolModuleTests/ConnectionPoolTests.swift

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,92 @@ final class ConnectionPoolTests: XCTestCase {
300300
}
301301
}
302302

303+
/// Verifies that failing a keep-alive on a connection that was closed while the
/// keep-alive was running does not crash the pool.
///
/// The test leases and releases a single connection, lets one keep-alive succeed,
/// then closes the connection from inside the second keep-alive and throws.
func testKeepAliveOnClose() async throws {
    let clock = MockClock()
    let factory = MockConnectionFactory<MockClock>()
    let keepAliveDuration = Duration.seconds(20)
    let keepAlive = MockPingPongBehavior(keepAliveFrequency: keepAliveDuration, connectionType: MockConnection.self)

    var mutableConfig = ConnectionPoolConfiguration()
    mutableConfig.minimumConnectionCount = 0
    mutableConfig.maximumConnectionSoftLimit = 1
    mutableConfig.maximumConnectionHardLimit = 1
    let config = mutableConfig

    let pool = ConnectionPool(
        configuration: config,
        idGenerator: ConnectionIDGenerator(),
        requestType: ConnectionRequest<MockConnection>.self,
        keepAliveBehavior: keepAlive,
        observabilityDelegate: NoOpConnectionPoolMetrics(connectionIDType: MockConnection.ID.self),
        clock: clock
    ) {
        try await factory.makeConnection(id: $0, for: $1)
    }

    try await withThrowingTaskGroup(of: Void.self) { taskGroup in
        taskGroup.addTask {
            await pool.run()
        }

        async let lease1ConnectionAsync = pool.leaseConnection()

        // the connection ID is irrelevant here; every attempt succeeds with one stream
        let connection = await factory.nextConnectAttempt { _ in
            return 1
        }

        let lease1Connection = try await lease1ConnectionAsync
        XCTAssert(connection === lease1Connection)

        pool.releaseConnection(lease1Connection)

        // keep alive 1

        // validate that a keep alive timer and an idle timeout timer is scheduled
        // (the order in which the two timers are scheduled is not asserted)
        var expectedInstants: Set<MockClock.Instant> = [.init(keepAliveDuration), .init(config.idleTimeout)]
        let deadline1 = await clock.nextTimerScheduled()
        XCTAssertNotNil(expectedInstants.remove(deadline1))
        let deadline2 = await clock.nextTimerScheduled()
        XCTAssertNotNil(expectedInstants.remove(deadline2))
        XCTAssert(expectedInstants.isEmpty)

        // move clock forward to keep alive
        let newTime = clock.now.advanced(by: keepAliveDuration)
        clock.advance(to: newTime)

        await keepAlive.nextKeepAlive { keepAliveConnection in
            XCTAssertTrue(keepAliveConnection === lease1Connection)
            return true
        }

        // keep alive 2
        let deadline3 = await clock.nextTimerScheduled()
        XCTAssertEqual(deadline3, clock.now.advanced(by: keepAliveDuration))
        clock.advance(to: clock.now.advanced(by: keepAliveDuration))

        let failingKeepAliveDidRun = ManagedAtomic(false)
        // the following keep alive should not cause a crash
        _ = try? await keepAlive.nextKeepAlive { keepAliveConnection in
            defer {
                // flips the flag exactly once; a second run would observe `true` and fail
                XCTAssertFalse(failingKeepAliveDidRun
                    .compareExchange(expected: false, desired: true, ordering: .relaxed).original)
            }
            XCTAssertTrue(keepAliveConnection === lease1Connection)
            keepAliveConnection.close()
            throw CancellationError() // any error
        } // will fail and it's expected
        XCTAssertTrue(failingKeepAliveDidRun.load(ordering: .relaxed))

        taskGroup.cancelAll()

        for connection in factory.runningConnections {
            connection.closeIfClosing()
        }
    }
}
388+
303389
func testKeepAliveWorksRacesAgainstShutdown() async throws {
304390
let clock = MockClock()
305391
let factory = MockConnectionFactory<MockClock>()

Tests/ConnectionPoolModuleTests/PoolStateMachine+ConnectionGroupTests.swift

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,4 +293,35 @@ final class PoolStateMachine_ConnectionGroupTests: XCTestCase {
293293
XCTAssertEqual(afterPingIdleContext.use, .persisted)
294294
XCTAssertEqual(connections.stats, .init(idle: 1, availableStreams: 1))
295295
}
296+
297+
/// A keep-alive failure reported after the connection was already closed through
/// `closeConnectionIfIdle` must not produce a second close action.
func testKeepAliveShouldNotIndicateCloseConnectionAfterClosed() {
    var group = TestPoolStateMachine.ConnectionGroup(
        generator: self.idGenerator,
        minimumConcurrentConnections: 0,
        maximumConcurrentConnectionSoftLimit: 2,
        maximumConcurrentConnectionHardLimit: 2,
        keepAlive: true,
        keepAliveReducesAvailableStreams: true
    )

    guard let demandRequest = group.createNewDemandConnectionIfPossible() else {
        return XCTFail("Expected to have a request here")
    }

    // establish the connection and park it as idle
    let connection = MockConnection(id: demandRequest.connectionID)
    let (index, establishedContext) = group.newConnectionEstablished(connection, maxStreams: 1)
    XCTAssertEqual(establishedContext.info, .idle(availableStreams: 1, newIdle: true))
    XCTAssertEqual(group.stats, .init(idle: 1, availableStreams: 1))
    _ = group.parkConnection(at: index, hasBecomeIdle: true)

    // schedule the keep-alive timer, then start a keep-alive run
    let timer = TestPoolStateMachine.ConnectionTimer(timerID: 0, connectionID: demandRequest.connectionID, usecase: .keepAlive)
    let timerToken = MockTimerCancellationToken(timer)
    XCTAssertNil(group.timerScheduled(timer, cancelContinuation: timerToken))
    let keepAliveAction = group.keepAliveIfIdle(connection.id)
    XCTAssertEqual(keepAliveAction, .init(connection: connection, keepAliveTimerCancellationContinuation: timerToken))
    XCTAssertEqual(group.stats, .init(idle: 1, runningKeepAlive: 1, availableStreams: 0))

    // close first; the keep-alive failure that follows has nothing left to close
    _ = group.closeConnectionIfIdle(connection.id)
    if group.keepAliveFailed(connection.id) != nil {
        return XCTFail("Expected keepAliveFailed not to cause close again")
    }
    XCTAssertEqual(group.stats, .init(closing: 1))
}
296327
}

Tests/ConnectionPoolModuleTests/PoolStateMachineTests.swift

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,4 +266,115 @@ final class PoolStateMachineTests: XCTestCase {
266266
XCTAssertEqual(releaseRequest1.connection, .none)
267267
}
268268

269+
/// Drives the state machine through two failed keep-alives: one on an open connection
/// and one on a connection that was closed while its keep-alive was still running.
func testKeepAliveOnClosingConnection() {
    var poolConfiguration = PoolConfiguration()
    poolConfiguration.minimumConnectionCount = 0
    poolConfiguration.maximumConnectionSoftLimit = 2
    poolConfiguration.maximumConnectionHardLimit = 2
    poolConfiguration.keepAliveDuration = .seconds(2)
    poolConfiguration.idleTimeoutDuration = .seconds(4)

    var machine = TestPoolStateMachine(
        configuration: poolConfiguration,
        generator: .init(),
        timerCancellationTokenType: MockTimerCancellationToken.self
    )

    // with a minimum of zero connections there is nothing to refill
    let refillRequests = machine.refillConnections()
    XCTAssertEqual(refillRequests.count, 0)

    // first lease: no connection exists yet, so one has to be made
    let firstRequest = MockRequest()
    let firstLease = machine.leaseConnection(firstRequest)
    XCTAssertEqual(firstLease.connection, .makeConnection(.init(connectionID: 0), []))
    XCTAssertEqual(firstLease.request, .none)

    // connection 0 comes up, serves the waiting request and is released again
    let firstConnection = MockConnection(id: 0)
    let firstEstablished = machine.connectionEstablished(firstConnection, maxStreams: 1)
    XCTAssertEqual(firstEstablished.request, .leaseConnection(.init(element: firstRequest), firstConnection))
    XCTAssertEqual(firstEstablished.connection, .none)
    _ = machine.releaseConnection(firstConnection, streams: 1)

    // keep-alive timer fires for connection 0
    let firstKeepAlive = machine.connectionKeepAliveTimerTriggered(firstConnection.id)
    XCTAssertEqual(firstKeepAlive.connection, .runKeepAlive(firstConnection, nil))

    // the keep-alive fails, which must request a close
    let firstFailure = machine.connectionKeepAliveFailed(firstConnection.id)
    XCTAssertEqual(firstFailure.connection, .closeConnection(firstConnection, []))
    firstConnection.closeIfClosing()

    // second lease: a new connection has to be made
    let secondRequest = MockRequest()
    let secondLease = machine.leaseConnection(secondRequest)
    XCTAssertEqual(secondLease.connection, .makeConnection(.init(connectionID: 1), []))
    XCTAssertEqual(secondLease.request, .none)

    // connection 1 comes up, serves the waiting request and is released again
    let secondConnection = MockConnection(id: 1)
    let secondEstablished = machine.connectionEstablished(secondConnection, maxStreams: 1)
    XCTAssertEqual(secondEstablished.request, .leaseConnection(.init(element: secondRequest), secondConnection))
    XCTAssertEqual(secondEstablished.connection, .none)
    _ = machine.releaseConnection(secondConnection, streams: 1)

    // keep-alive timer fires while connection 1 is still open
    let secondKeepAlive = machine.connectionKeepAliveTimerTriggered(secondConnection.id)
    XCTAssertEqual(secondKeepAlive.connection, .runKeepAlive(secondConnection, nil))

    // the connection is closed in the middle of the keep-alive run
    secondConnection.close()
    secondConnection.closeIfClosing()

    // the keep-alive failure arrives afterwards; no `connectionClosed` was reported to
    // the state machine before this point, and a close action is still expected
    let secondFailure = machine.connectionKeepAliveFailed(secondConnection.id)
    XCTAssertEqual(secondFailure.connection, .closeConnection(secondConnection, []))
}
335+
336+
/// With a minimum connection count of one, closing the only connection after a failed
/// keep-alive must make the state machine ask for a replacement connection.
func testConnectionIsEstablishedAfterFailedKeepAliveIfNotEnoughConnectionsLeft() {
    var poolConfiguration = PoolConfiguration()
    poolConfiguration.minimumConnectionCount = 1
    poolConfiguration.maximumConnectionSoftLimit = 2
    poolConfiguration.maximumConnectionHardLimit = 2
    poolConfiguration.keepAliveDuration = .seconds(2)
    poolConfiguration.idleTimeoutDuration = .seconds(4)

    var machine = TestPoolStateMachine(
        configuration: poolConfiguration,
        generator: .init(),
        timerCancellationTokenType: MockTimerCancellationToken.self
    )

    // the refill requests exactly one connection
    let refillRequests = machine.refillConnections()
    XCTAssertEqual(refillRequests.count, 1)

    // leasing now yields neither a connection action nor a request action
    let pendingRequest = MockRequest()
    let lease = machine.leaseConnection(pendingRequest)
    XCTAssertEqual(lease.connection, .none)
    XCTAssertEqual(lease.request, .none)

    // connection 0 comes up, serves the waiting request and is released again
    let onlyConnection = MockConnection(id: 0)
    let established = machine.connectionEstablished(onlyConnection, maxStreams: 1)
    XCTAssertEqual(established.request, .leaseConnection(.init(element: pendingRequest), onlyConnection))
    XCTAssertEqual(established.connection, .none)
    _ = machine.releaseConnection(onlyConnection, streams: 1)

    // keep-alive timer fires
    let keepAliveRun = machine.connectionKeepAliveTimerTriggered(onlyConnection.id)
    XCTAssertEqual(keepAliveRun.connection, .runKeepAlive(onlyConnection, nil))

    // the keep-alive fails and requests a close; reporting the close then triggers
    // the creation of connection 1
    let failure = machine.connectionKeepAliveFailed(onlyConnection.id)
    XCTAssertEqual(failure.connection, .closeConnection(onlyConnection, []))
    let closed = machine.connectionClosed(onlyConnection)
    XCTAssertEqual(closed.connection, .makeConnection(.init(connectionID: 1), []))
    onlyConnection.closeIfClosing()
}
379+
269380
}

0 commit comments

Comments
 (0)