Skip to content

Commit b683fce

Browse files
authored
Fix waitForSidecar to respect timeout. (#1146)
* Fix waitForSidecar to respect timeout. Signed-off-by: Artur Souza <asouza.pro@gmail.com> * Bring back 500ms interval for retries and log for waitForSidecar Signed-off-by: Artur Souza <asouza.pro@gmail.com> * Fix flaky test for ConfigurationClientIT Signed-off-by: Artur Souza <asouza.pro@gmail.com> --------- Signed-off-by: Artur Souza <asouza.pro@gmail.com>
1 parent b1196d3 commit b683fce

File tree

4 files changed

+116
-50
lines changed

4 files changed

+116
-50
lines changed

sdk-tests/src/test/java/io/dapr/it/configuration/ConfigurationClientIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ public class ConfigurationClientIT extends BaseIT {
6969
public static void init() throws Exception {
7070
daprRun = startDaprApp(ConfigurationClientIT.class.getSimpleName(), 5000);
7171
daprClient = daprRun.newDaprClientBuilder().build();
72+
daprClient.waitForSidecar(10000).block();
7273
}
7374

7475
@AfterAll
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Copyright 2024 The Dapr Authors
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
* Unless required by applicable law or agreed to in writing, software
8+
* distributed under the License is distributed on an "AS IS" BASIS,
9+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
* See the License for the specific language governing permissions and
11+
limitations under the License.
12+
*/
13+
14+
package io.dapr.it.resiliency;
15+
16+
import io.dapr.it.BaseIT;
17+
import io.dapr.it.DaprRun;
18+
import io.dapr.it.ToxiProxyRun;
19+
import org.junit.jupiter.api.BeforeAll;
20+
import org.junit.jupiter.api.Test;
21+
22+
import java.time.Duration;
23+
24+
import static org.junit.jupiter.api.Assertions.assertThrows;
25+
import static org.junit.jupiter.api.Assertions.assertTrue;
26+
27+
/**
28+
* Test SDK resiliency.
29+
*/
30+
public class WaitForSidecarIT extends BaseIT {
31+
32+
// Use a number large enough to make sure it will respect the entire timeout.
33+
private static final Duration LATENCY = Duration.ofSeconds(5);
34+
35+
private static final Duration JITTER = Duration.ofSeconds(0);
36+
37+
private static DaprRun daprRun;
38+
39+
private static ToxiProxyRun toxiProxyRun;
40+
41+
private static DaprRun daprNotRunning;
42+
43+
@BeforeAll
44+
public static void init() throws Exception {
45+
daprRun = startDaprApp(WaitForSidecarIT.class.getSimpleName(), 5000);
46+
daprNotRunning = startDaprApp(WaitForSidecarIT.class.getSimpleName()+"NotRunning", 5000);
47+
daprNotRunning.stop();
48+
49+
toxiProxyRun = new ToxiProxyRun(daprRun, LATENCY, JITTER);
50+
toxiProxyRun.start();
51+
}
52+
53+
@Test
54+
public void waitSucceeds() throws Exception {
55+
try(var client = daprRun.newDaprClient()) {
56+
client.waitForSidecar(5000).block();
57+
}
58+
}
59+
60+
@Test
61+
public void waitTimeout() {
62+
int timeoutInMillis = (int)LATENCY.minusMillis(100).toMillis();
63+
long started = System.currentTimeMillis();
64+
assertThrows(RuntimeException.class, () -> {
65+
try(var client = toxiProxyRun.newDaprClientBuilder().build()) {
66+
client.waitForSidecar(timeoutInMillis).block();
67+
}
68+
});
69+
long duration = System.currentTimeMillis() - started;
70+
assertTrue(duration >= timeoutInMillis);
71+
}
72+
73+
@Test
74+
public void waitSlow() throws Exception {
75+
int timeoutInMillis = (int)LATENCY.plusMillis(100).toMillis();
76+
long started = System.currentTimeMillis();
77+
try(var client = toxiProxyRun.newDaprClientBuilder().build()) {
78+
client.waitForSidecar(timeoutInMillis).block();
79+
}
80+
long duration = System.currentTimeMillis() - started;
81+
assertTrue(duration >= LATENCY.toMillis());
82+
}
83+
84+
@Test
85+
public void waitNotRunningTimeout() {
86+
// Does not make this number too smaller since bug does not repro when <= 2.5s.
87+
// This has to do with a previous bug in the implementation.
88+
int timeoutMilliseconds = 5000;
89+
long started = System.currentTimeMillis();
90+
assertThrows(RuntimeException.class, () -> {
91+
try(var client = daprNotRunning.newDaprClientBuilder().build()) {
92+
client.waitForSidecar(timeoutMilliseconds).block();
93+
}
94+
});
95+
long duration = System.currentTimeMillis() - started;
96+
assertTrue(duration >= timeoutMilliseconds);
97+
}
98+
}

sdk/src/main/java/io/dapr/client/DaprClientImpl.java

Lines changed: 14 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@
8181
import io.grpc.stub.AbstractStub;
8282
import io.grpc.stub.StreamObserver;
8383
import org.jetbrains.annotations.NotNull;
84+
import org.slf4j.Logger;
85+
import org.slf4j.LoggerFactory;
8486
import reactor.core.publisher.Flux;
8587
import reactor.core.publisher.FluxSink;
8688
import reactor.core.publisher.Mono;
@@ -97,7 +99,6 @@
9799
import java.util.Iterator;
98100
import java.util.List;
99101
import java.util.Map;
100-
import java.util.concurrent.ExecutionException;
101102
import java.util.function.Consumer;
102103
import java.util.function.Function;
103104
import java.util.stream.Collectors;
@@ -114,6 +115,8 @@
114115
*/
115116
public class DaprClientImpl extends AbstractDaprClient {
116117

118+
private final Logger logger;
119+
117120
/**
118121
* The GRPC managed channel to be used.
119122
*/
@@ -235,6 +238,7 @@ private DaprClientImpl(
235238
this.httpClient = httpClient;
236239
this.retryPolicy = retryPolicy;
237240
this.grpcInterceptors = new DaprClientGrpcInterceptors(daprApiToken, timeoutPolicy);
241+
this.logger = LoggerFactory.getLogger(DaprClientImpl.class);
238242
}
239243

240244
private CommonProtos.StateOptions.StateConsistency getGrpcStateConsistency(StateOptions options) {
@@ -273,53 +277,21 @@ public <T extends AbstractStub<T>> T newGrpcStub(String appId, Function<Channel,
273277
@Override
274278
public Mono<Void> waitForSidecar(int timeoutInMilliseconds) {
275279
String[] pathSegments = new String[] { DaprHttp.API_VERSION, "healthz", "outbound"};
276-
int maxRetries = 5;
277-
278-
Retry retrySpec = Retry
279-
.fixedDelay(maxRetries, Duration.ofMillis(500))
280-
.doBeforeRetry(retrySignal -> {
281-
System.out.println("Retrying component health check...");
282-
});
283-
284-
/*
285-
NOTE: (Cassie) Uncomment this once it actually gets implemented:
286-
https://github.com/grpc/grpc-java/issues/4359
287-
288-
int maxChannelStateRetries = 5;
289-
290-
// Retry logic for checking the channel state
291-
Retry channelStateRetrySpec = Retry
292-
.fixedDelay(maxChannelStateRetries, Duration.ofMillis(500))
293-
.doBeforeRetry(retrySignal -> {
294-
System.out.println("Retrying channel state check...");
295-
});
296-
*/
297280

298281
// Do the Dapr Http endpoint check to have parity with Dotnet
299282
Mono<DaprHttp.Response> responseMono = this.httpClient.invokeApi(DaprHttp.HttpMethods.GET.name(), pathSegments,
300283
null, "", null, null);
301284

302285
return responseMono
303-
.retryWhen(retrySpec)
304-
/*
305-
NOTE: (Cassie) Uncomment this once it actually gets implemented:
306-
https://github.com/grpc/grpc-java/issues/4359
307-
.flatMap(response -> {
308-
// Check the status code
309-
int statusCode = response.getStatusCode();
310-
311-
// Check if the channel's state is READY
312-
return Mono.defer(() -> {
313-
if (this.channel.getState(true) == ConnectivityState.READY) {
314-
// Return true if the status code is in the 2xx range
315-
if (statusCode >= 200 && statusCode < 300) {
316-
return Mono.empty(); // Continue with the flow
317-
}
318-
}
319-
return Mono.error(new RuntimeException("Health check failed"));
320-
}).retryWhen(channelStateRetrySpec);
321-
})
322-
*/
286+
// No method to "retry forever every 500ms", so we make it practically forever.
287+
// 9223372036854775807 * 500 ms = 1.46235604 x 10^11 years
288+
// If anyone needs to wait for the sidecar for longer than that, sorry.
289+
.retryWhen(
290+
Retry
291+
.fixedDelay(Long.MAX_VALUE, Duration.ofMillis(500))
292+
.doBeforeRetry(s -> {
293+
this.logger.info("Retrying sidecar health check ...");
294+
}))
323295
.timeout(Duration.ofMillis(timeoutInMilliseconds))
324296
.onErrorResume(DaprException.class, e ->
325297
Mono.error(new RuntimeException(e)))

sdk/src/test/java/io/dapr/client/DaprClientHttpTest.java

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -133,16 +133,11 @@ public void waitForSidecarBadHealthCheck() throws Exception {
133133
.times(6)
134134
.respond(404, ResponseBody.create("Not Found", MediaType.get("application/json")));
135135

136-
// retry the max allowed retries (5 times)
136+
// it will timeout.
137137
StepVerifier.create(daprClientHttp.waitForSidecar(5000))
138138
.expectSubscription()
139-
.expectErrorMatches(throwable -> {
140-
if (throwable instanceof RuntimeException) {
141-
return "Retries exhausted: 5/5".equals(throwable.getMessage());
142-
}
143-
return false;
144-
})
145-
.verify(Duration.ofSeconds(20));
139+
.expectError()
140+
.verify(Duration.ofMillis(6000));
146141
}
147142

148143
@Test

0 commit comments

Comments
 (0)