Skip to content

Commit eb9260c

Browse files
Copilot and neilime committed
Implement retry strategy and rate limiting for HTTP requests to handle 429 responses
Co-authored-by: neilime <314088+neilime@users.noreply.github.com>
1 parent 4af8f44 commit eb9260c

File tree

3 files changed

+204
-4
lines changed

3 files changed

+204
-4
lines changed

tests/Fixtures/Downloader/CachedHttpDownloader.php

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,18 @@
33
namespace Tests\Fixtures\Downloader;
44

55
use GuzzleHttp\Client;
6+
use GuzzleHttp\Exception\ClientException;
67
use GuzzleHttp\Exception\RequestException;
78
use Symfony\Component\Cache\Adapter\FilesystemAdapter;
89

910
class CachedHttpDownloader
1011
{
1112
private Client $client;
1213
private FilesystemAdapter $cache;
14+
private int $requestDelayMs;
15+
private int $maxRetries;
1316

14-
public function __construct(string $namespace, string $cachePath = __DIR__ . '/../../../.cache')
17+
public function __construct(string $namespace, string $cachePath = __DIR__ . '/../../../.cache', int $requestDelayMs = 1000, int $maxRetries = 3)
1518
{
1619
$this->client = new Client([
1720
'headers' => [
@@ -20,6 +23,8 @@ public function __construct(string $namespace, string $cachePath = __DIR__ . '/.
2023
]);
2124

2225
$this->cache = new FilesystemAdapter($namespace, 0, $cachePath);
26+
$this->requestDelayMs = $requestDelayMs;
27+
$this->maxRetries = $maxRetries;
2328
}
2429

2530
public function fetch(string $url, bool $forceRefresh = false): string
@@ -38,7 +43,7 @@ public function fetch(string $url, bool $forceRefresh = false): string
3843
}
3944
}
4045

41-
$response = $this->client->get($url, ['headers' => $headers]);
46+
$response = $this->fetchWithRetry($url, ['headers' => $headers]);
4247

4348
if ($response->getStatusCode() === 304) {
4449
$cachedData = $cachedItem->get();
@@ -56,4 +61,44 @@ public function fetch(string $url, bool $forceRefresh = false): string
5661

5762
return $body;
5863
}
64+
65+
/**
 * Sends a GET request, retrying transient failures with exponential backoff.
 *
 * Every attempt is preceded by a pause that doubles as basic rate limiting:
 * the first attempt and the first retry wait `requestDelayMs`, and the wait
 * doubles for each further retry. A non-positive delay disables the pause.
 *
 * Retry policy:
 *  - a ClientException is retried only when its response status is one of
 *    429, 502, 503 or 504 (with stock Guzzle only 429 arrives as a
 *    ClientException; the 5xx codes are kept for backward compatibility);
 *  - any other RequestException (e.g. 5xx ServerException, or a request
 *    that produced no response) is retried;
 *  - a ConnectException is retried as well. It is caught explicitly because
 *    in Guzzle 7 it no longer extends RequestException.
 *
 * @param string $url     URL to request.
 * @param array  $options Guzzle request options forwarded to Client::get().
 *
 * @return \Psr\Http\Message\ResponseInterface The first response obtained without an exception.
 *
 * @throws RequestException                        The last failure once retries are exhausted,
 *                                                 or immediately for a non-retryable client error.
 * @throws \GuzzleHttp\Exception\ConnectException  The last connection failure once retries are exhausted.
 * @throws \RuntimeException                       If no attempt could be made (negative `maxRetries`).
 */
private function fetchWithRetry(string $url, array $options = []): \Psr\Http\Message\ResponseInterface
{
    $maxRetries = $this->maxRetries;

    for ($attempt = 0; $attempt <= $maxRetries; $attempt++) {
        // Attempt 0 and attempt 1 both wait the base delay; later attempts double it.
        $delayMs = $this->requestDelayMs * (2 ** max(0, $attempt - 1));
        if ($delayMs > 0) {
            usleep($delayMs * 1000); // Milliseconds to microseconds
        }

        try {
            return $this->client->get($url, $options);
        } catch (RequestException | \GuzzleHttp\Exception\ConnectException $e) {
            $response = $e instanceof RequestException ? $e->getResponse() : null;
            $status = $response !== null ? $response->getStatusCode() : null;

            // Only client errors can be non-retryable; everything else is treated as transient.
            $retryable = !($e instanceof ClientException)
                || in_array($status, [429, 502, 503, 504], true);

            if (!$retryable || $attempt >= $maxRetries) {
                throw $e;
            }
        }
    }

    // Only reachable when the loop never ran, i.e. $maxRetries is negative.
    throw new \RuntimeException("Max retries ({$maxRetries}) exceeded for URL: {$url}");
}
59104
}

tests/Fixtures/Downloader/CssReferentialScraper.php

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,19 @@ class CssReferentialScraper
1818
public function __construct(bool $forceRefresh = false)
1919
{
2020
$this->downloaders = [
21-
self::MDN_URL => new CachedHttpDownloader('css_referential_mdn'),
22-
self::W3C_URL => new CachedHttpDownloader('css_referential_w3c'),
21+
self::MDN_URL => new CachedHttpDownloader('css_referential_mdn', __DIR__ . '/../../../.cache', 1000, 3),
22+
self::W3C_URL => new CachedHttpDownloader('css_referential_w3c', __DIR__ . '/../../../.cache', 1000, 3),
2323
];
2424
$this->forceRefresh = $forceRefresh;
2525
}
2626

2727
public function fetchReferentials(): array
2828
{
2929
$w3cReferencial = $this->fetchW3CReferential();
30+
31+
// Add delay between different API calls to be respectful to servers
32+
usleep(2000000); // 2 seconds
33+
3034
$mdnReferencial = $this->fetchMdnReferential();
3135

3236
$properties = $w3cReferencial['properties'] ?? [];
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
<?php
2+
3+
namespace Tests\TestSuite;
4+
5+
use GuzzleHttp\Client;
6+
use GuzzleHttp\Exception\ClientException;
7+
use GuzzleHttp\Handler\MockHandler;
8+
use GuzzleHttp\HandlerStack;
9+
use GuzzleHttp\Psr7\Response;
10+
use GuzzleHttp\Psr7\Request;
11+
use PHPUnit\Framework\TestCase;
12+
use Tests\Fixtures\Downloader\CachedHttpDownloader;
13+
14+
/**
 * Exercises CachedHttpDownloader against a Guzzle MockHandler: retry on 429,
 * giving up after the configured number of retries, the plain success path,
 * and reuse of a cached body on a 304 response.
 *
 * Each test runs against its own temporary cache directory, which is removed
 * again in tearDown().
 */
class CachedHttpDownloaderTest extends TestCase
{
    private string $tempCacheDir;

    protected function setUp(): void
    {
        $this->tempCacheDir = sys_get_temp_dir() . '/test_cache_' . uniqid();

        // Fail loudly instead of letting later cache writes fail in confusing ways.
        if (!mkdir($this->tempCacheDir, 0777, true) && !is_dir($this->tempCacheDir)) {
            throw new \RuntimeException("Unable to create temporary cache directory: {$this->tempCacheDir}");
        }
    }

    protected function tearDown(): void
    {
        $this->removeDirectory($this->tempCacheDir);
    }

    public function testRetryOn429Error(): void
    {
        // Two 429 failures, then a success
        $mock = new MockHandler([
            $this->tooManyRequests(),
            $this->tooManyRequests(),
            new Response(200, [], 'success content'),
        ]);

        $downloader = $this->createDownloader($mock);

        $result = $downloader->fetch('http://test.com', true);

        $this->assertSame('success content', $result);
        // All three queued responses must have been consumed, i.e. two retries happened.
        $this->assertCount(0, $mock);
    }

    public function testRetryFailsAfterMaxAttempts(): void
    {
        // One initial attempt plus three retries, all answered with 429
        $mock = new MockHandler([
            $this->tooManyRequests(),
            $this->tooManyRequests(),
            $this->tooManyRequests(),
            $this->tooManyRequests(),
        ]);

        $downloader = $this->createDownloader($mock);

        $this->expectException(ClientException::class);
        $downloader->fetch('http://test.com', true);
    }

    public function testSuccessfulRequest(): void
    {
        // Immediate success, no retry involved
        $mock = new MockHandler([
            new Response(200, [], 'success content'),
        ]);

        $downloader = $this->createDownloader($mock);

        $result = $downloader->fetch('http://test.com', true);

        $this->assertSame('success content', $result);
    }

    public function testCachedResponse(): void
    {
        // First call populates the cache
        $downloader = $this->createDownloader(new MockHandler([
            new Response(200, ['ETag' => '"test-etag"'], 'cached content'),
        ]));

        $result1 = $downloader->fetch('http://test.com', true);
        $this->assertSame('cached content', $result1);

        // Second call gets a 304 and must fall back to the cached body
        $this->useMockHandler($downloader, new MockHandler([
            new Response(304),
        ]));

        $result2 = $downloader->fetch('http://test.com', false);
        $this->assertSame('cached content', $result2);
    }

    /**
     * Builds a downloader (10ms base delay, 3 max retries) whose HTTP client
     * is backed by the given mock handler.
     */
    private function createDownloader(MockHandler $mock): CachedHttpDownloader
    {
        $downloader = new CachedHttpDownloader('test', $this->tempCacheDir, 10, 3);
        $this->useMockHandler($downloader, $mock);

        return $downloader;
    }

    /**
     * Replaces the downloader's private Guzzle client with one that serves
     * responses from the given mock handler.
     */
    private function useMockHandler(CachedHttpDownloader $downloader, MockHandler $mock): void
    {
        $client = new Client(['handler' => HandlerStack::create($mock)]);

        // The client is private and not injectable, so it is swapped via reflection.
        $clientProperty = new \ReflectionProperty($downloader, 'client');
        $clientProperty->setAccessible(true);
        $clientProperty->setValue($downloader, $client);
    }

    /**
     * Creates the exception queued in the mock handler to simulate a 429 response.
     */
    private function tooManyRequests(): ClientException
    {
        return new ClientException('Too Many Requests', new Request('GET', 'test'), new Response(429));
    }

    /**
     * Recursively deletes a directory; does nothing if it does not exist.
     */
    private function removeDirectory(string $dir): void
    {
        if (!is_dir($dir)) {
            return;
        }

        // CHILD_FIRST yields directory contents before the directory itself.
        $files = new \RecursiveIteratorIterator(
            new \RecursiveDirectoryIterator($dir, \RecursiveDirectoryIterator::SKIP_DOTS),
            \RecursiveIteratorIterator::CHILD_FIRST
        );

        foreach ($files as $fileinfo) {
            if ($fileinfo->isDir()) {
                rmdir($fileinfo->getRealPath());
            } else {
                unlink($fileinfo->getRealPath());
            }
        }

        rmdir($dir);
    }
}

0 commit comments

Comments
 (0)