Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
composer.lock
.cache
tests/.phpunit.result.cache
build/
build/
51 changes: 49 additions & 2 deletions tests/Fixtures/Downloader/CachedHttpDownloader.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,20 @@
namespace Tests\Fixtures\Downloader;

use GuzzleHttp\Client;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\RequestException;
use Symfony\Component\Cache\Adapter\FilesystemAdapter;
use Psr\Http\Message\ResponseInterface;
use RuntimeException;

class CachedHttpDownloader
{
private Client $client;
private FilesystemAdapter $cache;
private int $requestDelayMs;
private int $maxRetries;

public function __construct(string $namespace, string $cachePath = __DIR__ . '/../../../.cache')
public function __construct(string $namespace, string $cachePath = __DIR__ . '/../../../.cache', int $requestDelayMs = 1000, int $maxRetries = 3)
{
$this->client = new Client([
'headers' => [
Expand All @@ -20,6 +25,8 @@ public function __construct(string $namespace, string $cachePath = __DIR__ . '/.
]);

$this->cache = new FilesystemAdapter($namespace, 0, $cachePath);
$this->requestDelayMs = $requestDelayMs;
$this->maxRetries = $maxRetries;
}

public function fetch(string $url, bool $forceRefresh = false): string
Expand All @@ -38,7 +45,7 @@ public function fetch(string $url, bool $forceRefresh = false): string
}
}

$response = $this->client->get($url, ['headers' => $headers]);
$response = $this->fetchWithRetry($url, ['headers' => $headers]);

if ($response->getStatusCode() === 304) {
$cachedData = $cachedItem->get();
Expand All @@ -56,4 +63,44 @@ public function fetch(string $url, bool $forceRefresh = false): string

return $body;
}

/**
 * Sends a GET request, retrying transient failures with exponential backoff.
 *
 * Timing: the first attempt is preceded by the configured base delay
 * (basic rate limiting); retry number k (k >= 1) is preceded by
 * base * 2^(k-1) milliseconds. Non-positive delays are skipped.
 *
 * Retry policy:
 *  - ClientException is retried only for the status codes listed below;
 *    any other client error is rethrown immediately.
 *  - Any other RequestException is retried. Guzzle reports 5xx responses
 *    as ServerException (a RequestException), so server errors land here.
 *  - ConnectException is retried as well. In Guzzle 7 it extends
 *    TransferException rather than RequestException, so it has to be
 *    caught explicitly for connection failures to be retried at all.
 *
 * @param string               $url     Absolute URL to request.
 * @param array<string, mixed> $options Guzzle request options, passed through unchanged.
 *
 * @return ResponseInterface The first response obtained without an exception.
 *
 * @throws ClientException  On a non-retryable client error, or once retries are exhausted.
 * @throws RequestException Once retries are exhausted for other request failures.
 * @throws \GuzzleHttp\Exception\ConnectException Once retries are exhausted for connection failures.
 * @throws RuntimeException When the configured retry count is negative, so no attempt is made.
 */
private function fetchWithRetry(string $url, array $options = []): ResponseInterface
{
    $maxRetries = $this->maxRetries;

    for ($attempt = 0; $attempt <= $maxRetries; $attempt++) {
        // Attempt 0 waits the base delay; each retry doubles the previous retry's wait.
        $delayMs = $attempt === 0
            ? $this->requestDelayMs
            : $this->requestDelayMs * (2 ** ($attempt - 1));

        // usleep() rejects negative values on PHP 8, so only sleep for a positive delay.
        if ($delayMs > 0) {
            usleep($delayMs * 1000); // Milliseconds to microseconds.
        }

        try {
            return $this->client->get($url, $options);
        } catch (ClientException $e) {
            $response = $e->getResponse();
            $status = $response ? $response->getStatusCode() : null;

            // 429 is the client error Guzzle actually raises here; the 5xx codes are
            // kept for handlers that wrap such a response in a ClientException.
            $retryable = in_array($status, [429, 502, 503, 504], true);

            if (!$retryable || $attempt >= $maxRetries) {
                throw $e;
            }
        } catch (RequestException | \GuzzleHttp\Exception\ConnectException $e) {
            // Server errors and network/connection failures: retry until the budget is spent.
            if ($attempt >= $maxRetries) {
                throw $e;
            }
        }
    }

    // Only reachable when $maxRetries is negative and the loop body never ran.
    throw new RuntimeException("Max retries ({$maxRetries}) exceeded for URL: {$url}");
}
}
8 changes: 6 additions & 2 deletions tests/Fixtures/Downloader/CssReferentialScraper.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,19 @@ class CssReferentialScraper
public function __construct(bool $forceRefresh = false)
{
$this->downloaders = [
self::MDN_URL => new CachedHttpDownloader('css_referential_mdn'),
self::W3C_URL => new CachedHttpDownloader('css_referential_w3c'),
self::MDN_URL => new CachedHttpDownloader('css_referential_mdn', __DIR__ . '/../../../.cache', 1000, 3),
self::W3C_URL => new CachedHttpDownloader('css_referential_w3c', __DIR__ . '/../../../.cache', 1000, 3),
];
$this->forceRefresh = $forceRefresh;
}

public function fetchReferentials(): array
{
$w3cReferencial = $this->fetchW3CReferential();

// Add delay between different API calls to be respectful to servers
usleep(2000000); // 2 seconds

$mdnReferencial = $this->fetchMdnReferential();

$properties = $w3cReferencial['properties'] ?? [];
Expand Down
154 changes: 154 additions & 0 deletions tests/TestSuite/Fixtures/Downloader/CachedHttpDownloaderTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
<?php

namespace Tests\TestSuite\Fixtures\Downloader;

use GuzzleHttp\Client;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Handler\MockHandler;
use GuzzleHttp\HandlerStack;
use GuzzleHttp\Psr7\Response;
use GuzzleHttp\Psr7\Request;
use PHPUnit\Framework\TestCase;
use Tests\Fixtures\Downloader\CachedHttpDownloader;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use ReflectionClass;

/**
 * Exercises CachedHttpDownloader against a Guzzle MockHandler so that no real
 * HTTP traffic is produced. Each test gets its own temporary cache directory.
 */
class CachedHttpDownloaderTest extends TestCase
{
    /** Per-test cache directory, created in setUp() and removed in tearDown(). */
    private string $tempCacheDir;

    protected function setUp(): void
    {
        $this->tempCacheDir = sys_get_temp_dir() . '/test_cache_' . uniqid();

        // Fail loudly instead of running the tests against a missing directory.
        if (!mkdir($this->tempCacheDir, 0o777, true) && !is_dir($this->tempCacheDir)) {
            $this->fail("Unable to create temporary cache directory: {$this->tempCacheDir}");
        }
    }

    protected function tearDown(): void
    {
        $this->removeDirectory($this->tempCacheDir);
    }

    public function testRetryOn429Error(): void
    {
        // Mock handler that returns 429 twice, then succeeds
        $mock = new MockHandler([
            new ClientException('Too Many Requests', new Request('GET', 'test'), new Response(429)),
            new ClientException('Too Many Requests', new Request('GET', 'test'), new Response(429)),
            new Response(200, [], 'success content'),
        ]);

        $downloader = $this->createDownloader($mock); // 10ms delay, 3 max retries

        $result = $downloader->fetch('http://test.com', true);

        $this->assertSame('success content', $result);
        // Both failures and the final success must have been consumed.
        $this->assertCount(0, $mock);
    }

    public function testRetryFailsAfterMaxAttempts(): void
    {
        // Mock handler that always returns 429: one initial attempt plus three retries
        $mock = new MockHandler([
            new ClientException('Too Many Requests', new Request('GET', 'test'), new Response(429)),
            new ClientException('Too Many Requests', new Request('GET', 'test'), new Response(429)),
            new ClientException('Too Many Requests', new Request('GET', 'test'), new Response(429)),
            new ClientException('Too Many Requests', new Request('GET', 'test'), new Response(429)),
        ]);

        $downloader = $this->createDownloader($mock); // 10ms delay, 3 max retries

        try {
            $downloader->fetch('http://test.com', true);
        } catch (ClientException $e) {
            $this->assertSame(429, $e->getResponse()->getStatusCode());
            // Every queued failure was used, i.e. exactly maxRetries + 1 requests were sent.
            $this->assertCount(0, $mock);

            return;
        }

        $this->fail('Expected a ClientException once all retries were exhausted.');
    }

    public function testSuccessfulRequest(): void
    {
        // Mock handler that returns success immediately
        $mock = new MockHandler([
            new Response(200, [], 'success content'),
        ]);

        $downloader = $this->createDownloader($mock);

        $result = $downloader->fetch('http://test.com', true);

        $this->assertSame('success content', $result);
    }

    public function testCachedResponse(): void
    {
        // First call populates the cache
        $downloader = $this->createDownloader(new MockHandler([
            new Response(200, ['ETag' => '"test-etag"'], 'cached content'),
        ]));

        $result1 = $downloader->fetch('http://test.com', true);
        $this->assertSame('cached content', $result1);

        // Second call should use cache and return 304
        $this->injectMockClient($downloader, new MockHandler([
            new Response(304),
        ]));

        $result2 = $downloader->fetch('http://test.com', false);
        $this->assertSame('cached content', $result2);
    }

    /**
     * Builds a downloader bound to the temporary cache directory whose HTTP
     * client answers from the given mock queue.
     */
    private function createDownloader(
        MockHandler $mock,
        int $requestDelayMs = 10,
        int $maxRetries = 3
    ): CachedHttpDownloader {
        $downloader = new CachedHttpDownloader('test', $this->tempCacheDir, $requestDelayMs, $maxRetries);
        $this->injectMockClient($downloader, $mock);

        return $downloader;
    }

    /**
     * Replaces the downloader's private Guzzle client with one backed by $mock.
     */
    private function injectMockClient(CachedHttpDownloader $downloader, MockHandler $mock): void
    {
        $client = new Client(['handler' => HandlerStack::create($mock)]);

        // No setAccessible() call: this file already requires PHP 8.1 (0o777 literal),
        // where reflection can access private properties without it.
        $clientProperty = (new ReflectionClass($downloader))->getProperty('client');
        $clientProperty->setValue($downloader, $client);
    }

    /**
     * Recursively deletes $dir and everything below it; does nothing when
     * $dir is not a directory.
     */
    private function removeDirectory(string $dir): void
    {
        if (!is_dir($dir)) {
            return;
        }

        // CHILD_FIRST yields directory contents before the directory itself,
        // so each directory is already empty by the time rmdir() runs on it.
        $entries = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($dir, RecursiveDirectoryIterator::SKIP_DOTS),
            RecursiveIteratorIterator::CHILD_FIRST
        );

        foreach ($entries as $entry) {
            if ($entry->isDir()) {
                rmdir($entry->getRealPath());
            } else {
                unlink($entry->getRealPath());
            }
        }

        rmdir($dir);
    }
}