|
| 1 | +<?php |
| 2 | +/** |
| 3 | + * Class URLExtractor |
| 4 | + * |
| 5 | + * @created 15.08.2019 |
| 6 | + * @author smiley <smiley@chillerlan.net> |
| 7 | + * @copyright 2019 smiley |
| 8 | + * @license MIT |
| 9 | + */ |
| 10 | + |
| 11 | +declare(strict_types=1); |
| 12 | + |
| 13 | +namespace chillerlan\HTTP\Utils\Client; |
| 14 | + |
| 15 | +use Psr\Http\Client\ClientInterface; |
| 16 | +use Psr\Http\Message\{RequestFactoryInterface, RequestInterface, ResponseInterface, UriInterface}; |
| 17 | +use function array_reverse, in_array; |
| 18 | + |
/**
 * A client that follows redirects until it reaches a non-30x response, e.g. to extract shortened URLs
 *
 * The given HTTP client needs to be set up accordingly:
 *
 *   - it must not follow redirects on its own (for cURL based clients: CURLOPT_FOLLOWLOCATION set to false),
 *     so that the 30x responses can be intercepted here
 *
 * NOTE(review): a redirect limit of the wrapped client (e.g. CURLOPT_MAXREDIRS) presumably never applies
 * while that client is not following redirects itself - confirm against the client in use. The length of
 * the redirect chain is therefore limited by this class via $maxRedirects.
 */
class URLExtractor implements ClientInterface{

	/**
	 * HTTP status codes that are treated as a redirect to follow
	 *
	 * @var int[]
	 */
	protected const REDIRECT_STATUS = [301, 302, 303, 307, 308];

	/**
	 * The responses collected during the most recent sendRequest() call, in the order they were received
	 *
	 * @var \Psr\Http\Message\ResponseInterface[]
	 */
	protected array $responses = [];

	/**
	 * URLExtractor constructor.
	 *
	 * @param \Psr\Http\Client\ClientInterface          $http           the client that performs the actual requests
	 * @param \Psr\Http\Message\RequestFactoryInterface $requestFactory creates the follow-up request for each redirect
	 * @param int                                       $maxRedirects   maximum number of redirects that are followed
	 *                                                                  per sendRequest() call
	 */
	public function __construct(
		protected ClientInterface $http,
		protected RequestFactoryInterface $requestFactory,
		protected int $maxRedirects = 20,
	){

	}

	/**
	 * Sends the given request and follows the "Location" header of each redirect response
	 * (301, 302, 303, 307, 308) with a new request that uses the same HTTP method.
	 *
	 * Every received response is recorded and available via getResponses() afterwards; the record
	 * is cleared at the start of each call. Following stops at the first response that is not a redirect
	 * or that carries no "Location" header, or after $maxRedirects redirects have been followed - in the
	 * latter case the last redirect response is returned as-is.
	 *
	 * The "Location" value is handed to the request factory unchanged, a relative reference is not resolved
	 * against the URI of the current request. Exceptions thrown by the wrapped client are not caught here.
	 *
	 * @inheritDoc
	 */
	public function sendRequest(RequestInterface $request):ResponseInterface{
		// each call starts its own chain - responses of a previous call must not leak into this one
		$this->responses = [];
		$redirects       = 0;

		while(true){
			// fetch the response for the current request
			$response          = $this->http->sendRequest($request);
			$this->responses[] = $response;
			$location          = $response->getHeaderLine('location');

			// done: either not a redirect at all, or a redirect that does not say where to go
			if($location === '' || !in_array($response->getStatusCode(), self::REDIRECT_STATUS, true)){
				return $response;
			}

			// bail out of overlong (possibly circular) chains instead of looping forever
			if($redirects >= $this->maxRedirects){
				return $response;
			}

			$redirects++;

			// set up a new request to the location header of the last response
			$request = $this->requestFactory->createRequest($request->getMethod(), $location);
		}
	}

	/**
	 * Extracts the given URL and returns the last non-empty "Location" header of the redirect chain.
	 *
	 * Returns null if the final response does not have the status code 200, or if none of the
	 * received responses carried a "Location" header (i.e. the given URL was not redirected).
	 */
	public function extract(UriInterface|string $shortURL):string|null{
		$request  = $this->requestFactory->createRequest('GET', $shortURL);
		$response = $this->sendRequest($request);

		if($response->getStatusCode() !== 200){
			return null;
		}

		// walk the chain backwards: the newest response with a location header holds the final target
		foreach(array_reverse($this->responses) as $r){
			$url = $r->getHeaderLine('location');

			if($url !== ''){
				return $url;
			}
		}

		return null;
	}

	/**
	 * Returns the responses that were received during the most recent sendRequest()/extract() call,
	 * in the order they were received (empty if no request has been sent yet).
	 *
	 * @return \Psr\Http\Message\ResponseInterface[]
	 */
	public function getResponses():array{
		return $this->responses;
	}

}
0 commit comments