diff --git a/src/Html.php b/src/Html.php deleted file mode 100644 index 08c4c40..0000000 --- a/src/Html.php +++ /dev/null @@ -1,182 +0,0 @@ -]*>(.*?)<\s*\/\s*script\s*>/is', '', $html); - return $html; - } - - public static function remove_styles($html){ - $html = preg_replace('/<\s*style[^>]*>(.*?)<\s*\/\s*style\s*>/is', '', $html); - return $html; - } - - public static function remove_comments($html){ - return preg_replace('//s', '', $html); - } - - private static function find($selector, $html, $start_from = 0){ - - $html = substr($html, $start_from); - - $inner_start = 0; - $inner_end = 0; - - $pattern = '//'; - - if(substr($selector, 0, 1) == '#'){ - $pattern = '/<(\w+)[^>]+id="'.substr($selector, 1).'"[^>]*>/is'; - } else if(substr($selector, 0, 1) == '.'){ - $pattern = '/<(\w+)[^>]+class="'.substr($selector, 1).'"[^>]*>/is'; - } else { - return false; - } - - if(preg_match($pattern, $html, $matches, PREG_OFFSET_CAPTURE)){ - - $outer_start = $matches[0][1]; - $inner_start = $matches[0][1] + strlen($matches[0][0]); - - // tag stuff - $tag_name = $matches[1][0]; - $tag_len = strlen($tag_name); - - $run_count = 300; - - // "open" 0){ - - $open_tag = strpos($html, "<{$tag_name}", $start); - $close_tag = strpos($html, " $outer_start + $start_from, - 'inner_start' => $inner_start + $start_from, - 'inner_end' => $inner_end + $start_from, - 'outer_end' => $outer_end + $start_from - ); - } - - return false; - } - - public static function extract_inner($selector, $html){ - return self::extract($selector, $html, true); - } - - public static function extract_outer($selector, $html){ - return self::extract($selector, $html, false); - } - - private static function extract($selector, $html, $inner = false){ - - $pos = 0; - $limit = 300; - - $result = array(); - $data = false; - - do { - - $data = self::find($selector, $html, $pos); - - if($data){ - - $code = substr($html, $inner ? $data['inner_start'] : $data['outer_start'], - $inner ? $data['inner_end'] - $data['inner_start'] : $data['outer_end'] - $data['outer_start']); - - $result[] = $code; - $pos = $data['outer_end']; - } - - } while ($data && --$limit > 0); - - return $result; - } - - public static function remove($selector, $html){ - return self::replace($selector, '', $html, false); - } - - public static function replace_outer($selector, $replace, $html, &$matches = NULL){ - return self::replace($selector, $replace, $html, false, $matches); - } - - public static function replace_inner($selector, $replace, $html, &$matches = NULL){ - return self::replace($selector, $replace, $html, true, $matches); - } - - private static function replace($selector, $replace, $html, $replace_inner = false, &$matches = NULL){ - - $start_from = 0; - $limit = 300; - - $data = false; - $replace = (array)$replace; - - do { - - $data = self::find($selector, $html, $start_from); - - if($data){ - - $r = array_shift($replace); - - // from where to where will we be replacing? - $replace_space = $replace_inner ? $data['inner_end'] - $data['inner_start'] : $data['outer_end'] - $data['outer_start']; - $replace_len = strlen($r); - - if($matches !== NULL){ - $matches[] = substr($html, $replace_inner ? $data['inner_start'] : $data['outer_start'], $replace_space); - } - - $html = substr_replace($html, $r, $replace_inner ? $data['inner_start'] : $data['outer_start'], $replace_space); - - // next time we resume search at position right at the end of this element - $start_from = $data['outer_end'] + ($replace_len - $replace_space); - } - - } while ($data && --$limit > 0); - - return $html; - } -} - -?> \ No newline at end of file diff --git a/src/Plugin/AbstractPlugin.php b/src/Plugin/AbstractPlugin.php index 605cee3..69771b0 100644 --- a/src/Plugin/AbstractPlugin.php +++ b/src/Plugin/AbstractPlugin.php @@ -4,78 +4,62 @@ use Proxy\Event\ProxyEvent; -abstract class AbstractPlugin { - - // apply these methods only to those events whose request URL passes this filter - protected $url_pattern; - - public function onBeforeRequest(ProxyEvent $event){ - // fired right before a request is being sent to a proxy - } - - public function onHeadersReceived(ProxyEvent $event){ - // fired right after response headers have been fully received - last chance to modify before sending it back to the user - } - - public function onCurlWrite(ProxyEvent $event){ - // fired as the data is being written piece by piece - } - - public function onCompleted(ProxyEvent $event){ - // fired after the full response=headers+body has been read - will only be called on "non-streaming" responses - } - - final public function subscribe($dispatcher){ - - $dispatcher->addListener('request.before_send', function($event){ - $this->route('request.before_send', $event); - }); - - $dispatcher->addListener('request.sent', function($event){ - $this->route('request.sent', $event); - }); - - $dispatcher->addListener('curl.callback.write', function($event){ - $this->route('curl.callback.write', $event); - }); - - $dispatcher->addListener('request.complete', function($event){ - $this->route('request.complete', $event); - }); - } - - // dispatch based on filter - final private function route($event_name, ProxyEvent $event){ - $url = $event['request']->getUri(); - - // url filter provided and current request url does not match it - if($this->url_pattern){ - if(starts_with($this->url_pattern, '/') && preg_match($this->url_pattern, $url) !== 1){ - return; - } else if(stripos($url, $this->url_pattern) === false){ - return; - } - } - - switch($event_name){ - - case 'request.before_send': - $this->onBeforeRequest($event); - break; - - case 'request.sent': - $this->onHeadersReceived($event); - break; - - case 'curl.callback.write': - $this->onCurlWrite($event); - break; - - case 'request.complete': - $this->onCompleted($event); - break; - } - } -} +abstract class AbstractPlugin +{ + const EVENT_LISTENERS = [ + 'request.before_send' => 'onBeforeRequest', + 'request.sent' => 'onHeadersReceived', + 'curl.callback.write' => 'onCurlWrite', + 'request.complete' => 'onCompleted', + ]; + + // apply these methods only to those events whose request URL passes this filter + protected $url_pattern; + + public function onBeforeRequest(ProxyEvent $event) + { + // fired right before a request is being sent to a proxy + } + + public function onHeadersReceived(ProxyEvent $event) + { + // fired right after response headers have been fully received - last chance to modify before sending it back to the user + } + + public function onCurlWrite(ProxyEvent $event) + { + // fired as the data is being written piece by piece + } + + public function onCompleted(ProxyEvent $event) + { + // fired after the full response=headers+body has been read - will only be called on "non-streaming" responses + } -?> + final public function subscribe($dispatcher) + { + foreach (self::EVENT_LISTENERS as $event_name => $listener) { + $dispatcher->addListener($event_name, function ($event) use ($event_name) { + $this->route($event_name, $event); + }); + } + } + + // dispatch based on filter + final private function route($event_name, ProxyEvent $event) + { + $url = $event['request']->getUri(); + + // url filter provided and current request url does not match it + if ($this->url_pattern) { + if (starts_with($this->url_pattern, '/') && preg_match($this->url_pattern, $url) !== 1) { + return; + } else if (stripos($url, $this->url_pattern) === false) { + return; + } + } + + // Call the handler for this event + [$this, self::EVENT_LISTENERS[$event_name]]($event); + } +} diff --git a/src/Plugin/ProxifyPlugin.php b/src/Plugin/ProxifyPlugin.php index 3165428..11b4c65 100644 --- a/src/Plugin/ProxifyPlugin.php +++ b/src/Plugin/ProxifyPlugin.php @@ -1,202 +1,82 @@ \'|")\d+\s*;\s*url=(?.*?)\k@is' => 'self::proxifyUrlCallback', // content="X;url=" (meta-refresh) + '@\b(?:src|href)\s*=\s*(?\'|")(?.*?)\k@is' => 'self::proxifyUrlCallback', // src="" & href="" + '@[^a-z]{1}url\s*\((?\'|"|)(?[^\)]*)\k\)@im' => 'self::proxifyUrlCallback', // url() + '@\@import\s+(?\'|")(?.*?)\k@im' => 'self::proxifyUrlCallback', // @import '' + '@\b(?:srcset)\s*=\s*(?\'|")(?.*?)\k@im' => 'self::proxifySrcsetAttributeCallback', // srcset=" xxx, …" + '@<\s*form[^>]*action=(?\'|")(?.*?)\k[^>]*>@im' => 'self::proxifyFormCallback', //
+ ]; - private $base_url = ''; - - private function css_url($matches){ - - $url = trim($matches[1]); - if(starts_with($url, 'data:')){ - return $matches[0]; - } - - return str_replace($matches[1], proxify_url($matches[1], $this->base_url), $matches[0]); - } - - // this.params.logoImg&&(e="background-image: url("+this.params.logoImg+")") - private function css_import($matches){ - return str_replace($matches[2], proxify_url($matches[2], $this->base_url), $matches[0]); - } + private $base_url = ''; - // replace src= and href= - private function html_attr($matches){ - - // could be empty? - $url = trim($matches[2]); - - $schemes = array('data:', 'magnet:', 'about:', 'javascript:', 'mailto:', 'tel:', 'ios-app:', 'android-app:'); - if(starts_with($url, $schemes)){ - return $matches[0]; - } - - return str_replace($url, proxify_url($url, $this->base_url), $matches[0]); - } + public function onCompleted(ProxyEvent $event) + { + $response = $event['response']; + $content_type = $response->headers->get('content-type'); + if (starts_with($content_type, self::CONTENT_TYPE_BLACKLIST)) { + return; + } - private function form_action($matches){ - - // sometimes form action is empty - which means a postback to the current page - // $matches[1] holds single or double quote - whichever was used by webmaster - - // $matches[2] holds form submit URL - can be empty which in that case should be replaced with current URL - if(!$matches[2]){ - $matches[2] = $this->base_url; - } - - $new_action = proxify_url($matches[2], $this->base_url); - - // what is form method? - $form_post = preg_match('@method=(["\'])post\1@i', $matches[0]) == 1; - - // take entire form string - find real url and replace it with proxified url - $result = str_replace($matches[2], $new_action, $matches[0]); - - // must be converted to POST otherwise GET form would just start appending name=value pairs to your proxy url - if(!$form_post){ - - // may throw Duplicate Attribute warning but only first method matters - $result = str_replace("post->has('convertGET')){ - - // we don't need this parameter anymore - $request->post->remove('convertGET'); - - // replace all GET parameters with POST data - $request->get->replace($request->post->all()); - - // remove POST data - $request->post->clear(); - - // This is now a GET request - $request->setMethod('GET'); - - $request->prepare(); - } - } - - private function meta_refresh($matches){ - $url = $matches[2]; - return str_replace($url, proxify_url($url, $this->base_url), $matches[0]); - } - - // , <base>, <link>, <style>, <meta>, <script>, <noscript> - private function proxify_head($str){ - - // let's replace page titles with something custom - if(Config::get('replace_title')){ - $str = preg_replace('/<title[^>]*>(.*?)<\/title>/is', '<title>'.Config::get('replace_title').'', $str); - } - - - // base - update base_url contained in href - remove tag entirely - //$str = preg_replace_callback('/]*href= - - // link - replace href with proxified - // link rel="shortcut icon" - replace or remove - - // meta - only interested in http-equiv - replace url refresh - // - $str = preg_replace_callback('/content=(["\'])\d+\s*;\s*url=(.*?)\1/is', array($this, 'meta_refresh'), $str); - - return $str; - } - - // The background attribute is not supported in HTML5. Use CSS instead. - private function proxify_css($str){ - - // The HTML5 standard does not require quotes around attribute values. - - // if {1} is not there then youtube breaks for some reason - $str = preg_replace_callback('@[^a-z]{1}url\s*\((?:\'|"|)(.*?)(?:\'|"|)\)@im', array($this, 'css_url'), $str); - - // https://developer.mozilla.org/en-US/docs/Web/CSS/@import - // TODO: what about @import directives that are outside