Skip to content

Commit 5b6b59f

Browse files
author
reef-actor
committed
Refactor ProxifyPlugin
Use regex named capture groups to allow reuse of callbacks. Added 'image' & 'font' to content-type blacklist.
1 parent 19adb70 commit 5b6b59f

File tree

1 file changed

+76
-175
lines changed

1 file changed

+76
-175
lines changed

src/Plugin/ProxifyPlugin.php

Lines changed: 76 additions & 175 deletions
Original file line numberDiff line numberDiff line change
@@ -1,184 +1,85 @@
11
<?php
2-
32
namespace Proxy\Plugin;
43

54
use Proxy\Plugin\AbstractPlugin;
65
use Proxy\Event\ProxyEvent;
76

8-
class ProxifyPlugin extends AbstractPlugin {
9-
10-
private $base_url = '';
11-
12-
private function css_url($matches){
13-
14-
$url = trim($matches[1]);
15-
if(starts_with($url, 'data:')){
16-
return $matches[0];
17-
}
18-
19-
return str_replace($matches[1], proxify_url($matches[1], $this->base_url), $matches[0]);
20-
}
21-
22-
// this.params.logoImg&&(e="background-image: url("+this.params.logoImg+")")
23-
private function css_import($matches){
24-
return str_replace($matches[2], proxify_url($matches[2], $this->base_url), $matches[0]);
25-
}
7+
class ProxifyPlugin extends AbstractPlugin
8+
{
9+
private const CONTENT_TYPE_BLACKLIST = ['image', 'font', 'application/javascript', 'application/x-javascript', 'text/javascript', 'text/plain'];
10+
private const LINK_TYPE_BLACKLIST = ['data:', 'magnet:', 'about:', 'javascript:', 'mailto:', 'tel:', 'ios-app:', 'android-app:'];
11+
private const CONTENT_PARSERS = [
12+
'@\bcontent=(?<quote>\'|")\d+\s*;\s*url=(?<url>.*?)\k<quote>@is' => 'self::proxifyUrlCallback', // content="X;url=<url>" (meta-refresh)
13+
'@\b(?:src|href)\s*=\s*(?<quote>\'|")(?<url>.*?)\k<quote>@is' => 'self::proxifyUrlCallback', // src="<url>" & href="<url>"
14+
'@[^a-z]{1}url\s*\((?<delim>\'|"|)(?<url>[^\)]*)\k<delim>\)@im' => 'self::proxifyUrlCallback', // url(<url>)
15+
'@\@import\s+(?<quote>\'|")(?<url>.*?)\k<quote>@im' => 'self::proxifyUrlCallback', // @import '<url>'
16+
'@\b(?:srcset)\s*=\s*(?<quote>\'|")(?<value>.*?)\k<quote>@im' => 'self::proxifySrcsetAttributeCallback', // srcset="<url> xxx, …"
17+
'@<\s*form[^>]*action=(?<quote>\'|")(?<url>.*?)\k<quote>[^>]*>@im' => 'self::proxifyFormCallback', // <form action="<url>" …>
18+
];
19+
20+
private $base_url = '';
21+
22+
public function onCompleted(ProxyEvent $event)
23+
{
24+
$response = $event['response'];
25+
$content_type = $response->headers->get('content-type');
26+
if (starts_with($content_type, self::CONTENT_TYPE_BLACKLIST))
27+
{
28+
return;
29+
}
2630

27-
// replace src= and href=
28-
private function html_attr($matches){
29-
30-
// could be empty?
31-
$url = trim($matches[2]);
32-
33-
$schemes = array('data:', 'magnet:', 'about:', 'javascript:', 'mailto:', 'tel:', 'ios-app:', 'android-app:');
34-
if(starts_with($url, $schemes)){
35-
return $matches[0];
36-
}
37-
38-
return str_replace($url, proxify_url($url, $this->base_url), $matches[0]);
39-
}
40-
41-
private function form_action($matches){
42-
43-
// sometimes form action is empty - which means a postback to the current page
44-
// $matches[1] holds single or double quote - whichever was used by webmaster
45-
46-
// $matches[2] holds form submit URL - can be empty which in that case should be replaced with current URL
47-
if(!$matches[2]){
48-
$matches[2] = $this->base_url;
49-
}
50-
51-
$new_action = proxify_url($matches[2], $this->base_url);
52-
53-
// what is form method?
54-
$form_post = preg_match('@method=(["\'])post\1@i', $matches[0]) == 1;
55-
56-
// take entire form string - find real url and replace it with proxified url
57-
$result = str_replace($matches[2], $new_action, $matches[0]);
58-
59-
// must be converted to POST otherwise GET form would just start appending name=value pairs to your proxy url
60-
if(!$form_post){
61-
62-
// may throw Duplicate Attribute warning but only first method matters
63-
$result = str_replace("<form", '<form method="POST"', $result);
64-
65-
// got the idea from Glype - insert this input field to notify proxy later that this form must be converted to GET during http
66-
$result .= '<input type="hidden" name="convertGET" value="1">';
67-
}
68-
69-
return $result;
70-
}
71-
72-
public function onBeforeRequest(ProxyEvent $event){
73-
74-
$request = $event['request'];
75-
76-
// check if one of the POST pairs is convertGET - if so, convert this request to GET
77-
if($request->post->has('convertGET')){
78-
79-
// we don't need this parameter anymore
80-
$request->post->remove('convertGET');
81-
82-
// replace all GET parameters with POST data
83-
$request->get->replace($request->post->all());
84-
85-
// remove POST data
86-
$request->post->clear();
87-
88-
// This is now a GET request
89-
$request->setMethod('GET');
90-
91-
$request->prepare();
92-
}
93-
}
94-
95-
private function meta_refresh($matches){
96-
$url = $matches[2];
97-
return str_replace($url, proxify_url($url, $this->base_url), $matches[0]);
98-
}
99-
100-
// <title>, <base>, <link>, <style>, <meta>, <script>, <noscript>
101-
private function proxify_head($str){
102-
103-
// base - update base_url contained in href - remove <base> tag entirely
104-
//$str = preg_replace_callback('/<base[^>]*href=
105-
106-
// link - replace href with proxified
107-
// link rel="shortcut icon" - replace or remove
108-
109-
// meta - only interested in http-equiv - replace url refresh
110-
// <meta http-equiv="refresh" content="5; url=http://example.com/">
111-
$str = preg_replace_callback('/content=(["\'])\d+\s*;\s*url=(.*?)\1/is', array($this, 'meta_refresh'), $str);
112-
113-
return $str;
114-
}
115-
116-
// The <body> background attribute is not supported in HTML5. Use CSS instead.
117-
private function proxify_css($str){
118-
119-
// The HTML5 standard does not require quotes around attribute values.
120-
121-
// if {1} is not there then youtube breaks for some reason
122-
$str = preg_replace_callback('@[^a-z]{1}url\s*\((?:\'|"|)(.*?)(?:\'|"|)\)@im', array($this, 'css_url'), $str);
123-
124-
// https://developer.mozilla.org/en-US/docs/Web/CSS/@import
125-
// TODO: what about @import directives that are outside <style>?
126-
$str = preg_replace_callback('/@import (\'|")(.*?)\1/i', array($this, 'css_import'), $str);
127-
128-
return $str;
129-
}
130-
131-
public function onCompleted(ProxyEvent $event){
132-
133-
// to be used when proxifying all the relative links
134-
$this->base_url = $event['request']->getUri();
135-
$url_host = parse_url($this->base_url, PHP_URL_HOST);
136-
137-
$response = $event['response'];
138-
$content_type = $response->headers->get('content-type');
139-
140-
$str = $response->getContent();
141-
142-
// DO NOT do any proxification on .js files and text/plain content type
143-
$no_proxify = array('text/javascript', 'application/javascript', 'application/x-javascript', 'text/plain');
144-
if(in_array($content_type, $no_proxify)){
145-
return;
146-
}
147-
148-
// let's remove all frames?? does not protect against the frames created dynamically via javascript
149-
$str = preg_replace('@<iframe[^>]*>[^<]*<\\/iframe>@is', '', $str);
150-
151-
$str = $this->proxify_head($str);
152-
$str = $this->proxify_css($str);
153-
154-
// src= and href=
155-
$str = preg_replace_callback('@(?:src|href)\s*=\s*(["|\'])(.*?)\1@is', array($this, 'html_attr'), $str);
156-
157-
// img srcset
158-
$str = preg_replace_callback('/srcset=\"(.*?)\"/i', function($matches){
159-
$src = $matches[1];
160-
161-
// url_1 1x, url_2 4x, ...
162-
$urls = preg_split('/\s*,\s*/', $src);
163-
foreach($urls as $part){
164-
165-
// TODO: add str_until helper
166-
$pos = strpos($part, ' ');
167-
if($pos !== false){
168-
$url = substr($part, 0, $pos);
169-
$src = str_replace($url, proxify_url($url, $this->base_url), $src);
170-
}
171-
}
172-
173-
return 'srcset="'.$src.'"';
174-
}, $str);
175-
176-
// form
177-
$str = preg_replace_callback('@<form[^>]*action=(["\'])(.*?)\1[^>]*>@i', array($this, 'form_action'), $str);
178-
179-
$response->setContent($str);
180-
}
31+
// to be used when proxifying all the relative links
32+
$this->base_url = $event['request']->getUri();
33+
$proxified_content = preg_replace_callback_array(self::CONTENT_PARSERS, $response->getContent());
34+
$response->setContent($proxified_content);
35+
}
36+
37+
/**
 * Request hook: passes the event's request to convertPostToGet(), which
 * rewrites it into a GET request when the "convertGET" marker is present.
 *
 * @param ProxyEvent $event Event whose 'request' entry is the outgoing request.
 */
public function onBeforeRequest(ProxyEvent $event)
{
    $this->convertPostToGet($event['request']);
}
42+
43+
/**
 * Convert a POST request carrying the "convertGET" marker into a GET request.
 *
 * proxifyFormCallback() forces non-POST forms to method="post" and injects a
 * hidden "convertGET" field; this method undoes that before the request is
 * sent on. Requests without the marker are left untouched.
 *
 * @param mixed $request Request object exposing `post` and `get` parameter
 *                       stores plus setMethod() and prepare().
 */
private function convertPostToGet($request)
{
    if (!$request->post->has('convertGET'))
    {
        return;
    }

    // The marker is proxy-internal: drop it first so it is not forwarded to
    // the target site as a spurious "convertGET=1" query parameter.
    $request->post->remove('convertGET');

    $request->get->replace($request->post->all()); // Change POST data to GET data
    $request->post->clear(); // Remove POST data
    $request->setMethod('GET'); // This is now a GET request
    $request->prepare();
}
55+
56+
/**
 * Regex callback for `<form … action="<url>" …>` tags.
 *
 * First proxifies the action URL via proxifyUrlCallback(). Then, if the tag
 * contains no quoted method="post", it injects method="post" and appends a
 * hidden "convertGET" input after the tag.
 *
 * @param array $matches Match array for the form pattern (full tag at index 0,
 *                       action URL in the 'url' group).
 * @return string|null Rewritten form tag (null if preg_replace() fails).
 */
private function proxifyFormCallback($matches)
{
    // Group 1: "<form" plus trailing whitespace. Group 2: the rest of the tag,
    // matched only while no method="post" / method='post' appears in it.
    $non_post_form_pattern = '@(<\s*form\s*)((?:(?!method=(\'|")post\3)[^>])*>)@i';
    $forced_post_form = '$1 method="post" $2<input type="hidden" name="convertGET" value="1">';

    $proxified_tag = $this->proxifyUrlCallback($matches);

    return preg_replace($non_post_form_pattern, $forced_post_form, $proxified_tag);
}
64+
65+
/**
 * Regex callback for `srcset="<url> <descriptor>, …"` attributes.
 *
 * Runs proxifyUrlCallback() over each candidate inside the attribute value
 * and returns the attribute with the rewritten value substituted in.
 *
 * @param array $matches Match array for the srcset pattern (whole attribute at
 *                       index 0, raw attribute value in the 'value' group).
 * @return string The attribute text with its value proxified.
 */
private function proxifySrcsetAttributeCallback($matches)
{
    $whole_attribute = $matches[0];
    $original_value = $matches['value'];

    // One candidate per match: optional whitespace, the URL (no whitespace or
    // commas), then either a comma or a run of non-whitespace (the descriptor).
    $rewritten_value = preg_replace_callback(
        "@(?:\s*(?<url>[^\s,]*)(?:\s*(?:,|\S*)))@im",
        [$this, 'proxifyUrlCallback'],
        $original_value
    );

    return str_replace($original_value, $rewritten_value, $whole_attribute);
}
73+
74+
/**
 * Shared regex callback: replace the URL in the named group 'url' with its
 * proxified form and return the rewritten match.
 *
 * The match is returned unchanged when there is no 'url' group, when the URL
 * is empty or whitespace only, or when it starts with a scheme listed in
 * LINK_TYPE_BLACKLIST (data:, javascript:, mailto:, …).
 *
 * @param array $matches Match array; index 0 is the full match and 'url' the
 *                       captured URL.
 * @return string The full match with the URL proxified, or the original match.
 */
private function proxifyUrlCallback($matches)
{
    $full_capture = $matches[0];

    // Trim surrounding whitespace so ' mailto:x' and 'url( a.png )' are handled
    // like their unpadded forms; the padding stays in the output because only
    // the trimmed substring is replaced below.
    $url = trim($matches['url'] ?? '');

    // Explicit empty check rather than truthiness: "0" is a valid relative URL.
    if ($url === '')
    {
        return $full_capture;
    }

    // URI schemes are case-insensitive and the blacklist entries are lowercase,
    // so compare against a lowercased copy ('JavaScript:' must be skipped too).
    if (starts_with(strtolower($url), self::LINK_TYPE_BLACKLIST))
    {
        return $full_capture;
    }

    $proxified_url = proxify_url($url, $this->base_url);

    // NOTE(review): str_replace() rewrites every occurrence of $url in the
    // match, not only the captured one, so src="s" would also alter the "s" in
    // "src". A positional replacement needs PREG_OFFSET_CAPTURE at the call
    // sites — confirm and fix there.
    return str_replace($url, $proxified_url, $full_capture);
}
18285
}
183-
184-
?>

0 commit comments

Comments
 (0)