|
1 | 1 | <?php |
2 | | - |
3 | 2 | namespace Proxy\Plugin; |
4 | 3 |
|
5 | 4 | use Proxy\Plugin\AbstractPlugin; |
6 | 5 | use Proxy\Event\ProxyEvent; |
7 | 6 |
|
/**
 * Rewrites the URLs found in a proxied response body so that they are routed
 * back through the proxy, and turns GET forms into POST forms that are
 * converted back to GET when they are submitted.
 *
 * URL rewriting itself is delegated to the proxify_url() helper and prefix
 * matching to the starts_with() helper; both are defined outside this file.
 */
class ProxifyPlugin extends AbstractPlugin
{
    /** Content-type prefixes whose bodies are passed through untouched. */
    private const CONTENT_TYPE_BLACKLIST = ['image', 'font', 'application/javascript', 'application/x-javascript', 'text/javascript', 'text/plain'];

    /** URL scheme prefixes that are never rewritten. */
    private const LINK_TYPE_BLACKLIST = ['data:', 'magnet:', 'about:', 'javascript:', 'mailto:', 'tel:', 'ios-app:', 'android-app:'];

    /**
     * Regular expression => name of the private method that rewrites each match.
     *
     * Every pattern captures the text in front of the URL as `head` and the text
     * behind it as `tail`, so a callback can rebuild the match exactly instead of
     * searching for the URL inside the matched text (a search would also hit
     * attribute names: the URL "s" occurs inside `src`).
     *
     * Method names are stored rather than 'self::method' strings because that
     * callable form is deprecated as of PHP 8.2; the real callables are built
     * in onCompleted().
     */
    private const CONTENT_PARSERS = [
        // content="X;url=<url>" (meta-refresh)
        '@(?<head>\bcontent=(?<quote>\'|")\d+\s*;\s*url=)(?<url>.*?)(?<tail>\k<quote>)@is' => 'proxifyUrlCallback',
        // src="<url>" & href="<url>"
        '@(?<head>\b(?:src|href)\s*=\s*(?<quote>\'|"))(?<url>.*?)(?<tail>\k<quote>)@is' => 'proxifyUrlCallback',
        // url(<url>) - the leading [^a-z]{1} is kept from the previous implementation, which noted that youtube breaks without it
        '@(?<head>[^a-z]{1}url\s*\((?<delim>\'|"|))(?<url>[^\)]*)(?<tail>\k<delim>\))@im' => 'proxifyUrlCallback',
        // @import '<url>'
        '@(?<head>\@import\s+(?<quote>\'|"))(?<url>.*?)(?<tail>\k<quote>)@im' => 'proxifyUrlCallback',
        // srcset="<url> xxx, ..." - the `s` flag lets the value span several lines
        '@(?<head>\b(?:srcset)\s*=\s*(?<quote>\'|"))(?<value>.*?)(?<tail>\k<quote>)@ims' => 'proxifySrcsetAttributeCallback',
        // <form action="<url>" ...>
        '@(?<head><\s*form[^>]*action=(?<quote>\'|"))(?<url>.*?)(?<tail>\k<quote>[^>]*>)@im' => 'proxifyFormCallback',
    ];

    /** URI of the request currently being processed; used to resolve relative links. */
    private $base_url = '';

    /**
     * Proxifies the body of a completed response unless its content type is blacklisted.
     *
     * @param ProxyEvent $event Event exposing the 'request' and 'response' entries.
     */
    public function onCompleted(ProxyEvent $event)
    {
        $response = $event['response'];

        // The header may be missing (null) and its casing is not guaranteed.
        $content_type = strtolower(trim((string) $response->headers->get('content-type')));
        if (starts_with($content_type, self::CONTENT_TYPE_BLACKLIST))
        {
            return;
        }

        // to be used when proxifying all the relative links
        $this->base_url = $event['request']->getUri();

        $content = (string) $response->getContent();
        if ($content === '')
        {
            return;
        }

        $callbacks = [];
        foreach (self::CONTENT_PARSERS as $pattern => $method)
        {
            $callbacks[$pattern] = [$this, $method];
        }

        $proxified_content = preg_replace_callback_array($callbacks, $content);
        if ($proxified_content === null)
        {
            // PCRE failed (e.g. backtrack limit reached): keep the original body instead of blanking the page.
            return;
        }

        $response->setContent($proxified_content);
    }

    /**
     * Converts a flagged POST request back into the GET request the original form intended.
     *
     * @param ProxyEvent $event Event exposing the 'request' entry.
     */
    public function onBeforeRequest(ProxyEvent $event)
    {
        $request = $event['request'];
        $this->convertPostToGet($request);
    }

    /**
     * Turns a POST request carrying the `convertGET` marker into a GET request.
     *
     * The marker is injected by proxifyFormCallback() into forms that were not
     * POST forms to begin with.
     *
     * @param mixed $request Request object taken from the event (its class is not visible in this file).
     */
    private function convertPostToGet($request)
    {
        if (!$request->post->has('convertGET'))
        {
            return;
        }

        $request->post->remove('convertGET');          // The marker is internal: never forward it upstream
        $request->get->replace($request->post->all()); // Change POST data to GET data
        $request->post->clear();                       // Remove POST data
        $request->setMethod('GET');                    // This is now a GET request
        $request->prepare();
    }

    /**
     * Proxifies the action of a `<form ...>` tag and forces non-POST forms to POST.
     *
     * A GET form would otherwise append its name=value pairs to the proxy URL,
     * so such forms are submitted as POST with a hidden `convertGET` field.
     *
     * @param array $matches Match of the form pattern (groups: head, url, tail).
     * @return string The rewritten tag, followed by the hidden input when the method was changed.
     */
    private function proxifyFormCallback(array $matches): string
    {
        // An empty action means "post back to the current page".
        if (trim($matches['url']) === '')
        {
            $matches['url'] = $this->base_url;
        }

        $form_tag = $this->proxifyUrlCallback($matches);

        // Accept method="post", method='post' and the unquoted method=post.
        $is_post = preg_match('@\bmethod\s*=\s*(?:(["\'])post\1|post(?=[\s/>]|$))@i', $form_tag) === 1;
        if ($is_post)
        {
            return $form_tag;
        }

        // The injected attribute comes first, so it takes precedence over any later method attribute.
        $form_tag = preg_replace('@^<\s*form@i', '<form method="post"', $form_tag, 1) ?? $form_tag;
        return $form_tag . '<input type="hidden" name="convertGET" value="1">';
    }

    /**
     * Proxifies every image candidate URL inside a `srcset` attribute.
     *
     * @param array $matches Match of the srcset pattern (groups: head, value, tail).
     * @return string The attribute with each candidate URL rewritten.
     */
    private function proxifySrcsetAttributeCallback(array $matches): string
    {
        // A candidate URL starts the value or follows a comma; descriptors ("2x", "480w") are left alone.
        $candidate_pattern = '@(?<head>(?:^|,)\s*)(?<url>[^\s,]+)@';
        $proxified_value = preg_replace_callback($candidate_pattern, [$this, 'proxifyUrlCallback'], $matches['value']);
        if ($proxified_value === null)
        {
            return $matches[0];
        }

        return $matches['head'] . $proxified_value . $matches['tail'];
    }

    /**
     * Replaces the `url` group of a match with its proxified form.
     *
     * @param array $matches Match with a `url` group and optional `head` / `tail` groups around it.
     * @return string The match rebuilt around the proxified URL, or the untouched
     *                match when the URL is empty or uses a blacklisted scheme.
     */
    private function proxifyUrlCallback(array $matches): string
    {
        $url = trim($matches['url'] ?? '');

        // Compare against '' explicitly: "0" is a valid relative URL.
        if ($url === '' || starts_with($url, self::LINK_TYPE_BLACKLIST))
        {
            return $matches[0];
        }

        $proxified_url = proxify_url($url, $this->base_url);
        return ($matches['head'] ?? '') . $proxified_url . ($matches['tail'] ?? '');
    }
}
183 | | - |
184 | | -?> |
0 commit comments