@@ -22,39 +22,27 @@ private function css_url($matches){
2222 return str_replace ($ matches [1 ], proxify_url ($ matches [1 ], $ this ->base_url ), $ matches [0 ]);
2323 }
2424
25- /*
26-
27- this.params.logoImg&&(e="background-image: url("+this.params.logoImg+")")
28-
29- */
25+ // this.params.logoImg&&(e="background-image: url("+this.params.logoImg+")")
/**
 * preg_replace_callback handler for CSS @import rules.
 *
 * @param array $matches $matches[0] is the whole matched rule, $matches[2] the URL between the quotes.
 * @return string The matched rule with the URL replaced by its proxified form.
 */
private function css_import($matches){
    $rule = $matches[0];
    $target = $matches[2];
    $proxified = proxify_url($target, $this->base_url);

    return str_replace($target, $proxified, $rule);
}
3329
/**
 * preg_replace_callback handler that proxifies the value of a src= or href= attribute.
 *
 * @param array $matches $matches[0] is the whole attribute match, $matches[2] the raw attribute value.
 * @return string The attribute with its URL proxified, or the untouched match when the value is
 *                empty or uses a scheme that must not be routed through the proxy.
 */
private function html_attr($matches){

    $url = trim($matches[2]);

    // nothing to rewrite - skip the proxify_url() call altogether
    if($url === ''){
        return $matches[0];
    }

    // inline data, script pseudo-URLs and external handlers must reach the browser untouched
    $skip_schemes = array('data:', 'magnet:', 'javascript:', 'mailto:', 'tel:', 'about:');

    foreach($skip_schemes as $scheme){
        if(stripos($url, $scheme) === 0){
            return $matches[0];
        }
    }

    // Replace only the value itself. The value sits at the very end of the match, right before
    // the closing quote, so the last occurrence of $url is the value - a short URL such as
    // "src" or "h" can no longer clobber the attribute name the way a blanket str_replace() did.
    $pos = strrpos($matches[0], $url);

    return substr_replace($matches[0], proxify_url($url, $this->base_url), $pos, strlen($url));
}
4642
47- private function html_src ($ matches ){
48-
49- if (stripos (trim ($ matches [2 ]), 'data: ' ) === 0 ){
50- return $ matches [0 ];
51- }
52-
53- return str_replace ($ matches [2 ], proxify_url ($ matches [2 ], $ this ->base_url ), $ matches [0 ]);
54- }
55-
5643 private function form_action ($ matches ){
57-
44+
45+ // sometimes form action is empty - which means a postback to the current page
5846 // $matches[1] holds single or double quote - whichever was used by webmaster
5947
6048 // $matches[2] holds form submit URL - can be empty which in that case should be replaced with current URL
@@ -105,35 +93,59 @@ public function onBeforeRequest(ProxyEvent $event){
10593 $ request ->prepare ();
10694 }
10795 }
96+
/**
 * preg_replace_callback handler for the url= target inside a meta refresh content attribute.
 *
 * @param array $matches $matches[0] is the whole content="N; url=..." match, $matches[2] the raw target.
 * @return string The match with the redirect target proxified, or the untouched match when no target is present.
 */
private function meta_refresh($matches){

    // the target may be padded with whitespace or wrapped in its own quotes: content="0; url='http://...'"
    $url = trim($matches[2], " \t\n\r\0\x0B'\"");

    if($url === ''){
        return $matches[0];
    }

    // Swap only the target itself: it is the last thing in the match before the closing quote,
    // so the last occurrence of $url is the one to replace and the rest of the attribute stays intact.
    $pos = strrpos($matches[0], $url);

    return substr_replace($matches[0], proxify_url($url, $this->base_url), $pos, strlen($url));
}
101+
102+ // <title>, <base>, <link>, <style>, <meta>, <script>, <noscript>
/**
 * Applies the head-related rewrites: optionally swaps the page <title> for the configured
 * 'replace_title' value and proxifies the target of meta refresh redirects.
 *
 * @param string $str HTML to process.
 * @return string Processed HTML. A rewrite step that fails inside PCRE is skipped, leaving the
 *                markup from the previous step in place.
 */
private function proxify_head($str){

    // let's replace page titles with something custom
    $title = Config::get('replace_title');

    if($title){

        // a callback keeps "$1" or "\1" inside the configured title from being read as backreferences
        $replaced = preg_replace_callback('/<title[^>]*>(.*?)<\/title>/is', function() use ($title){
            return '<title>'.$title.'</title>';
        }, $str);

        // preg_* functions return null on failure - do not let that wipe the document
        if($replaced !== null){
            $str = $replaced;
        }
    }

    // TODO: base - update base_url contained in href - remove <base> tag entirely
    // TODO: link - replace href with proxified
    // TODO: link rel="shortcut icon" - replace or remove

    // meta - only interested in http-equiv - replace url refresh
    // <meta http-equiv="refresh" content="5; url=http://example.com/">
    $replaced = preg_replace_callback('/content=(["\'])\d+\s*;\s*url=(.*?)\1/is', array($this, 'meta_refresh'), $str);

    if($replaced !== null){
        $str = $replaced;
    }

    return $str;
}
123+
124+ // The <body> background attribute is not supported in HTML5. Use CSS instead.
/**
 * Proxifies URLs referenced from CSS: url(...) values and quoted @import targets.
 *
 * @param string $str Markup or stylesheet text to process.
 * @return string The text after both rewrite passes.
 */
private function proxify_css($str){

    // pattern => name of the method that rewrites each hit; applied in this order
    $rules = array(
        // url(...) with optional quotes (the HTML5 standard does not require them);
        // if {1} is not there then youtube breaks for some reason
        '@[^a-z]{1}url\s*\((?:\'|"|)(.*?)(?:\'|"|)\)@im' => 'css_url',

        // https://developer.mozilla.org/en-US/docs/Web/CSS/@import
        // TODO: what about @import directives that are outside <style>?
        '/@import (\'|")(.*?)\1/i' => 'css_import',
    );

    foreach($rules as $pattern => $handler){
        $str = preg_replace_callback($pattern, array($this, $handler), $str);
    }

    return $str;
}
126138
127139 public function onCompleted (ProxyEvent $ event ){
128140
129141 // to be used when proxifying all the relative links
130142 $ this ->base_url = $ event ['request ' ]->getUri ();
131143
132144 $ response = $ event ['response ' ];
133- $ str = $ response ->getContent ();
134-
135145 $ content_type = $ response ->headers ->get ('content-type ' );
136146
147+ $ str = $ response ->getContent ();
148+
137149 // DO NOT do any proxification on .js files
138150 if ($ content_type == 'text/javascript ' || $ content_type == 'application/javascript ' || $ content_type == 'application/x-javascript ' ){
139151 return ;
@@ -147,44 +159,24 @@ public function onCompleted(ProxyEvent $event){
147159 foreach ($ js_remove as $ pattern ){
148160 if (strpos ($ domain , $ pattern ) !== false ){
149161 $ str = Html::remove_scripts ($ str );
150- break ;
151162 }
152163 }
153164 }
154165
166+ // add html.no-js
167+
155168 // let's remove all frames?? does not protect against the frames created dynamically via javascript
156169 $ str = preg_replace ('@<iframe[^>]*>[^<]*< \\/iframe>@is ' , '' , $ str );
157170
158- // let's replace page titles with something custom
159- if (Config::get ('replace_title ' )){
160- $ str = preg_replace ('/<title[^>]*>(.*?)<\/title>/ims ' , '<title> ' .Config::get ('replace_title ' ).'</title> ' , $ str );
161- }
171+ $ str = $ this ->proxify_head ($ str );
172+ $ str = $ this ->proxify_css ($ str );
162173
163- /* css
164- if {1} is not there then youtube breaks for some reason
165- */
166- $ str = preg_replace_callback ('@[^a-z]{1}url\s*\((?: \'|"|)(.*?)(?: \'|"|)\)@im ' , array ($ this , 'css_url ' ), $ str );
167-
168- // https://developer.mozilla.org/en-US/docs/Web/CSS/@import
169- // TODO: what about @import directives that are outside <style>?
170- $ str = preg_replace_callback ('/@import ( \'|")(.*?)\1/i ' , array ($ this , 'css_import ' ), $ str );
171-
172- // html .*? just in case href is empty...
173- $ str = preg_replace_callback ('@href\s*=\s*([" \'])(.*?)\1@im ' , array ($ this , 'html_href ' ), $ str );
174+ // src= and href=
175+ $ str = preg_replace_callback ('@(?:src|href)\s*=\s*(["| \'])(.*?)\1@is ' , array ($ this , 'html_attr ' ), $ str );
174176
175-
176- /*
177-
178- src= can be empty - then what?
179-
180- */
181- $ str = preg_replace_callback ('@src\s*=\s*(["| \'])(.*?)\1@i ' , array ($ this , 'html_src ' ), $ str );
182-
183- // sometimes form action is empty - which means a postback to the current page
177+ // form
184178 $ str = preg_replace_callback ('@<form[^>]*action=([" \'])(.*?)\1[^>]*>@i ' , array ($ this , 'form_action ' ), $ str );
185179
186- //$str = str_replace('document.forms[0]', 'document.forms[1]', $str);
187-
188180 $ response ->setContent ($ str );
189181 }
190182
0 commit comments