@@ -29,11 +29,16 @@ abstract class AbstractDumper implements DataDumperInterface, DumperInterface
2929 protected $ decimalPoint ; // This is locale dependent
3030 protected $ indentPad = ' ' ;
3131
32+ private $ charset ;
33+ private $ charsetConverter ;
34+
3235 /**
33- * @param callable|resource|string|null $output A line dumper callable, an opened stream or an output path, defaults to static::$defaultOutput.
36+ * @param callable|resource|string|null $output A line dumper callable, an opened stream or an output path, defaults to static::$defaultOutput.
37+ * @param string $charset The default character encoding to use for non-UTF8 strings.
3438 */
35- public function __construct ($ output = null )
39+ public function __construct ($ output = null , $ charset = null )
3640 {
41+ $ this ->setCharset ($ charset ?: ini_get ('php.output_encoding ' ) ?: ini_get ('default_charset ' ) ?: 'UTF-8 ' );
3742 $ this ->decimalPoint = (string ) 0.5 ;
3843 $ this ->decimalPoint = $ this ->decimalPoint [1 ];
3944 $ this ->setOutput ($ output ?: static ::$ defaultOutput );
@@ -67,6 +72,43 @@ public function setOutput($output)
6772 return $ prev ;
6873 }
6974
/**
 * Sets the default character encoding to use for non-UTF8 strings.
 *
 * A null, empty or UTF-8 charset is replaced by CP1252, because this charset
 * is only meant for strings that are not UTF-8. The charset is then probed
 * against mbstring first and iconv second; when neither accepts it, the
 * dumper falls back to ISO-8859-1 handled by the built-in converter.
 *
 * @param string|null $charset The default character encoding to use for non-UTF8 strings.
 *
 * @return string|null The previous charset, or null when none was set yet.
 */
public function setCharset($charset)
{
    $prev = $this->charset;
    $this->charsetConverter = 'fallback';

    // The null check must happen before strtoupper(): strtoupper(null)
    // returns '', which would make a later null comparison unreachable.
    $charset = null === $charset ? '' : strtoupper($charset);
    if ('' === $charset || 'UTF-8' === $charset || 'UTF8' === $charset) {
        $charset = 'CP1252';
    }

    // Unknown encodings are reported through warnings; record them instead
    // of letting them reach the application's error handler.
    $supported = true;
    set_error_handler(function () use (&$supported) { $supported = false; });

    $knownToMbstring = false;
    if (function_exists('mb_encoding_aliases')) {
        try {
            $knownToMbstring = (bool) mb_encoding_aliases($charset);
        } catch (\Exception $e) {
            // Treated as "not supported by mbstring".
        } catch (\Throwable $e) {
            // PHP 8 throws ValueError for unknown encodings instead of
            // warning; swallowing it keeps the handler restore below reachable.
        }
    }

    if ($knownToMbstring) {
        $this->charset = $charset;
        $this->charsetConverter = 'mbstring';
    } elseif (function_exists('iconv')) {
        // Reset the flag: the mbstring probe above may have raised a warning.
        $supported = true;
        $probe = iconv($charset, 'UTF-8', '');
        if ($supported && false !== $probe) {
            $this->charset = $charset;
            $this->charsetConverter = 'iconv';
        }
    }

    if ('fallback' === $this->charsetConverter) {
        $this->charset = 'ISO-8859-1';
    }
    restore_error_handler();

    return $prev;
}
111+
70112 /**
71113 * Sets the indentation pad string.
72114 *
@@ -131,4 +173,50 @@ protected function echoLine($line, $depth, $indentPad)
131173 fwrite ($ this ->outputStream , str_repeat ($ indentPad , $ depth ).$ line ."\n" );
132174 }
133175 }
176+
/**
 * Converts a non-UTF-8 string to UTF-8.
 *
 * The conversion uses the converter selected for the configured charset
 * (mbstring or iconv). When that converter is unavailable or rejects the
 * input, every byte is mapped as if it were ISO-8859-1.
 *
 * @param string $s The non-UTF-8 string to convert.
 *
 * @return string The string converted to UTF-8.
 */
protected function utf8Encode($s)
{
    $s = (string) $s;

    // Nothing to convert; this also guarantees a string result where
    // substr('', 0, 0) would have returned false (PHP < 7.0).
    if ('' === $s) {
        return '';
    }

    if ('mbstring' === $this->charsetConverter) {
        // Input that is not valid in the configured charset is read as raw 8-bit data.
        $from = mb_check_encoding($s, $this->charset) ? $this->charset : '8bit';

        return mb_convert_encoding($s, 'UTF-8', $from);
    }

    if ('iconv' === $this->charsetConverter) {
        $valid = true;
        set_error_handler(function () use (&$valid) { $valid = false; });
        $converted = iconv($this->charset, 'UTF-8', $s);
        restore_error_handler();

        // iconv() signals failure through a notice and/or a false return.
        if ($valid && false !== $converted) {
            return $converted;
        }
    }

    // Byte-wise ISO-8859-1 to UTF-8 mapping.
    $out = '';
    $len = strlen($s);

    for ($i = 0; $i < $len; ++$i) {
        $code = ord($s[$i]);

        if ($code < 0x80) {
            // ASCII is copied unchanged.
            $out .= $s[$i];
        } elseif ($code < 0xC0) {
            // 0x80-0xBF: lead byte 0xC2, same trailing byte.
            $out .= "\xC2".$s[$i];
        } else {
            // 0xC0-0xFF: lead byte 0xC3, trailing byte shifted down by 0x40.
            $out .= "\xC3".chr($code - 64);
        }
    }

    return $out;
}
134222}
0 commit comments