1- /**
2- * @license
3- *
4- * MIT License
5- *
6- * Copyright (c) 2019 Richie Bendall and 2016 - 2019 The Node Fetch Team
7- *
8- * Permission is hereby granted, free of charge, to any person obtaining a copy
9- * of this software and associated documentation files (the 'Software'), to deal
10- * in the Software without restriction, including without limitation the rights
11- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12- * copies of the Software, and to permit persons to whom the Software is
13- * furnished to do so, subject to the following conditions:
14- *
15- * The above copyright notice and this permission notice shall be included in all
16- * copies or substantial portions of the Software.
17- *
18- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24- * SOFTWARE.
25- */
26-
271import getCharset from "./utils/get-charset"
282import { decode } from "iconv-lite"
293import { load as $ } from "cheerio"
@@ -36,40 +10,40 @@ import is from "@sindresorhus/is"
3610* @param content The content to convert.
3711* @param headers HTTP Headers provided with a request.
3812*/
function convertBody(content: Buffer | string, headers?: Headers): string {
    // Decode `content` into a string. The charset is taken, in order, from:
    //   1. the "content-type" header, when `headers` is supplied;
    //   2. a declaration found in the first 1024 bytes of the body
    //      (HTML5 <meta charset>, HTML4 <meta http-equiv content>, XML prolog);
    //   3. the "utf-8" fallback.
    //
    // content: the body to convert; a string is wrapped in a Buffer first.
    // headers: optional HTTP headers; only "content-type" is read.
    // Returns the decoded text produced by iconv-lite's `decode`.

    // Work on a Buffer from here on, whatever the caller passed in.
    const buffer = is.string(content) ? Buffer.from(content) : content

    // Try to extract the content-type header.
    const contentType = !is.nullOrUndefined(headers) ? headers.get("content-type") : null

    // Resulting charset; stays undefined until one of the sources yields a value.
    let charset: string | undefined

    // Source 1: the header.
    if (contentType) charset = getCharset(contentType)

    // Source 2: no charset from the header, so peek at the body (at most 1024 bytes).
    if (!charset) {
        const head = buffer.slice(0, 1024).toString()

        if (head) {
            // Parse the peeked markup once and reuse it for both <meta> lookups.
            const html = $(head)

            charset = getCharset(
                html("meta[charset]").attr("charset") || // HTML5
                html("meta[http-equiv][content]").attr("content") || // HTML4
                // XML: rewrite the `<?xml ... ?>` prolog into a regular `<xml ...>` element
                // so its `encoding` attribute can be read. Only evaluated when neither
                // <meta> form matched.
                $(head.replace(/<\?(.*)\?>/im, "<$1>"), { xmlMode: true }).root().find("xml").attr("encoding"),
            )
        }
    }

    // Prevent decode issues when sites use incorrect encoding
    // ref: https://hsivonen.fi/encoding-menu/
    if (charset && ["gb2312", "gbk"].includes(charset.toLowerCase())) charset = "gb18030"

    // Decode the raw bytes into a string, defaulting to utf-8.
    return decode(
        buffer,
        charset || "utf-8",
    )
}
7448
// TypeScript export assignment: `convertBody` is the module's single exported value.
export = convertBody
0 commit comments