1+ // #region File Annotation
2+ //
3+ // Author:Zhiqiang Li
4+ //
5+ // FileName:FileEncoding.cs
6+ //
7+ // Project:CnBlogPublishTool
8+ //
9+ // CreateDate:2018/05/20
10+ //
11+ // Note: The reference to this document code must not delete this note, and indicate the source!
12+ //
13+ // #endregion
14+
15+ using System ;
16+ using System . IO ;
17+ using System . Text ;
18+
19+ namespace CnBlogPublishTool . Util
20+ {
/// <summary>
/// Detects the text encoding of a file from its byte-order mark (BOM) or,
/// when no BOM is present, by checking whether the content is well-formed UTF-8.
/// </summary>
public class EncodingType
{
    /// <summary>Size, in bytes, of the chunk used when scanning BOM-less content.</summary>
    private const int BufferSize = 4096;

    /// <summary>
    /// Opens the file at the given path read-only and determines its text encoding.
    /// </summary>
    /// <param name="FILE_NAME">Path of the file to inspect.</param>
    /// <returns>
    /// <see cref="Encoding.UTF8"/>, <see cref="Encoding.Unicode"/>,
    /// <see cref="Encoding.BigEndianUnicode"/>, or <see cref="Encoding.Default"/>
    /// when none of those is recognised.
    /// </returns>
    public static System.Text.Encoding GetType(string FILE_NAME)
    {
        // The using statement releases the file handle even when detection throws.
        using (FileStream fs = new FileStream(FILE_NAME, FileMode.Open, FileAccess.Read))
        {
            return GetType(fs);
        }
    }

    /// <summary>
    /// Determines the text encoding of the data in the given file stream, reading
    /// from the stream's current position to its end.
    /// </summary>
    /// <remarks>
    /// The stream is closed before this method returns (also when it throws).
    /// Content without a BOM that is empty or pure ASCII is reported as UTF-8,
    /// because such content is well-formed UTF-8.
    /// </remarks>
    /// <param name="fs">File stream to inspect; it is closed by this method.</param>
    /// <returns>
    /// <see cref="Encoding.UTF8"/>, <see cref="Encoding.Unicode"/>,
    /// <see cref="Encoding.BigEndianUnicode"/>, or <see cref="Encoding.Default"/>
    /// when none of those is recognised.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
    public static System.Text.Encoding GetType(FileStream fs)
    {
        if (fs == null)
        {
            throw new ArgumentNullException("fs");
        }

        try
        {
            byte[] prefix = new byte[3];
            int prefixLength = ReadPrefix(fs, prefix);

            // BOMs are checked first. 0xFF and 0xFE can never occur in UTF-8, so a
            // UTF-16 BOM can never be mistaken for (or hidden by) valid UTF-8 content.
            if (prefixLength >= 3 && prefix[0] == 0xEF && prefix[1] == 0xBB && prefix[2] == 0xBF)
            {
                return Encoding.UTF8;
            }
            if (prefixLength >= 2 && prefix[0] == 0xFE && prefix[1] == 0xFF)
            {
                return Encoding.BigEndianUnicode;
            }
            if (prefixLength >= 2 && prefix[0] == 0xFF && prefix[1] == 0xFE)
            {
                return Encoding.Unicode;
            }

            // No BOM: stream through the content and validate it as UTF-8. The count of
            // outstanding continuation bytes is carried across chunk boundaries.
            int pendingContinuations = 0;
            if (!ScanUtf8(prefix, prefixLength, ref pendingContinuations))
            {
                return Encoding.Default;
            }

            byte[] buffer = new byte[BufferSize];
            int bytesRead;
            while ((bytesRead = fs.Read(buffer, 0, buffer.Length)) > 0)
            {
                if (!ScanUtf8(buffer, bytesRead, ref pendingContinuations))
                {
                    return Encoding.Default;
                }
            }

            // Data that ends in the middle of a multi-byte sequence is not valid UTF-8.
            return pendingContinuations == 0 ? Encoding.UTF8 : Encoding.Default;
        }
        finally
        {
            // Callers have always received a closed stream from this method; keep that.
            fs.Close();
        }
    }

    /// <summary>
    /// Fills <paramref name="target"/> from the stream, stopping early only at end of stream.
    /// </summary>
    /// <param name="stream">Stream to read from.</param>
    /// <param name="target">Buffer that receives the bytes.</param>
    /// <returns>Number of bytes actually placed in <paramref name="target"/>.</returns>
    private static int ReadPrefix(Stream stream, byte[] target)
    {
        int total = 0;
        while (total < target.Length)
        {
            // Stream.Read may return fewer bytes than requested, so loop until full or EOF.
            int read = stream.Read(target, total, target.Length - total);
            if (read == 0)
            {
                break;
            }
            total += read;
        }
        return total;
    }

    /// <summary>
    /// Checks that the first <paramref name="count"/> bytes of <paramref name="data"/>
    /// continue a structurally valid UTF-8 byte sequence.
    /// </summary>
    /// <param name="data">Buffer holding the bytes to check.</param>
    /// <param name="count">Number of bytes in <paramref name="data"/> to check.</param>
    /// <param name="pendingContinuations">
    /// Continuation bytes still owed by the current multi-byte character; updated so
    /// the scan can resume with the next chunk.
    /// </param>
    /// <returns>False as soon as a byte violates the UTF-8 lead/continuation structure.</returns>
    private static bool ScanUtf8(byte[] data, int count, ref int pendingContinuations)
    {
        for (int i = 0; i < count; i++)
        {
            byte current = data[i];

            if (pendingContinuations > 0)
            {
                // Inside a multi-byte character every byte must look like 10xxxxxx.
                if ((current & 0xC0) != 0x80)
                {
                    return false;
                }
                pendingContinuations--;
            }
            else if (current >= 0x80)
            {
                // A lead byte announces the sequence length with its run of leading 1 bits.
                int leadingOnes = 0;
                for (int mask = 0x80; (current & mask) != 0; mask >>= 1)
                {
                    leadingOnes++;
                }

                // One leading 1 is a stray continuation byte; more than six is not a lead
                // byte at all. Lengths 2..6 are accepted (the legacy 5- and 6-byte forms
                // included), which is the same permissive rule this class has always used.
                if (leadingOnes < 2 || leadingOnes > 6)
                {
                    return false;
                }
                pendingContinuations = leadingOnes - 1;
            }
        }
        return true;
    }
}
118+ }
0 commit comments