|
2846 | 2846 | 0x9F: "\u0178", |
2847 | 2847 | } |
2848 | 2848 |
|
2849 | | -encodings = { |
2850 | | - '437': 'cp437', |
2851 | | - '850': 'cp850', |
2852 | | - '852': 'cp852', |
2853 | | - '855': 'cp855', |
2854 | | - '857': 'cp857', |
2855 | | - '860': 'cp860', |
2856 | | - '861': 'cp861', |
2857 | | - '862': 'cp862', |
2858 | | - '863': 'cp863', |
2859 | | - '865': 'cp865', |
2860 | | - '866': 'cp866', |
2861 | | - '869': 'cp869', |
2862 | | - 'ansix341968': 'ascii', |
2863 | | - 'ansix341986': 'ascii', |
2864 | | - 'arabic': 'iso8859-6', |
2865 | | - 'ascii': 'ascii', |
2866 | | - 'asmo708': 'iso8859-6', |
2867 | | - 'big5': 'big5', |
2868 | | - 'big5hkscs': 'big5hkscs', |
2869 | | - 'chinese': 'gbk', |
2870 | | - 'cp037': 'cp037', |
2871 | | - 'cp1026': 'cp1026', |
2872 | | - 'cp154': 'ptcp154', |
2873 | | - 'cp367': 'ascii', |
2874 | | - 'cp424': 'cp424', |
2875 | | - 'cp437': 'cp437', |
2876 | | - 'cp500': 'cp500', |
2877 | | - 'cp775': 'cp775', |
2878 | | - 'cp819': 'windows-1252', |
2879 | | - 'cp850': 'cp850', |
2880 | | - 'cp852': 'cp852', |
2881 | | - 'cp855': 'cp855', |
2882 | | - 'cp857': 'cp857', |
2883 | | - 'cp860': 'cp860', |
2884 | | - 'cp861': 'cp861', |
2885 | | - 'cp862': 'cp862', |
2886 | | - 'cp863': 'cp863', |
2887 | | - 'cp864': 'cp864', |
2888 | | - 'cp865': 'cp865', |
2889 | | - 'cp866': 'cp866', |
2890 | | - 'cp869': 'cp869', |
2891 | | - 'cp936': 'gbk', |
2892 | | - 'cpgr': 'cp869', |
2893 | | - 'cpis': 'cp861', |
2894 | | - 'csascii': 'ascii', |
2895 | | - 'csbig5': 'big5', |
2896 | | - 'cseuckr': 'cp949', |
2897 | | - 'cseucpkdfmtjapanese': 'euc_jp', |
2898 | | - 'csgb2312': 'gbk', |
2899 | | - 'cshproman8': 'hp-roman8', |
2900 | | - 'csibm037': 'cp037', |
2901 | | - 'csibm1026': 'cp1026', |
2902 | | - 'csibm424': 'cp424', |
2903 | | - 'csibm500': 'cp500', |
2904 | | - 'csibm855': 'cp855', |
2905 | | - 'csibm857': 'cp857', |
2906 | | - 'csibm860': 'cp860', |
2907 | | - 'csibm861': 'cp861', |
2908 | | - 'csibm863': 'cp863', |
2909 | | - 'csibm864': 'cp864', |
2910 | | - 'csibm865': 'cp865', |
2911 | | - 'csibm866': 'cp866', |
2912 | | - 'csibm869': 'cp869', |
2913 | | - 'csiso2022jp': 'iso2022_jp', |
2914 | | - 'csiso2022jp2': 'iso2022_jp_2', |
2915 | | - 'csiso2022kr': 'iso2022_kr', |
2916 | | - 'csiso58gb231280': 'gbk', |
2917 | | - 'csisolatin1': 'windows-1252', |
2918 | | - 'csisolatin2': 'iso8859-2', |
2919 | | - 'csisolatin3': 'iso8859-3', |
2920 | | - 'csisolatin4': 'iso8859-4', |
2921 | | - 'csisolatin5': 'windows-1254', |
2922 | | - 'csisolatin6': 'iso8859-10', |
2923 | | - 'csisolatinarabic': 'iso8859-6', |
2924 | | - 'csisolatincyrillic': 'iso8859-5', |
2925 | | - 'csisolatingreek': 'iso8859-7', |
2926 | | - 'csisolatinhebrew': 'iso8859-8', |
2927 | | - 'cskoi8r': 'koi8-r', |
2928 | | - 'csksc56011987': 'cp949', |
2929 | | - 'cspc775baltic': 'cp775', |
2930 | | - 'cspc850multilingual': 'cp850', |
2931 | | - 'cspc862latinhebrew': 'cp862', |
2932 | | - 'cspc8codepage437': 'cp437', |
2933 | | - 'cspcp852': 'cp852', |
2934 | | - 'csptcp154': 'ptcp154', |
2935 | | - 'csshiftjis': 'shift_jis', |
2936 | | - 'csunicode11utf7': 'utf-7', |
2937 | | - 'cyrillic': 'iso8859-5', |
2938 | | - 'cyrillicasian': 'ptcp154', |
2939 | | - 'ebcdiccpbe': 'cp500', |
2940 | | - 'ebcdiccpca': 'cp037', |
2941 | | - 'ebcdiccpch': 'cp500', |
2942 | | - 'ebcdiccphe': 'cp424', |
2943 | | - 'ebcdiccpnl': 'cp037', |
2944 | | - 'ebcdiccpus': 'cp037', |
2945 | | - 'ebcdiccpwt': 'cp037', |
2946 | | - 'ecma114': 'iso8859-6', |
2947 | | - 'ecma118': 'iso8859-7', |
2948 | | - 'elot928': 'iso8859-7', |
2949 | | - 'eucjp': 'euc_jp', |
2950 | | - 'euckr': 'cp949', |
2951 | | - 'extendedunixcodepackedformatforjapanese': 'euc_jp', |
2952 | | - 'gb18030': 'gb18030', |
2953 | | - 'gb2312': 'gbk', |
2954 | | - 'gb231280': 'gbk', |
2955 | | - 'gbk': 'gbk', |
2956 | | - 'greek': 'iso8859-7', |
2957 | | - 'greek8': 'iso8859-7', |
2958 | | - 'hebrew': 'iso8859-8', |
2959 | | - 'hproman8': 'hp-roman8', |
2960 | | - 'hzgb2312': 'hz', |
2961 | | - 'ibm037': 'cp037', |
2962 | | - 'ibm1026': 'cp1026', |
2963 | | - 'ibm367': 'ascii', |
2964 | | - 'ibm424': 'cp424', |
2965 | | - 'ibm437': 'cp437', |
2966 | | - 'ibm500': 'cp500', |
2967 | | - 'ibm775': 'cp775', |
2968 | | - 'ibm819': 'windows-1252', |
2969 | | - 'ibm850': 'cp850', |
2970 | | - 'ibm852': 'cp852', |
2971 | | - 'ibm855': 'cp855', |
2972 | | - 'ibm857': 'cp857', |
2973 | | - 'ibm860': 'cp860', |
2974 | | - 'ibm861': 'cp861', |
2975 | | - 'ibm862': 'cp862', |
2976 | | - 'ibm863': 'cp863', |
2977 | | - 'ibm864': 'cp864', |
2978 | | - 'ibm865': 'cp865', |
2979 | | - 'ibm866': 'cp866', |
2980 | | - 'ibm869': 'cp869', |
2981 | | - 'iso2022jp': 'iso2022_jp', |
2982 | | - 'iso2022jp2': 'iso2022_jp_2', |
2983 | | - 'iso2022kr': 'iso2022_kr', |
2984 | | - 'iso646irv1991': 'ascii', |
2985 | | - 'iso646us': 'ascii', |
2986 | | - 'iso88591': 'windows-1252', |
2987 | | - 'iso885910': 'iso8859-10', |
2988 | | - 'iso8859101992': 'iso8859-10', |
2989 | | - 'iso885911987': 'windows-1252', |
2990 | | - 'iso885913': 'iso8859-13', |
2991 | | - 'iso885914': 'iso8859-14', |
2992 | | - 'iso8859141998': 'iso8859-14', |
2993 | | - 'iso885915': 'iso8859-15', |
2994 | | - 'iso885916': 'iso8859-16', |
2995 | | - 'iso8859162001': 'iso8859-16', |
2996 | | - 'iso88592': 'iso8859-2', |
2997 | | - 'iso885921987': 'iso8859-2', |
2998 | | - 'iso88593': 'iso8859-3', |
2999 | | - 'iso885931988': 'iso8859-3', |
3000 | | - 'iso88594': 'iso8859-4', |
3001 | | - 'iso885941988': 'iso8859-4', |
3002 | | - 'iso88595': 'iso8859-5', |
3003 | | - 'iso885951988': 'iso8859-5', |
3004 | | - 'iso88596': 'iso8859-6', |
3005 | | - 'iso885961987': 'iso8859-6', |
3006 | | - 'iso88597': 'iso8859-7', |
3007 | | - 'iso885971987': 'iso8859-7', |
3008 | | - 'iso88598': 'iso8859-8', |
3009 | | - 'iso885981988': 'iso8859-8', |
3010 | | - 'iso88599': 'windows-1254', |
3011 | | - 'iso885991989': 'windows-1254', |
3012 | | - 'isoceltic': 'iso8859-14', |
3013 | | - 'isoir100': 'windows-1252', |
3014 | | - 'isoir101': 'iso8859-2', |
3015 | | - 'isoir109': 'iso8859-3', |
3016 | | - 'isoir110': 'iso8859-4', |
3017 | | - 'isoir126': 'iso8859-7', |
3018 | | - 'isoir127': 'iso8859-6', |
3019 | | - 'isoir138': 'iso8859-8', |
3020 | | - 'isoir144': 'iso8859-5', |
3021 | | - 'isoir148': 'windows-1254', |
3022 | | - 'isoir149': 'cp949', |
3023 | | - 'isoir157': 'iso8859-10', |
3024 | | - 'isoir199': 'iso8859-14', |
3025 | | - 'isoir226': 'iso8859-16', |
3026 | | - 'isoir58': 'gbk', |
3027 | | - 'isoir6': 'ascii', |
3028 | | - 'koi8r': 'koi8-r', |
3029 | | - 'koi8u': 'koi8-u', |
3030 | | - 'korean': 'cp949', |
3031 | | - 'ksc5601': 'cp949', |
3032 | | - 'ksc56011987': 'cp949', |
3033 | | - 'ksc56011989': 'cp949', |
3034 | | - 'l1': 'windows-1252', |
3035 | | - 'l10': 'iso8859-16', |
3036 | | - 'l2': 'iso8859-2', |
3037 | | - 'l3': 'iso8859-3', |
3038 | | - 'l4': 'iso8859-4', |
3039 | | - 'l5': 'windows-1254', |
3040 | | - 'l6': 'iso8859-10', |
3041 | | - 'l8': 'iso8859-14', |
3042 | | - 'latin1': 'windows-1252', |
3043 | | - 'latin10': 'iso8859-16', |
3044 | | - 'latin2': 'iso8859-2', |
3045 | | - 'latin3': 'iso8859-3', |
3046 | | - 'latin4': 'iso8859-4', |
3047 | | - 'latin5': 'windows-1254', |
3048 | | - 'latin6': 'iso8859-10', |
3049 | | - 'latin8': 'iso8859-14', |
3050 | | - 'latin9': 'iso8859-15', |
3051 | | - 'ms936': 'gbk', |
3052 | | - 'mskanji': 'shift_jis', |
3053 | | - 'pt154': 'ptcp154', |
3054 | | - 'ptcp154': 'ptcp154', |
3055 | | - 'r8': 'hp-roman8', |
3056 | | - 'roman8': 'hp-roman8', |
3057 | | - 'shiftjis': 'shift_jis', |
3058 | | - 'tis620': 'cp874', |
3059 | | - 'unicode11utf7': 'utf-7', |
3060 | | - 'us': 'ascii', |
3061 | | - 'usascii': 'ascii', |
3062 | | - 'utf16': 'utf-16', |
3063 | | - 'utf16be': 'utf-16-be', |
3064 | | - 'utf16le': 'utf-16-le', |
3065 | | - 'utf8': 'utf-8', |
3066 | | - 'windows1250': 'cp1250', |
3067 | | - 'windows1251': 'cp1251', |
3068 | | - 'windows1252': 'cp1252', |
3069 | | - 'windows1253': 'cp1253', |
3070 | | - 'windows1254': 'cp1254', |
3071 | | - 'windows1255': 'cp1255', |
3072 | | - 'windows1256': 'cp1256', |
3073 | | - 'windows1257': 'cp1257', |
3074 | | - 'windows1258': 'cp1258', |
3075 | | - 'windows936': 'gbk', |
3076 | | - 'x-x-big5': 'big5'} |
3077 | | - |
3078 | 2849 | tokenTypes = { |
3079 | 2850 | "Doctype": 0, |
3080 | 2851 | "Characters": 1, |
|
0 commit comments