@@ -72,10 +72,12 @@ def print_code_point_at(self, location: int) -> str:
7272 if "\x20 " <= char <= "\x7E " :
7373 return "'\" '" if char == '"' else f"'{ char } '"
7474 # Unicode code point
75- point = (
76- decode_surrogate_pair (ord (char ), ord (body [location + 1 ]))
75+ point = ord (
76+ body [location : location + 2 ]
77+ .encode ("utf-16" , "surrogatepass" )
78+ .decode ("utf-16" )
7779 if is_supplementary_code_point (body , location )
78- else ord ( char )
80+ else char
7981 )
8082 return f"U+{ point :04X} "
8183
@@ -351,7 +353,10 @@ def read_escaped_unicode_fixed_width(self, position: int) -> EscapeSequence:
351353 trailing_code = read_16_bit_hex_code (body , position + 8 )
352354 if 0xDC00 <= trailing_code <= 0xDFFF :
353355 return EscapeSequence (
354- chr (decode_surrogate_pair (code , trailing_code )), 12
356+ (chr (code ) + chr (trailing_code ))
357+ .encode ("utf-16" , "surrogatepass" )
358+ .decode ("utf-16" ),
359+ 12 ,
355360 )
356361
357362 raise GraphQLSyntaxError (
@@ -546,11 +551,10 @@ def is_supplementary_code_point(body: str, location: int) -> bool:
546551 The GraphQL specification defines source text as a sequence of unicode scalar
547552 values (which Unicode defines to exclude surrogate code points).
548553 """
549- return (
550- "\ud800 " <= body [location ] <= "\udbff "
551- and "\udc00 " <= body [location + 1 ] <= "\udfff "
552- )
553-
554-
555- def decode_surrogate_pair (leading : int , trailing : int ) -> int :
556- return 0x10000 + (((leading & 0x03FF ) << 10 ) | (trailing & 0x03FF ))
554+ try :
555+ return (
556+ "\ud800 " <= body [location ] <= "\udbff "
557+ and "\udc00 " <= body [location + 1 ] <= "\udfff "
558+ )
559+ except IndexError :
560+ return False
0 commit comments