11use std:: borrow:: Cow ;
22use std:: fmt:: Debug ;
3+ use std:: io:: { self , Write } ;
34
45use pyo3:: exceptions:: PyTypeError ;
56use pyo3:: prelude:: * ;
@@ -9,7 +10,7 @@ use pyo3::{intern, PyTraverseError, PyVisit};
910
1011use enum_dispatch:: enum_dispatch;
1112use serde:: Serialize ;
12- use serde_json:: ser:: PrettyFormatter ;
13+ use serde_json:: ser:: { Formatter , PrettyFormatter } ;
1314
1415use crate :: build_tools:: py_schema_err;
1516use crate :: build_tools:: py_schema_error_type;
@@ -432,6 +433,87 @@ impl Serialize for PydanticSerializer<'_> {
432433 }
433434}
434435
436+ struct EscapeNonAsciiFormatter ;
437+
438+ impl Formatter for EscapeNonAsciiFormatter {
439+ fn write_string_fragment < W : ?Sized + Write > ( & mut self , writer : & mut W , fragment : & str ) -> io:: Result < ( ) > {
440+ let mut input = fragment;
441+
442+ while let Some ( ( idx, non_ascii_char) ) = input. chars ( ) . enumerate ( ) . find ( |( _, c) | !c. is_ascii ( ) ) {
443+ if idx > 0 {
444+ // write all ascii characters before the non-ascii one
445+ let ascii_run = & input[ ..idx] ;
446+ writer. write_all ( ascii_run. as_bytes ( ) ) . unwrap ( ) ;
447+ }
448+
449+ let codepoint = non_ascii_char as u32 ;
450+ if codepoint < 0xFFFF {
451+ // write basic codepoint as single escape
452+ write ! ( writer, "\\ u{codepoint:04x}" ) . unwrap ( ) ;
453+ } else {
454+ // encode extended plane character as utf16 pair
455+ for escape in non_ascii_char. encode_utf16 ( & mut [ 0 ; 2 ] ) {
456+ write ! ( writer, "\\ u{escape:04x}" ) . unwrap ( ) ;
457+ }
458+ }
459+
460+ input = & input[ ( idx + non_ascii_char. len_utf8 ( ) ) ..] ;
461+ }
462+
463+ // write any ascii trailer
464+ writer. write_all ( input. as_bytes ( ) ) ?;
465+ Ok ( ( ) )
466+ }
467+ }
468+
469+ struct EscapeNonAsciiPrettyFormatter < ' a > {
470+ pretty : PrettyFormatter < ' a > ,
471+ escape_non_ascii : EscapeNonAsciiFormatter ,
472+ }
473+
474+ impl < ' a > EscapeNonAsciiPrettyFormatter < ' a > {
475+ pub fn with_indent ( indent : & ' a [ u8 ] ) -> Self {
476+ Self {
477+ pretty : PrettyFormatter :: with_indent ( indent) ,
478+ escape_non_ascii : EscapeNonAsciiFormatter ,
479+ }
480+ }
481+ }
482+
/// Generates a method that forwards to the same-named method on one of
/// `self`'s fields.
///
/// * `defer!(field, method)` expands to `method(&mut self, writer)`, which
///   calls `self.field.method(writer)`.
/// * `defer!(field, method, Type)` expands to
///   `method(&mut self, writer, arg: Type)`, which calls
///   `self.field.method(writer, arg)`.
///
/// Every generated method is generic over `W: ?Sized + io::Write` and returns
/// `io::Result<()>`.
macro_rules! defer {
    // Forwarding method that takes only the writer.
    ($field:ident, $method:ident) => {
        fn $method<W: ?Sized + io::Write>(&mut self, writer: &mut W) -> io::Result<()> {
            self.$field.$method(writer)
        }
    };
    // Forwarding method that takes the writer plus one extra argument.
    ($field:ident, $method:ident, $arg_ty:ty) => {
        fn $method<W: ?Sized + io::Write>(&mut self, writer: &mut W, arg: $arg_ty) -> io::Result<()> {
            self.$field.$method(writer, arg)
        }
    };
}
501+
502+ #[ allow( clippy:: needless_lifetimes) ]
503+ impl Formatter for EscapeNonAsciiPrettyFormatter < ' _ > {
504+ defer ! ( escape_non_ascii, write_string_fragment, & str ) ;
505+ defer ! ( pretty, begin_array) ;
506+ defer ! ( pretty, end_array) ;
507+ defer ! ( pretty, begin_array_value, bool ) ;
508+ defer ! ( pretty, end_array_value) ;
509+ defer ! ( pretty, begin_object) ;
510+ defer ! ( pretty, end_object) ;
511+ defer ! ( pretty, begin_object_key, bool ) ;
512+ defer ! ( pretty, end_object_key) ;
513+ defer ! ( pretty, begin_object_value) ;
514+ defer ! ( pretty, end_object_value) ;
515+ }
516+
435517#[ allow( clippy:: too_many_arguments) ]
436518pub ( crate ) fn to_json_bytes (
437519 value : & Bound < ' _ , PyAny > ,
@@ -440,25 +522,40 @@ pub(crate) fn to_json_bytes(
440522 exclude : Option < & Bound < ' _ , PyAny > > ,
441523 extra : & Extra ,
442524 indent : Option < usize > ,
525+ ensure_ascii : bool ,
443526 expected_json_size : usize ,
444527) -> PyResult < Vec < u8 > > {
445528 let serializer = PydanticSerializer :: new ( value, serializer, include, exclude, extra) ;
446529
447530 let writer: Vec < u8 > = Vec :: with_capacity ( expected_json_size) ;
448- let bytes = match indent {
449- Some ( indent) => {
531+
532+ let bytes = match ( indent, ensure_ascii) {
533+ ( Some ( indent) , true ) => {
534+ let indent = vec ! [ b' ' ; indent] ;
535+ let formatter = EscapeNonAsciiPrettyFormatter :: with_indent ( & indent) ;
536+ let mut ser = PythonSerializer :: with_formatter ( writer, formatter) ;
537+ serializer. serialize ( & mut ser) . map_err ( se_err_py_err) ?;
538+ ser. into_inner ( )
539+ }
540+ ( Some ( indent) , false ) => {
450541 let indent = vec ! [ b' ' ; indent] ;
451542 let formatter = PrettyFormatter :: with_indent ( & indent) ;
452543 let mut ser = PythonSerializer :: with_formatter ( writer, formatter) ;
453544 serializer. serialize ( & mut ser) . map_err ( se_err_py_err) ?;
454545 ser. into_inner ( )
455546 }
456- None => {
547+ ( None , true ) => {
548+ let mut ser = PythonSerializer :: with_formatter ( writer, EscapeNonAsciiFormatter ) ;
549+ serializer. serialize ( & mut ser) . map_err ( se_err_py_err) ?;
550+ ser. into_inner ( )
551+ }
552+ ( None , false ) => {
457553 let mut ser = PythonSerializer :: new ( writer) ;
458554 serializer. serialize ( & mut ser) . map_err ( se_err_py_err) ?;
459555 ser. into_inner ( )
460556 }
461557 } ;
558+
462559 Ok ( bytes)
463560}
464561
0 commit comments