6060 * @author Sebastien Deleuze
6161 * @author Soby Chacko
6262 * @author Thomas Vitale
63+ * @author liugddx
6364 */
6465public class BeanOutputConverter <T > implements StructuredOutputConverter <T > {
6566
@@ -76,12 +77,15 @@ public class BeanOutputConverter<T> implements StructuredOutputConverter<T> {
7677 /** Holds the generated JSON schema for the target type. */
7778 private String jsonSchema ;
7879
80+ /** The text cleaner used to preprocess LLM responses before parsing. */
81+ private final ResponseTextCleaner textCleaner ;
82+
7983 /**
8084 * Constructor to initialize with the target type's class, leaving the object mapper and text cleaner to their defaults.
8185 * @param clazz The target type's class.
8286 */
8387 public BeanOutputConverter (Class <T > clazz ) {
84- this (ParameterizedTypeReference . forType ( clazz ) );
88+ this (clazz , null , null ); // null mapper / null cleaner are replaced by defaults in the private constructor
8589 }
8690
8791 /**
@@ -91,15 +95,26 @@ public BeanOutputConverter(Class<T> clazz) {
9195 * @param objectMapper Custom object mapper for JSON operations.
9296 */
9397 public BeanOutputConverter (Class <T > clazz , ObjectMapper objectMapper ) {
94- this (ParameterizedTypeReference .forType (clazz ), objectMapper );
98+ this (clazz , objectMapper , null ); // null text cleaner: the private constructor substitutes createDefaultTextCleaner()
99+ }
100+
101+ /**
102+ * Constructor to initialize with the target type's class, a custom object mapper, and
103+ * a custom text cleaner. Wraps the class in a type reference and delegates.
104+ * @param clazz The target type's class.
105+ * @param objectMapper Custom object mapper for JSON operations; may be null, in which case getObjectMapper() is used.
106+ * @param textCleaner Custom text cleaner for preprocessing responses; may be null, in which case the default cleaner is used.
107+ */
108+ public BeanOutputConverter (Class <T > clazz , ObjectMapper objectMapper , ResponseTextCleaner textCleaner ) {
109+ this (ParameterizedTypeReference .forType (clazz ), objectMapper , textCleaner );
95110 }
96111
97112 /**
98113 * Constructor to initialize with the target class type reference, leaving the object mapper and text cleaner to their defaults.
99114 * @param typeRef The target class type reference.
100115 */
101116 public BeanOutputConverter (ParameterizedTypeReference <T > typeRef ) {
102- this (typeRef . getType () , null );
117+ this (typeRef , null , null ); // null mapper / null cleaner are replaced by defaults in the private constructor
103118 }
104119
105120 /**
@@ -110,7 +125,19 @@ public BeanOutputConverter(ParameterizedTypeReference<T> typeRef) {
110125 * @param objectMapper Custom object mapper for JSON operations.
111126 */
112127 public BeanOutputConverter (ParameterizedTypeReference <T > typeRef , ObjectMapper objectMapper ) {
113- this (typeRef .getType (), objectMapper );
128+ this (typeRef , objectMapper , null ); // null text cleaner: the private constructor substitutes createDefaultTextCleaner()
129+ }
130+
131+ /**
132+ * Constructor to initialize with the target class type reference, a custom object
133+ * mapper, and a custom text cleaner.
134+ * @param typeRef The target class type reference; dereferenced here, so it must not be null.
135+ * @param objectMapper Custom object mapper for JSON operations; may be null, in which case getObjectMapper() is used.
136+ * @param textCleaner Custom text cleaner for preprocessing responses; may be null, in which case the default cleaner is used.
137+ */
138+ public BeanOutputConverter (ParameterizedTypeReference <T > typeRef , ObjectMapper objectMapper ,
139+ ResponseTextCleaner textCleaner ) {
140+ this (typeRef .getType (), objectMapper , textCleaner ); // unwrap to the raw Type expected by the private constructor
114141 }
115142
116143 /**
@@ -119,14 +146,42 @@ public BeanOutputConverter(ParameterizedTypeReference<T> typeRef, ObjectMapper o
119146 * platform.
120147 * @param type The target class type.
121148 * @param objectMapper Custom object mapper for JSON operations.
149+ * @param textCleaner Custom text cleaner for preprocessing responses.
122150 */
123- private BeanOutputConverter (Type type , ObjectMapper objectMapper ) {
151+ private BeanOutputConverter (Type type , ObjectMapper objectMapper , ResponseTextCleaner textCleaner ) {
124152 Objects .requireNonNull (type , "Type cannot be null;" ); // the only mandatory argument; fails fast with a NullPointerException
125153 this .type = type ;
126154 this .objectMapper = objectMapper != null ? objectMapper : getObjectMapper (); // null means "use the mapper from getObjectMapper()"
155+ this .textCleaner = textCleaner != null ? textCleaner : createDefaultTextCleaner (); // null means "use the default cleaner chain"
127156 generateSchema (); // called last, after the fields are assigned; presumably it reads this.type and this.objectMapper (body not visible here)
128157 }
129158
159+ /**
160+ * Creates the default text cleaner that handles common response formats from various
161+ * AI models.
162+ * <p>
163+ * The default cleaner registers a leading {@link WhitespaceCleaner}, followed by, in this order:
164+ * <ul>
165+ * <li>{@link ThinkingTagCleaner} - Removes thinking tags from models like Amazon Nova
166+ * and Qwen. For models that don't generate thinking tags, this has minimal
167+ * performance impact due to fast-path optimization.</li>
168+ * <li>{@link MarkdownCodeBlockCleaner} - Removes markdown code block formatting.</li>
169+ * <li>{@link WhitespaceCleaner} - Trims whitespace; a second instance, registered last.</li>
170+ * </ul>
171+ * <p>
172+ * To customize the cleaning behavior, provide a custom {@link ResponseTextCleaner}
173+ * via the constructor.
174+ * @return a composite text cleaner with default cleaning strategies
175+ */
176+ private static ResponseTextCleaner createDefaultTextCleaner () {
177+ return CompositeResponseTextCleaner .builder ()
178+ .addCleaner (new WhitespaceCleaner ()) // Initial trim before the tag and code-block cleaners
179+ .addCleaner (new ThinkingTagCleaner ())
180+ .addCleaner (new MarkdownCodeBlockCleaner ())
181+ .addCleaner (new WhitespaceCleaner ()) // Final trim after all cleanups
182+ .build ();
183+ }
184+
130185 /**
131186 * Generates the JSON schema for the target type.
132187 */
@@ -166,26 +221,9 @@ private void generateSchema() {
166221 @ Override
167222 public T convert (@ NonNull String text ) {
168223 try {
169- // Remove leading and trailing whitespace
170- text = text .trim ();
171-
172- // Check for and remove triple backticks and "json" identifier
173- if (text .startsWith ("```" ) && text .endsWith ("```" )) {
174- // Remove the first line if it contains "```json"
175- String [] lines = text .split ("\n " , 2 );
176- if (lines [0 ].trim ().equalsIgnoreCase ("```json" )) {
177- text = lines .length > 1 ? lines [1 ] : "" ;
178- }
179- else {
180- text = text .substring (3 ); // Remove leading ```
181- }
182-
183- // Remove trailing ```
184- text = text .substring (0 , text .length () - 3 );
185-
186- // Trim again to remove any potential whitespace
187- text = text .trim ();
188- }
224+ // Clean the text using the configured text cleaner
225+ text = this .textCleaner .clean (text );
226+
189227 return (T ) this .objectMapper .readValue (text , this .objectMapper .constructType (this .type ));
190228 }
191229 catch (JsonProcessingException e ) {
0 commit comments