Skip to content

Commit d1aa8d4

Browse files
liugddx authored and ilayaperumalg committed
feat: add support for removing thinking tags from input text in BeanOutputConverter
- enhance BeanOutputConverter with customizable text cleaning capabilities - Update @since to 1.1.0 as suggested by nicolaskrier - Add fast-path optimization in ThinkingTagCleaner for non-thinking models - Enhance documentation explaining why ThinkingTagCleaner is safe to use by default - Clarify performance characteristics in JavaDoc - Addresses review comments from nicolaskrier in PR #4667 - Improve documentation for ThinkingTagCleaner and BeanOutputConverter Signed-off-by: liugddx <liugddx@gmail.com>
1 parent 9b0eaf2 commit d1aa8d4

File tree

9 files changed

+880
-25
lines changed

9 files changed

+880
-25
lines changed

spring-ai-model/src/main/java/org/springframework/ai/converter/BeanOutputConverter.java

Lines changed: 63 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
* @author Sebastien Deleuze
6161
* @author Soby Chacko
6262
* @author Thomas Vitale
63+
* @author liugddx
6364
*/
6465
public class BeanOutputConverter<T> implements StructuredOutputConverter<T> {
6566

@@ -76,12 +77,15 @@ public class BeanOutputConverter<T> implements StructuredOutputConverter<T> {
7677
/** Holds the generated JSON schema for the target type. */
7778
private String jsonSchema;
7879

80+
/** The text cleaner used to preprocess LLM responses before parsing. */
81+
private final ResponseTextCleaner textCleaner;
82+
7983
/**
8084
* Constructor to initialize with the target type's class.
8185
* @param clazz The target type's class.
8286
*/
8387
public BeanOutputConverter(Class<T> clazz) {
84-
this(ParameterizedTypeReference.forType(clazz));
88+
this(clazz, null, null);
8589
}
8690

8791
/**
@@ -91,15 +95,26 @@ public BeanOutputConverter(Class<T> clazz) {
9195
* @param objectMapper Custom object mapper for JSON operations.
9296
*/
9397
public BeanOutputConverter(Class<T> clazz, ObjectMapper objectMapper) {
94-
this(ParameterizedTypeReference.forType(clazz), objectMapper);
98+
this(clazz, objectMapper, null);
99+
}
100+
101+
/**
102+
* Constructor to initialize with the target type's class, a custom object mapper, and
103+
* a custom text cleaner.
104+
* @param clazz The target type's class.
105+
* @param objectMapper Custom object mapper for JSON operations.
106+
* @param textCleaner Custom text cleaner for preprocessing responses.
107+
*/
108+
public BeanOutputConverter(Class<T> clazz, ObjectMapper objectMapper, ResponseTextCleaner textCleaner) {
109+
this(ParameterizedTypeReference.forType(clazz), objectMapper, textCleaner);
95110
}
96111

97112
/**
98113
* Constructor to initialize with the target class type reference.
99114
* @param typeRef The target class type reference.
100115
*/
101116
public BeanOutputConverter(ParameterizedTypeReference<T> typeRef) {
102-
this(typeRef.getType(), null);
117+
this(typeRef, null, null);
103118
}
104119

105120
/**
@@ -110,7 +125,19 @@ public BeanOutputConverter(ParameterizedTypeReference<T> typeRef) {
110125
* @param objectMapper Custom object mapper for JSON operations.
111126
*/
112127
public BeanOutputConverter(ParameterizedTypeReference<T> typeRef, ObjectMapper objectMapper) {
113-
this(typeRef.getType(), objectMapper);
128+
this(typeRef, objectMapper, null);
129+
}
130+
131+
/**
132+
* Constructor to initialize with the target class type reference, a custom object
133+
* mapper, and a custom text cleaner.
134+
* @param typeRef The target class type reference.
135+
* @param objectMapper Custom object mapper for JSON operations.
136+
* @param textCleaner Custom text cleaner for preprocessing responses.
137+
*/
138+
public BeanOutputConverter(ParameterizedTypeReference<T> typeRef, ObjectMapper objectMapper,
139+
ResponseTextCleaner textCleaner) {
140+
this(typeRef.getType(), objectMapper, textCleaner);
114141
}
115142

116143
/**
@@ -119,14 +146,42 @@ public BeanOutputConverter(ParameterizedTypeReference<T> typeRef, ObjectMapper o
119146
* platform.
120147
* @param type The target class type.
121148
* @param objectMapper Custom object mapper for JSON operations.
149+
* @param textCleaner Custom text cleaner for preprocessing responses.
122150
*/
123-
private BeanOutputConverter(Type type, ObjectMapper objectMapper) {
151+
private BeanOutputConverter(Type type, ObjectMapper objectMapper, ResponseTextCleaner textCleaner) {
124152
Objects.requireNonNull(type, "Type cannot be null;");
125153
this.type = type;
126154
this.objectMapper = objectMapper != null ? objectMapper : getObjectMapper();
155+
this.textCleaner = textCleaner != null ? textCleaner : createDefaultTextCleaner();
127156
generateSchema();
128157
}
129158

159+
/**
160+
* Creates the default text cleaner that handles common response formats from various
161+
* AI models.
162+
* <p>
163+
* The default cleaner first trims the text and then applies, in order:
164+
* <ul>
165+
* <li>{@link ThinkingTagCleaner} - Removes thinking tags from models like Amazon Nova
166+
* and Qwen. For models that don't generate thinking tags, this has minimal
167+
* performance impact due to fast-path optimization.</li>
168+
* <li>{@link MarkdownCodeBlockCleaner} - Removes markdown code block formatting.</li>
169+
* <li>{@link WhitespaceCleaner} - Trims whitespace; runs both before and after the other cleaners.</li>
170+
* </ul>
171+
* <p>
172+
* To customize the cleaning behavior, provide a custom {@link ResponseTextCleaner}
173+
* via the constructor.
174+
* @return a composite text cleaner with default cleaning strategies
175+
*/
176+
private static ResponseTextCleaner createDefaultTextCleaner() {
177+
return CompositeResponseTextCleaner.builder()
178+
.addCleaner(new WhitespaceCleaner())
179+
.addCleaner(new ThinkingTagCleaner())
180+
.addCleaner(new MarkdownCodeBlockCleaner())
181+
.addCleaner(new WhitespaceCleaner()) // Final trim after all cleanups
182+
.build();
183+
}
184+
130185
/**
131186
* Generates the JSON schema for the target type.
132187
*/
@@ -166,26 +221,9 @@ private void generateSchema() {
166221
@Override
167222
public T convert(@NonNull String text) {
168223
try {
169-
// Remove leading and trailing whitespace
170-
text = text.trim();
171-
172-
// Check for and remove triple backticks and "json" identifier
173-
if (text.startsWith("```") && text.endsWith("```")) {
174-
// Remove the first line if it contains "```json"
175-
String[] lines = text.split("\n", 2);
176-
if (lines[0].trim().equalsIgnoreCase("```json")) {
177-
text = lines.length > 1 ? lines[1] : "";
178-
}
179-
else {
180-
text = text.substring(3); // Remove leading ```
181-
}
182-
183-
// Remove trailing ```
184-
text = text.substring(0, text.length() - 3);
185-
186-
// Trim again to remove any potential whitespace
187-
text = text.trim();
188-
}
224+
// Clean the text using the configured text cleaner
225+
text = this.textCleaner.clean(text);
226+
189227
return (T) this.objectMapper.readValue(text, this.objectMapper.constructType(this.type));
190228
}
191229
catch (JsonProcessingException e) {
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/*
2+
* Copyright 2023-2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.converter;
18+
19+
import java.util.ArrayList;
20+
import java.util.Arrays;
21+
import java.util.List;
22+
23+
import org.springframework.util.Assert;
24+
25+
/**
26+
* A composite {@link ResponseTextCleaner} that applies multiple cleaners in sequence.
27+
* This allows for a flexible pipeline of text cleaning operations.
28+
*
29+
* @author liugddx
30+
* @since 1.1.0
31+
*/
32+
public class CompositeResponseTextCleaner implements ResponseTextCleaner {
33+
34+
private final List<ResponseTextCleaner> cleaners;
35+
36+
/**
37+
* Creates a composite cleaner with the given cleaners.
38+
* @param cleaners the list of cleaners to apply in order
39+
*/
40+
public CompositeResponseTextCleaner(List<ResponseTextCleaner> cleaners) {
41+
Assert.notNull(cleaners, "cleaners cannot be null");
42+
this.cleaners = new ArrayList<>(cleaners);
43+
}
44+
45+
/**
46+
* Creates a composite cleaner with no cleaners. Text will be returned unchanged.
47+
*/
48+
public CompositeResponseTextCleaner() {
49+
this(new ArrayList<>());
50+
}
51+
52+
/**
53+
* Creates a composite cleaner with the given cleaners.
54+
* @param cleaners the cleaners to apply in order
55+
*/
56+
public CompositeResponseTextCleaner(ResponseTextCleaner... cleaners) {
57+
this(Arrays.asList(cleaners));
58+
}
59+
60+
@Override
61+
public String clean(String text) {
62+
String result = text;
63+
for (ResponseTextCleaner cleaner : this.cleaners) {
64+
result = cleaner.clean(result);
65+
}
66+
return result;
67+
}
68+
69+
/**
70+
* Creates a builder for constructing a composite cleaner.
71+
* @return a new builder instance
72+
*/
73+
public static Builder builder() {
74+
return new Builder();
75+
}
76+
77+
/**
78+
* Builder for {@link CompositeResponseTextCleaner}.
79+
*/
80+
public static final class Builder {
81+
82+
private final List<ResponseTextCleaner> cleaners = new ArrayList<>();
83+
84+
private Builder() {
85+
}
86+
87+
/**
88+
* Add a cleaner to the pipeline.
89+
* @param cleaner the cleaner to add
90+
* @return this builder
91+
*/
92+
public Builder addCleaner(ResponseTextCleaner cleaner) {
93+
Assert.notNull(cleaner, "cleaner cannot be null");
94+
this.cleaners.add(cleaner);
95+
return this;
96+
}
97+
98+
/**
99+
* Build the composite cleaner.
100+
* @return a new composite cleaner instance
101+
*/
102+
public CompositeResponseTextCleaner build() {
103+
return new CompositeResponseTextCleaner(this.cleaners);
104+
}
105+
106+
}
107+
108+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* Copyright 2023-2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.converter;
18+
19+
/**
20+
* A {@link ResponseTextCleaner} that removes markdown code block formatting from LLM
21+
* responses. This cleaner handles:
22+
* <ul>
23+
* <li>{@code ```json ... ```}</li>
24+
* <li>{@code ``` ... ```}</li>
25+
* </ul>
26+
*
27+
* @author liugddx
28+
* @since 1.1.0
29+
*/
30+
public class MarkdownCodeBlockCleaner implements ResponseTextCleaner {
31+
32+
@Override
33+
public String clean(String text) {
34+
if (text == null || text.isEmpty()) {
35+
return text;
36+
}
37+
38+
// Trim leading and trailing whitespace first
39+
text = text.trim();
40+
41+
// Check for and remove triple backticks
42+
if (text.startsWith("```") && text.endsWith("```")) {
43+
// Remove the first line if it contains "```json" or similar
44+
String[] lines = text.split("\n", 2);
45+
if (lines[0].trim().toLowerCase().startsWith("```")) {
46+
// Extract language identifier if present
47+
String firstLine = lines[0].trim();
48+
if (firstLine.length() > 3) {
49+
// Has language identifier like ```json
50+
text = lines.length > 1 ? lines[1] : "";
51+
}
52+
else {
53+
// Just ``` without language
54+
text = text.substring(3);
55+
}
56+
}
57+
else {
58+
text = text.substring(3);
59+
}
60+
61+
// Remove trailing ```
62+
if (text.endsWith("```")) {
63+
text = text.substring(0, text.length() - 3);
64+
}
65+
66+
// Trim again to remove any potential whitespace
67+
text = text.trim();
68+
}
69+
70+
return text;
71+
}
72+
73+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright 2023-2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.converter;
18+
19+
/**
 * Strategy contract for preprocessing the text of an LLM response before it is parsed.
 * Implementations each target a particular response format or pattern, so that output
 * from different AI models can be normalized in a pluggable way.
 *
 * @author liugddx
 * @since 1.1.0
 */
@FunctionalInterface
public interface ResponseTextCleaner {

	/**
	 * Remove unwanted patterns, tags, or formatting from the given text.
	 * @param text the raw text from LLM response
	 * @return the cleaned text ready for parsing
	 */
	String clean(String text);

}

0 commit comments

Comments
 (0)