Skip to content

Commit 0b8293e

Browse files
markpollack and ilayaperumalg
authored and committed
feat(ollama): add thinking mode support for reasoning models
Add support for Ollama's thinking mode, which enables reasoning-capable models to emit their internal reasoning process in a separate field. Key changes: - Implement ThinkOption sealed interface with boolean and level variants - Add think configuration to OllamaChatOptions with builder methods - Filter think from options map to send as top-level request field - Add QWEN3_4B_THINKING model constant for thinking-enabled variant - Upgrade Ollama test container to 0.12.10 for thinking support - Document auto-enable behavior for thinking-capable models Supported models: Qwen3, DeepSeek-v3.1, DeepSeek R1, GPT-OSS. Note: Thinking-capable models auto-enable thinking by default in Ollama 0.12+. Use .disableThinking() to explicitly disable. Signed-off-by: Mark Pollack <mark.pollack@broadcom.com>
1 parent 91a8d3a commit 0b8293e

File tree

12 files changed

+846
-30
lines changed

12 files changed

+846
-30
lines changed

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ private ChatResponse internalCall(Prompt prompt, ChatResponse previousChatRespon
266266
if (ollamaResponse.promptEvalCount() != null && ollamaResponse.evalCount() != null) {
267267
generationMetadata = ChatGenerationMetadata.builder()
268268
.finishReason(ollamaResponse.doneReason())
269+
.metadata("thinking", ollamaResponse.message().thinking())
269270
.build();
270271
}
271272

@@ -505,7 +506,8 @@ else if (message.getMessageType() == MessageType.TOOL) {
505506
OllamaApi.ChatRequest.Builder requestBuilder = OllamaApi.ChatRequest.builder(requestOptions.getModel())
506507
.stream(stream)
507508
.messages(ollamaMessages)
508-
.options(requestOptions);
509+
.options(requestOptions)
510+
.think(requestOptions.getThinkOption());
509511

510512
if (requestOptions.getFormat() != null) {
511513
requestBuilder.format(requestOptions.getFormat());

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ public record ChatRequest(
402402
@JsonProperty("keep_alive") String keepAlive,
403403
@JsonProperty("tools") List<Tool> tools,
404404
@JsonProperty("options") Map<String, Object> options,
405-
@JsonProperty("think") Boolean think
405+
@JsonProperty("think") ThinkOption think
406406
) {
407407

408408
public static Builder builder(String model) {
@@ -475,7 +475,7 @@ public static final class Builder {
475475
private String keepAlive;
476476
private List<Tool> tools = List.of();
477477
private Map<String, Object> options = Map.of();
478-
private Boolean think;
478+
private ThinkOption think;
479479

480480
public Builder(String model) {
481481
Assert.notNull(model, "The model can not be null.");
@@ -509,16 +509,60 @@ public Builder tools(List<Tool> tools) {
509509

510510
public Builder options(Map<String, Object> options) {
511511
Objects.requireNonNull(options, "The options can not be null.");
512-
513-
this.options = OllamaOptions.filterNonSupportedFields(options);
512+
this.options = OllamaChatOptions.filterNonSupportedFields(options);
514513
return this;
515514
}
516515

517-
public Builder think(Boolean think) {
516+
public Builder think(ThinkOption think) {
518517
this.think = think;
519518
return this;
520519
}
521520

521+
/**
522+
* Enable thinking mode for the model.
523+
* @return this builder
524+
*/
525+
public Builder enableThinking() {
526+
this.think = ThinkOption.ThinkBoolean.ENABLED;
527+
return this;
528+
}
529+
530+
/**
531+
* Disable thinking mode for the model.
532+
* @return this builder
533+
*/
534+
public Builder disableThinking() {
535+
this.think = ThinkOption.ThinkBoolean.DISABLED;
536+
return this;
537+
}
538+
539+
/**
540+
* Set thinking level to "low" (for GPT-OSS model).
541+
* @return this builder
542+
*/
543+
public Builder thinkLow() {
544+
this.think = ThinkOption.ThinkLevel.LOW;
545+
return this;
546+
}
547+
548+
/**
549+
* Set thinking level to "medium" (for GPT-OSS model).
550+
* @return this builder
551+
*/
552+
public Builder thinkMedium() {
553+
this.think = ThinkOption.ThinkLevel.MEDIUM;
554+
return this;
555+
}
556+
557+
/**
558+
* Set thinking level to "high" (for GPT-OSS model).
559+
* @return this builder
560+
*/
561+
public Builder thinkHigh() {
562+
this.think = ThinkOption.ThinkLevel.HIGH;
563+
return this;
564+
}
565+
522566
@Deprecated
523567
public Builder options(OllamaOptions options) {
524568
Objects.requireNonNull(options, "The options can not be null.");

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaChatOptions.java

Lines changed: 124 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,31 @@ public class OllamaChatOptions implements ToolCallingChatOptions {
317317
@JsonProperty("truncate")
318318
private Boolean truncate;
319319

320+
/**
321+
* The model should think before responding, if supported.
322+
* <p>
323+
* Most models (Qwen 3, DeepSeek-v3.1, DeepSeek R1) use boolean enable/disable.
324+
* The GPT-OSS model requires string levels: "low", "medium", or "high".
325+
* <p>
326+
* <strong>Default Behavior (Ollama 0.12+):</strong>
327+
* <ul>
328+
* <li>Thinking-capable models (e.g., qwen3:*-thinking, deepseek-r1, deepseek-v3.1)
329+
* <strong>auto-enable thinking by default</strong> when this field is not set.</li>
330+
* <li>Standard models (e.g., qwen2.5:*, llama3.2) do not enable thinking by default.</li>
331+
* <li>To explicitly control behavior, use {@link Builder#enableThinking()} or
332+
* {@link Builder#disableThinking()}.</li>
333+
* </ul>
334+
* <p>
335+
* Use {@link Builder#enableThinking()}, {@link Builder#disableThinking()}, or
336+
* {@link Builder#thinkHigh()} to configure this option.
337+
*
338+
* @see ThinkOption
339+
* @see ThinkOption.ThinkBoolean
340+
* @see ThinkOption.ThinkLevel
341+
*/
342+
@JsonProperty("think")
343+
private ThinkOption thinkOption;
344+
320345
@JsonIgnore
321346
private Boolean internalToolExecutionEnabled;
322347

@@ -364,6 +389,7 @@ public static OllamaChatOptions fromOptions(OllamaChatOptions fromOptions) {
364389
.format(fromOptions.getFormat())
365390
.keepAlive(fromOptions.getKeepAlive())
366391
.truncate(fromOptions.getTruncate())
392+
.thinkOption(fromOptions.getThinkOption())
367393
.useNUMA(fromOptions.getUseNUMA())
368394
.numCtx(fromOptions.getNumCtx())
369395
.numBatch(fromOptions.getNumBatch())
@@ -745,6 +771,14 @@ public void setTruncate(Boolean truncate) {
745771
this.truncate = truncate;
746772
}
747773

774+
public ThinkOption getThinkOption() {
775+
return this.thinkOption;
776+
}
777+
778+
public void setThinkOption(ThinkOption thinkOption) {
779+
this.thinkOption = thinkOption;
780+
}
781+
748782
@Override
749783
@JsonIgnore
750784
public List<ToolCallback> getToolCallbacks() {
@@ -824,17 +858,17 @@ public boolean equals(Object o) {
824858
OllamaChatOptions that = (OllamaChatOptions) o;
825859
return Objects.equals(this.model, that.model) && Objects.equals(this.format, that.format)
826860
&& Objects.equals(this.keepAlive, that.keepAlive) && Objects.equals(this.truncate, that.truncate)
827-
&& Objects.equals(this.useNUMA, that.useNUMA) && Objects.equals(this.numCtx, that.numCtx)
828-
&& Objects.equals(this.numBatch, that.numBatch) && Objects.equals(this.numGPU, that.numGPU)
829-
&& Objects.equals(this.mainGPU, that.mainGPU) && Objects.equals(this.lowVRAM, that.lowVRAM)
830-
&& Objects.equals(this.f16KV, that.f16KV) && Objects.equals(this.logitsAll, that.logitsAll)
831-
&& Objects.equals(this.vocabOnly, that.vocabOnly) && Objects.equals(this.useMMap, that.useMMap)
832-
&& Objects.equals(this.useMLock, that.useMLock) && Objects.equals(this.numThread, that.numThread)
833-
&& Objects.equals(this.numKeep, that.numKeep) && Objects.equals(this.seed, that.seed)
834-
&& Objects.equals(this.numPredict, that.numPredict) && Objects.equals(this.topK, that.topK)
835-
&& Objects.equals(this.topP, that.topP) && Objects.equals(this.minP, that.minP)
836-
&& Objects.equals(this.tfsZ, that.tfsZ) && Objects.equals(this.typicalP, that.typicalP)
837-
&& Objects.equals(this.repeatLastN, that.repeatLastN)
861+
&& Objects.equals(this.thinkOption, that.thinkOption) && Objects.equals(this.useNUMA, that.useNUMA)
862+
&& Objects.equals(this.numCtx, that.numCtx) && Objects.equals(this.numBatch, that.numBatch)
863+
&& Objects.equals(this.numGPU, that.numGPU) && Objects.equals(this.mainGPU, that.mainGPU)
864+
&& Objects.equals(this.lowVRAM, that.lowVRAM) && Objects.equals(this.f16KV, that.f16KV)
865+
&& Objects.equals(this.logitsAll, that.logitsAll) && Objects.equals(this.vocabOnly, that.vocabOnly)
866+
&& Objects.equals(this.useMMap, that.useMMap) && Objects.equals(this.useMLock, that.useMLock)
867+
&& Objects.equals(this.numThread, that.numThread) && Objects.equals(this.numKeep, that.numKeep)
868+
&& Objects.equals(this.seed, that.seed) && Objects.equals(this.numPredict, that.numPredict)
869+
&& Objects.equals(this.topK, that.topK) && Objects.equals(this.topP, that.topP)
870+
&& Objects.equals(this.minP, that.minP) && Objects.equals(this.tfsZ, that.tfsZ)
871+
&& Objects.equals(this.typicalP, that.typicalP) && Objects.equals(this.repeatLastN, that.repeatLastN)
838872
&& Objects.equals(this.temperature, that.temperature)
839873
&& Objects.equals(this.repeatPenalty, that.repeatPenalty)
840874
&& Objects.equals(this.presencePenalty, that.presencePenalty)
@@ -849,13 +883,13 @@ public boolean equals(Object o) {
849883

850884
@Override
851885
public int hashCode() {
852-
return Objects.hash(this.model, this.format, this.keepAlive, this.truncate, this.useNUMA, this.numCtx,
853-
this.numBatch, this.numGPU, this.mainGPU, this.lowVRAM, this.f16KV, this.logitsAll, this.vocabOnly,
854-
this.useMMap, this.useMLock, this.numThread, this.numKeep, this.seed, this.numPredict, this.topK,
855-
this.topP, this.minP, this.tfsZ, this.typicalP, this.repeatLastN, this.temperature, this.repeatPenalty,
856-
this.presencePenalty, this.frequencyPenalty, this.mirostat, this.mirostatTau, this.mirostatEta,
857-
this.penalizeNewline, this.stop, this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled,
858-
this.toolContext);
886+
return Objects.hash(this.model, this.format, this.keepAlive, this.truncate, this.thinkOption, this.useNUMA,
887+
this.numCtx, this.numBatch, this.numGPU, this.mainGPU, this.lowVRAM, this.f16KV, this.logitsAll,
888+
this.vocabOnly, this.useMMap, this.useMLock, this.numThread, this.numKeep, this.seed, this.numPredict,
889+
this.topK, this.topP, this.minP, this.tfsZ, this.typicalP, this.repeatLastN, this.temperature,
890+
this.repeatPenalty, this.presencePenalty, this.frequencyPenalty, this.mirostat, this.mirostatTau,
891+
this.mirostatEta, this.penalizeNewline, this.stop, this.toolCallbacks, this.toolNames,
892+
this.internalToolExecutionEnabled, this.toolContext);
859893
}
860894

861895
public static final class Builder {
@@ -1037,6 +1071,78 @@ public Builder stop(List<String> stop) {
10371071
return this;
10381072
}
10391073

1074+
/**
1075+
* Enable thinking mode for the model. The model will include its reasoning
1076+
* process in the response's thinking field.
1077+
* <p>
1078+
* Supported by models: Qwen 3, DeepSeek-v3.1, DeepSeek R1
1079+
* @return this builder
1080+
* @see #disableThinking()
1081+
* @see #thinkLow()
1082+
*/
1083+
public Builder enableThinking() {
1084+
this.options.thinkOption = ThinkOption.ThinkBoolean.ENABLED;
1085+
return this;
1086+
}
1087+
1088+
/**
1089+
* Disable thinking mode for the model.
1090+
* @return this builder
1091+
* @see #enableThinking()
1092+
*/
1093+
public Builder disableThinking() {
1094+
this.options.thinkOption = ThinkOption.ThinkBoolean.DISABLED;
1095+
return this;
1096+
}
1097+
1098+
/**
1099+
* Set thinking level to "low" (for GPT-OSS model).
1100+
* <p>
1101+
* GPT-OSS requires one of: low, medium, high. Boolean enable/disable is not
1102+
* supported for this model.
1103+
* @return this builder
1104+
* @see #thinkMedium()
1105+
* @see #thinkHigh()
1106+
*/
1107+
public Builder thinkLow() {
1108+
this.options.thinkOption = ThinkOption.ThinkLevel.LOW;
1109+
return this;
1110+
}
1111+
1112+
/**
1113+
* Set thinking level to "medium" (for GPT-OSS model).
1114+
* @return this builder
1115+
* @see #thinkLow()
1116+
* @see #thinkHigh()
1117+
*/
1118+
public Builder thinkMedium() {
1119+
this.options.thinkOption = ThinkOption.ThinkLevel.MEDIUM;
1120+
return this;
1121+
}
1122+
1123+
/**
1124+
* Set thinking level to "high" (for GPT-OSS model).
1125+
* @return this builder
1126+
* @see #thinkLow()
1127+
* @see #thinkMedium()
1128+
*/
1129+
public Builder thinkHigh() {
1130+
this.options.thinkOption = ThinkOption.ThinkLevel.HIGH;
1131+
return this;
1132+
}
1133+
1134+
/**
1135+
* Set the think option explicitly. Use {@link #enableThinking()},
1136+
* {@link #disableThinking()}, {@link #thinkLow()}, {@link #thinkMedium()}, or
1137+
* {@link #thinkHigh()} for more convenient alternatives.
1138+
* @param thinkOption the think option
1139+
* @return this builder
1140+
*/
1141+
public Builder thinkOption(ThinkOption thinkOption) {
1142+
this.options.thinkOption = thinkOption;
1143+
return this;
1144+
}
1145+
10401146
public Builder toolCallbacks(List<ToolCallback> toolCallbacks) {
10411147
this.options.setToolCallbacks(toolCallbacks);
10421148
return this;

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaModel.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
*
2424
* @author Siarhei Blashuk
2525
* @author Thomas Vitale
26+
* @author Sun Yuhan
2627
* @since 1.0.0
2728
*/
2829
public enum OllamaModel implements ChatModelDescription {
@@ -51,6 +52,23 @@ public enum OllamaModel implements ChatModelDescription {
5152
*/
5253
QWEN3_4B("qwen3:4b"),
5354

55+
/**
56+
* Qwen3 4B with thinking support. This variant auto-enables thinking by default in
57+
* Ollama 0.12+, providing separate reasoning traces in the response.
58+
* @see OllamaChatOptions#thinkOption
59+
*/
60+
QWEN3_4B_THINKING("qwen3:4b-thinking"),
61+
62+
/**
63+
* Qwen3 1.7b
64+
*/
65+
QWEN_3_1_7_B("qwen3:1.7b"),
66+
67+
/**
68+
* Qwen3 0.6b
69+
*/
70+
QWEN_3_06B("qwen3:0.6b"),
71+
5472
/**
5573
* QwQ is the reasoning model of the Qwen series.
5674
*/

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
* @author Christian Tzolov
4545
* @author Thomas Vitale
4646
* @author Ilayaperumal Gopinathan
47+
* @author Sun Yuhan
4748
* @since 0.8.0
4849
* @see <a href=
4950
* "https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">Ollama
@@ -55,7 +56,8 @@
5556
@Deprecated
5657
public class OllamaOptions implements ToolCallingChatOptions, EmbeddingOptions {
5758

58-
private static final List<String> NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive", "truncate");
59+
private static final List<String> NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive", "truncate",
60+
"think");
5961

6062
// Following fields are options which must be set when the model is loaded into
6163
// memory.

0 commit comments

Comments (0)