
Commit 34bc1bd

Copilot and mudler authored
fix(api): SSE streaming format to comply with specification (#7182)
* Initial plan

* Fix SSE streaming format to comply with specification

  - Replace json.Encoder with json.Marshal for explicit formatting
  - Use explicit \n\n for all SSE messages (instead of relying on implicit newlines)
  - Change %v to %s format specifier for proper string formatting
  - Fix error message streaming to include proper SSE format
  - Ensure consistency between chat.go and completion.go endpoints

  Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Add proper error handling for JSON marshal failures in streaming

  - Handle json.Marshal errors explicitly in error response paths
  - Add fallback simple error message if marshal fails
  - Prevents sending 'data: <nil>' on marshal failures
  - Addresses code review feedback

  Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Fix SSE streaming format to comply with specification

  Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Fix finish_reason field to use pointer for proper null handling

  - Change FinishReason from string to *string in Choice schema
  - Streaming chunks now omit finish_reason (null) instead of empty string
  - Final chunks properly set finish_reason to "stop", "tool_calls", etc.
  - Remove empty content from initial streaming chunks (only send role)
  - Final streaming chunk sends empty delta with finish_reason
  - Addresses OpenAI API compliance issues causing client failures

  Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Improve code consistency for string pointer creation

  - Use consistent pattern: declare variable then take address
  - Remove inline anonymous function for better readability
  - Addresses code review feedback

  Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Move common finish reasons to constants

  - Create constants.go with FinishReasonStop, FinishReasonToolCalls, FinishReasonFunctionCall
  - Replace all string literals with constants in chat.go, completion.go, realtime.go
  - Improves code maintainability and prevents typos

  Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Make it build

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix finish_reason to always be present with null or string value

  - Remove omitempty from FinishReason field in Choice struct
  - Explicitly set FinishReason to nil for all streaming chunks
  - Ensures finish_reason appears as null in JSON for streaming chunks
  - Final chunks still properly set finish_reason to "stop", "tool_calls", etc.
  - Complies with OpenAI API specification example

  Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 01cd58a commit 34bc1bd
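For reference, the wire format the commit message describes: each server-sent event is a single "data: <json>" line terminated by a blank line (\n\n), streaming chunks carry an explicit "finish_reason": null, the final chunk carries a concrete reason with an empty delta, and the stream closes with "data: [DONE]". A minimal, self-contained Go sketch of that framing (the chunk and choice types here are illustrative stand-ins for the real schema types, not the project's code):

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// choice and chunk mirror just enough of schema.Choice / schema.OpenAIResponse
// to show the finish_reason handling; the real types carry more fields.
type choice struct {
	Index        int            `json:"index"`
	Delta        map[string]any `json:"delta"`
	FinishReason *string        `json:"finish_reason"` // no omitempty: nil must serialize as null
}

type chunk struct {
	Object  string   `json:"object"`
	Choices []choice `json:"choices"`
}

// writeSSE frames one event as the SSE spec requires: a "data:" line plus a
// blank line, using json.Marshal so the formatting stays explicit.
func writeSSE(v any) {
	data, err := json.Marshal(v)
	if err != nil {
		// fallback mirrors the commit's marshal-error handling
		fmt.Fprint(os.Stdout, "data: {\"error\":\"Internal error\"}\n\n")
		return
	}
	fmt.Fprintf(os.Stdout, "data: %s\n\n", data)
}

func main() {
	// streaming chunk: delta with content, finish_reason null
	writeSSE(chunk{Object: "chat.completion.chunk",
		Choices: []choice{{Delta: map[string]any{"content": "Hello"}}}})

	// final chunk: empty delta, concrete finish_reason
	stop := "stop"
	writeSSE(chunk{Object: "chat.completion.chunk",
		Choices: []choice{{Delta: map[string]any{}, FinishReason: &stop}}})

	fmt.Fprint(os.Stdout, "data: [DONE]\n\n")
}

Running this prints two data: lines, each followed by an empty line, with "finish_reason":null on the first and "finish_reason":"stop" on the second, then the [DONE] sentinel.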

File tree

8 files changed: +95 -47 lines changed


.github/gallery-agent/go.mod

Lines changed: 0 additions & 1 deletion
@@ -8,7 +8,6 @@ require (
 	github.com/onsi/gomega v1.38.2
 	github.com/sashabaranov/go-openai v1.41.2
 	github.com/tmc/langchaingo v0.1.13
-	gopkg.in/yaml.v3 v3.0.1
 )

 require (

core/http/endpoints/openai/chat.go

Lines changed: 43 additions & 25 deletions
@@ -2,7 +2,6 @@ package openai

 import (
 	"bufio"
-	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -91,7 +90,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		ID:      id,
 		Created: created,
 		Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-		Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
+		Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
 		Object:  "chat.completion.chunk",
 	}
 	responses <- initialMessage
@@ -111,7 +110,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		ID:      id,
 		Created: created,
 		Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-		Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
+		Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0, FinishReason: nil}},
 		Object:  "chat.completion.chunk",
 		Usage:   usage,
 	}
@@ -145,7 +144,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		ID:      id,
 		Created: created,
 		Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-		Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
+		Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
 		Object:  "chat.completion.chunk",
 	}
 	responses <- initialMessage
@@ -169,7 +168,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		ID:      id,
 		Created: created,
 		Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-		Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
+		Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0, FinishReason: nil}},
 		Object:  "chat.completion.chunk",
 		Usage:   usage,
 	}
@@ -197,7 +196,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 					},
 				},
 			},
-		}}},
+		},
+			Index:        0,
+			FinishReason: nil,
+		}},
 		Object: "chat.completion.chunk",
 	}
 	responses <- initialMessage
@@ -220,7 +222,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 					},
 				},
 			},
-		}}},
+		},
+			Index:        0,
+			FinishReason: nil,
+		}},
 		Object: "chat.completion.chunk",
 	}
 }
@@ -427,11 +432,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 				if len(ev.Choices[0].Delta.ToolCalls) > 0 {
 					toolsCalled = true
 				}
-				var buf bytes.Buffer
-				enc := json.NewEncoder(&buf)
-				enc.Encode(ev)
-				log.Debug().Msgf("Sending chunk: %s", buf.String())
-				_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
+				respData, err := json.Marshal(ev)
+				if err != nil {
+					log.Debug().Msgf("Failed to marshal response: %v", err)
+					input.Cancel()
+					continue
+				}
+				log.Debug().Msgf("Sending chunk: %s", string(respData))
+				_, err = fmt.Fprintf(w, "data: %s\n\n", string(respData))
 				if err != nil {
 					log.Debug().Msgf("Sending chunk failed: %v", err)
 					input.Cancel()
@@ -443,34 +451,40 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 				}
 				log.Error().Msgf("Stream ended with error: %v", err)

+				stopReason := FinishReasonStop
 				resp := &schema.OpenAIResponse{
 					ID:      id,
 					Created: created,
 					Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 					Choices: []schema.Choice{
 						{
-							FinishReason: "stop",
+							FinishReason: &stopReason,
 							Index:        0,
 							Delta:        &schema.Message{Content: "Internal error: " + err.Error()},
 						}},
 					Object: "chat.completion.chunk",
 					Usage:  *usage,
 				}
-				respData, _ := json.Marshal(resp)
-
-				w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
+				respData, marshalErr := json.Marshal(resp)
+				if marshalErr != nil {
+					log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
+					// Send a simple error message as fallback
+					w.WriteString("data: {\"error\":\"Internal error\"}\n\n")
+				} else {
+					w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
+				}
 				w.WriteString("data: [DONE]\n\n")
 				w.Flush()

 				return
 			}
 		}

-		finishReason := "stop"
+		finishReason := FinishReasonStop
 		if toolsCalled && len(input.Tools) > 0 {
-			finishReason = "tool_calls"
+			finishReason = FinishReasonToolCalls
 		} else if toolsCalled {
-			finishReason = "function_call"
+			finishReason = FinishReasonFunctionCall
 		}

 		resp := &schema.OpenAIResponse{
@@ -479,9 +493,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: []schema.Choice{
 				{
-					FinishReason: finishReason,
+					FinishReason: &finishReason,
 					Index:        0,
-					Delta:        &schema.Message{Content: &textContentToReturn},
+					Delta:        &schema.Message{},
 				}},
 			Object: "chat.completion.chunk",
 			Usage:  *usage,
@@ -502,7 +516,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 	tokenCallback := func(s string, c *[]schema.Choice) {
 		if !shouldUseFn {
 			// no function is called, just reply and use stop as finish reason
-			*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
+			stopReason := FinishReasonStop
+			*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
 			return
 		}
@@ -520,12 +535,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			return
 		}

+		stopReason := FinishReasonStop
 		*c = append(*c, schema.Choice{
-			FinishReason: "stop",
+			FinishReason: &stopReason,
 			Message:      &schema.Message{Role: "assistant", Content: &result}})
 	default:
+		toolCallsReason := FinishReasonToolCalls
 		toolChoice := schema.Choice{
-			FinishReason: "tool_calls",
+			FinishReason: &toolCallsReason,
 			Message: &schema.Message{
 				Role: "assistant",
 			},
@@ -549,8 +566,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			)
 		} else {
 			// otherwise we return more choices directly (deprecated)
+			functionCallReason := FinishReasonFunctionCall
 			*c = append(*c, schema.Choice{
-				FinishReason: "function_call",
+				FinishReason: &functionCallReason,
 				Message: &schema.Message{
 					Role:    "assistant",
 					Content: &textContentToReturn,
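Pulled out of the streaming handler above, the finish-reason selection reduces to a small decision: tool_calls when tools were declared and called, the deprecated function_call when a call fired without declared tools, stop otherwise. A standalone sketch, assuming it lives in the same openai package as the constants added by this commit (the pickFinishReason helper is hypothetical, not part of the patch):

// pickFinishReason mirrors the post-stream logic in chat.go. A pointer is
// returned because schema.Choice.FinishReason is now *string.
func pickFinishReason(toolsCalled bool, declaredTools int) *string {
	reason := FinishReasonStop
	if toolsCalled && declaredTools > 0 {
		reason = FinishReasonToolCalls
	} else if toolsCalled {
		reason = FinishReasonFunctionCall
	}
	return &reason
}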

core/http/endpoints/openai/completion.go

Lines changed: 37 additions & 11 deletions
@@ -2,7 +2,6 @@ package openai

 import (
 	"bufio"
-	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -47,8 +46,9 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
 		Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
 		Choices: []schema.Choice{
 			{
-				Index: 0,
-				Text:  s,
+				Index:        0,
+				Text:         s,
+				FinishReason: nil,
 			},
 		},
 		Object: "text_completion",
@@ -140,32 +140,57 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
 				log.Debug().Msgf("No choices in the response, skipping")
 				continue
 			}
-			var buf bytes.Buffer
-			enc := json.NewEncoder(&buf)
-			enc.Encode(ev)
+			respData, err := json.Marshal(ev)
+			if err != nil {
+				log.Debug().Msgf("Failed to marshal response: %v", err)
+				continue
+			}

-			log.Debug().Msgf("Sending chunk: %s", buf.String())
-			fmt.Fprintf(w, "data: %v\n", buf.String())
+			log.Debug().Msgf("Sending chunk: %s", string(respData))
+			fmt.Fprintf(w, "data: %s\n\n", string(respData))
 			w.Flush()
 		case err := <-ended:
 			if err == nil {
 				break LOOP
 			}
 			log.Error().Msgf("Stream ended with error: %v", err)
-			fmt.Fprintf(w, "data: %v\n", "Internal error: "+err.Error())
+
+			stopReason := FinishReasonStop
+			errorResp := schema.OpenAIResponse{
+				ID:      id,
+				Created: created,
+				Model:   input.Model,
+				Choices: []schema.Choice{
+					{
+						Index:        0,
+						FinishReason: &stopReason,
+						Text:         "Internal error: " + err.Error(),
+					},
+				},
+				Object: "text_completion",
+			}
+			errorData, marshalErr := json.Marshal(errorResp)
+			if marshalErr != nil {
+				log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
+				// Send a simple error message as fallback
+				fmt.Fprintf(w, "data: {\"error\":\"Internal error\"}\n\n")
+			} else {
+				fmt.Fprintf(w, "data: %s\n\n", string(errorData))
+			}
 			w.Flush()
 			break LOOP
 		}
 	}

+	stopReason := FinishReasonStop
 	resp := &schema.OpenAIResponse{
 		ID:      id,
 		Created: created,
 		Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 		Choices: []schema.Choice{
 			{
 				Index:        0,
-				FinishReason: "stop",
+				FinishReason: &stopReason,
 			},
 		},
 		Object: "text_completion",
@@ -197,7 +222,8 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva

 	r, tokenUsage, err := ComputeChoices(
 		input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) {
-			*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
+			stopReason := FinishReasonStop
+			*c = append(*c, schema.Choice{Text: s, FinishReason: &stopReason, Index: k})
 		}, nil)
 	if err != nil {
 		return err
core/http/endpoints/openai/constants.go

Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+package openai
+
+// Finish reason constants for OpenAI API responses
+const (
+	FinishReasonStop         = "stop"
+	FinishReasonToolCalls    = "tool_calls"
+	FinishReasonFunctionCall = "function_call"
+)
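A note on usage: these are untyped string constants, and Go forbids taking the address of a constant, which is why the rest of the commit standardizes on copying into a local variable before taking the address. A small hypothetical illustration (choice stands for any schema.Choice value):

stopReason := FinishReasonStop // copy into an addressable variable
choice.FinishReason = &stopReason

// By contrast, this does not compile:
// choice.FinishReason = &FinishReasonStop // invalid: cannot take the address of a constant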

core/http/endpoints/openai/realtime.go

Lines changed: 6 additions & 3 deletions
@@ -1072,7 +1072,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
 	result, tokenUsage, err := ComputeChoices(input, prompt, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
 		if !shouldUseFn {
 			// no function is called, just reply and use stop as finish reason
-			*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
+			stopReason := FinishReasonStop
+			*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
 			return
 		}

@@ -1099,7 +1100,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
 		}

 		if len(input.Tools) > 0 {
-			toolChoice.FinishReason = "tool_calls"
+			toolCallsReason := FinishReasonToolCalls
+			toolChoice.FinishReason = &toolCallsReason
 		}

 		for _, ss := range results {
@@ -1120,8 +1122,9 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
 			)
 		} else {
 			// otherwise we return more choices directly
+			functionCallReason := FinishReasonFunctionCall
 			*c = append(*c, schema.Choice{
-				FinishReason: "function_call",
+				FinishReason: &functionCallReason,
 				Message: &schema.Message{
 					Role:    "assistant",
 					Content: &textContentToReturn,

core/schema/openai.go

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ type OpenAIResponse struct {

 type Choice struct {
 	Index        int      `json:"index"`
-	FinishReason string   `json:"finish_reason"`
+	FinishReason *string  `json:"finish_reason"`
 	Message      *Message `json:"message,omitempty"`
 	Delta        *Message `json:"delta,omitempty"`
 	Text         string   `json:"text,omitempty"`
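The effect of this one-line schema change, sketched below: a nil *string marshals to an explicit "finish_reason":null (what streaming chunks need), while a set pointer marshals to the string. The old plain string field could only ever produce "" here, and adding omitempty would have dropped the key entirely. A minimal, runnable demonstration (the choice type is a trimmed stand-in for schema.Choice):

package main

import (
	"encoding/json"
	"fmt"
)

type choice struct {
	Index        int     `json:"index"`
	FinishReason *string `json:"finish_reason"` // no omitempty, so nil still emits the key
}

func main() {
	streaming, _ := json.Marshal(choice{Index: 0})
	fmt.Println(string(streaming)) // {"index":0,"finish_reason":null}

	stop := "stop"
	final, _ := json.Marshal(choice{Index: 0, FinishReason: &stop})
	fmt.Println(string(final)) // {"index":0,"finish_reason":"stop"}
}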

docs/go.mod

Lines changed: 0 additions & 2 deletions
@@ -1,5 +1,3 @@
 module github.com/McShelby/hugo-theme-relearn.git

 go 1.19
-
-require github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 // indirect

docs/go.sum

Lines changed: 0 additions & 4 deletions
@@ -1,4 +0,0 @@
-github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 h1:SmpwwN3DNzJWbV+IT8gaFu07ENUFpCvKou5BHYUKuVs=
-github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200/go.mod h1:kx8MBj9T7SFR8ZClWvKZPmmUxBaltkoXvnWlZZcSnYA=
-github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2 v2.21100.20000/go.mod h1:mFberT6ZtcchrsDtfvJM7aAH2bDKLdOnruUHl0hlapI=
-github.com/twbs/bootstrap v5.3.2+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0=
