 	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"strings"
 
 	"github.com/openai/openai-go"
 	"github.com/openai/openai-go/responses"
@@ -409,52 +410,104 @@ func EncodeToolMessage(message *api.ToolMessage) ([]responses.ResponseInputItemU |
 
 // encodeComputerToolResult handles encoding the result of a computer use tool call
 func encodeComputerToolResult(result *api.ToolResultBlock) (responses.ResponseInputItemUnionParam, error) {
-	if len(result.Content) != 1 {
-		return responses.ResponseInputItemUnionParam{}, fmt.Errorf("expected 1 content block for computer use tool result, got %d", len(result.Content))
-	}
+	// No content at all - this is an error
+	if len(result.Content) == 0 {
+		return responses.ResponseInputItemUnionParam{}, fmt.Errorf("expected at least 1 content block for computer use tool result, got 0")
+	}
+
+	// Single content block - try to handle as screenshot
+	if len(result.Content) == 1 {
+		content := result.Content[0]
+		var imageBlock *api.ImageBlock
+		switch b := content.(type) {
+		case *api.ImageBlock:
+			imageBlock = b
+		case api.ImageBlock:
+			imageBlock = &b
+		default:
+			// Single non-image block - check if it's text
+			if _, ok := content.(*api.TextBlock); ok {
+				return encodeTextToolResult(result)
+			}
+			// Single block that's neither image nor text - this is invalid
+			return responses.ResponseInputItemUnionParam{}, fmt.Errorf("computer use tool result has 1 content block of type %s, expected image or text", content.Type())
+		}
 
-	content := result.Content[0]
-	var imageBlock *api.ImageBlock
-	switch b := content.(type) {
-	case *api.ImageBlock:
-		imageBlock = b
-	case api.ImageBlock:
-		imageBlock = &b
-	default:
-		return responses.ResponseInputItemUnionParam{}, fmt.Errorf("expected image block for computer use tool result, got %T", content)
+		// Create data URL from image data
+		// TODO: Add helper methods to the image and file blocks to make this easier
+		dataURL := "data:" + imageBlock.MediaType + ";base64," + base64.StdEncoding.EncodeToString(imageBlock.Data)
+
+		screenshot := responses.ResponseComputerToolCallOutputScreenshotParam{
+			Type:     "computer_screenshot",
+			ImageURL: openai.String(dataURL),
+		}
+
+		// Extract safety checks from provider metadata if available
+		var acknowledgedSafetyChecks []responses.ResponseInputItemComputerCallOutputAcknowledgedSafetyCheckParam
+		if metadata := GetMetadata(result); metadata != nil {
+			for _, check := range metadata.ComputerSafetyChecks {
+				acknowledgedSafetyChecks = append(acknowledgedSafetyChecks, responses.ResponseInputItemComputerCallOutputAcknowledgedSafetyCheckParam{
+					ID:      check.ID,
+					Code:    openai.String(check.Code),
+					Message: openai.String(check.Message),
+				})
+			}
+		}
+
+		// Create the computer call output parameter
+		output := responses.ResponseInputItemComputerCallOutputParam{
+			CallID:                   result.ToolCallID,
+			Output:                   screenshot,
+			AcknowledgedSafetyChecks: acknowledgedSafetyChecks,
+		}
+
+		return responses.ResponseInputItemUnionParam{
+			OfComputerCallOutput: &output,
+		}, nil
 	}
 
-	// Create data URL from image data
-	// TODO: Add helper methods to the image and file blocks to make this easier
-	dataURL := "data:" + imageBlock.MediaType + ";base64," + base64.StdEncoding.EncodeToString(imageBlock.Data)
+	// Multiple blocks - check if any are text
+	hasText := false
+	for _, content := range result.Content {
+		if _, ok := content.(*api.TextBlock); ok {
+			hasText = true
+			break
+		}
+	}
 
-	screenshot := responses.ResponseComputerToolCallOutputScreenshotParam{
-		Type:     "computer_screenshot",
-		ImageURL: openai.String(dataURL),
+	if !hasText {
+		// Multiple blocks but none are text - this is ambiguous
+		return responses.ResponseInputItemUnionParam{}, fmt.Errorf("computer use tool result has %d content blocks but no text content", len(result.Content))
 	}
 
-	// Extract safety checks from provider metadata if available
-	var acknowledgedSafetyChecks []responses.ResponseInputItemComputerCallOutputAcknowledgedSafetyCheckParam
-	if metadata := GetMetadata(result); metadata != nil {
-		for _, check := range metadata.ComputerSafetyChecks {
-			acknowledgedSafetyChecks = append(acknowledgedSafetyChecks, responses.ResponseInputItemComputerCallOutputAcknowledgedSafetyCheckParam{
-				ID:      check.ID,
-				Code:    openai.String(check.Code),
-				Message: openai.String(check.Message),
-			})
+	// Has text content - use fallback
+	return encodeTextToolResult(result)
+}
+
+// encodeTextToolResult handles encoding text-based tool results, with Content[] taking precedence over Result
+func encodeTextToolResult(result *api.ToolResultBlock) (responses.ResponseInputItemUnionParam, error) {
+	output := ""
+
+	// Check Content[] first - more expressive when available
+	if len(result.Content) > 0 {
+		for _, content := range result.Content {
+			if textBlock, ok := content.(*api.TextBlock); ok {
+				output += textBlock.Text + "\n"
+			}
 		}
+		output = strings.TrimSuffix(output, "\n")
 	}
 
-	// Create the computer call output parameter
-	output := responses.ResponseInputItemComputerCallOutputParam{
-		CallID:                   result.ToolCallID,
-		Output:                   screenshot,
-		AcknowledgedSafetyChecks: acknowledgedSafetyChecks,
+	// If no text content found, use Result field
+	if output == "" && result.Result != nil {
+		resultJSON, err := json.Marshal(result.Result)
+		if err != nil {
+			return responses.ResponseInputItemUnionParam{}, fmt.Errorf("failed to marshal tool result: %v", err)
+		}
+		output = string(resultJSON)
 	}
 
-	return responses.ResponseInputItemUnionParam{
-		OfComputerCallOutput: &output,
-	}, nil
+	return responses.ResponseInputItemParamOfFunctionCallOutput(result.ToolCallID, output), nil
 }
 
 func EncodeToolResultBlock(result *api.ToolResultBlock) (responses.ResponseInputItemUnionParam, error) {
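
The substantive change above is the text fallback: any text blocks in Content[] take precedence, and the structured Result value is only JSON-marshaled when no text was found. A minimal standalone sketch of that precedence rule, using illustrative names rather than the package's actual types:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// textOutput mirrors the precedence rule in encodeTextToolResult above:
// concatenate any text blocks with newlines (trimming the trailing one),
// and only fall back to JSON-marshaling the structured result value when
// no text was found. Names and signatures here are illustrative only.
func textOutput(textBlocks []string, result any) (string, error) {
	output := ""
	for _, text := range textBlocks {
		output += text + "\n"
	}
	output = strings.TrimSuffix(output, "\n")

	if output == "" && result != nil {
		resultJSON, err := json.Marshal(result)
		if err != nil {
			return "", err
		}
		output = string(resultJSON)
	}
	return output, nil
}

func main() {
	withText, _ := textOutput([]string{"line one", "line two"}, map[string]any{"ignored": true})
	fmt.Println(withText) // text wins: prints the two lines, the result value is ignored

	noText, _ := textOutput(nil, map[string]any{"ok": true})
	fmt.Println(noText) // no text blocks: prints {"ok":true}
}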