Skip to content

Commit 01beef6

Browse files
committed
feat: add sample for vl reasoning
1 parent 6e4fafd commit 01beef6

File tree

8 files changed

+142
-12
lines changed

8 files changed

+142
-12
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
using System.Text;
2+
using Cnblogs.DashScope.Core;
3+
4+
namespace Cnblogs.DashScope.Sample.Multimodal;
5+
6+
/// <summary>
/// Sample that sends two image URLs and a text question to a vision-language model with
/// thinking enabled, then streams the reasoning and the answer to the console.
/// </summary>
public class ImageInputSample : ISample
{
    /// <inheritdoc />
    public string Description => "Chat with image input";

    /// <inheritdoc />
    public async Task RunAsync(IDashScopeClient client)
    {
        var messages = new List<MultimodalMessage>();
        messages.Add(
            MultimodalMessage.User(
            [
                MultimodalMessageContent.ImageContent(
                    "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241022/emyrja/dog_and_girl.jpeg"),
                MultimodalMessageContent.ImageContent("https://dashscope.oss-cn-beijing.aliyuncs.com/images/tiger.png"),
                MultimodalMessageContent.TextContent("这些图展现了什么内容?")
            ]));
        var completion = client.GetMultimodalGenerationStreamAsync(
            new ModelRequest<MultimodalInput, IMultimodalParameters>()
            {
                Model = "qwen3-vl-plus",
                Input = new MultimodalInput() { Messages = messages },
                Parameters = new MultimodalParameters()
                {
                    IncrementalOutput = true,
                    EnableThinking = true,
                    VlHighResolutionImages = true
                }
            });
        var reply = new StringBuilder();
        var reasoning = false;
        MultimodalTokenUsage? usage = null;
        await foreach (var chunk in completion)
        {
            // Record usage before any early `continue`: otherwise chunks that carry only
            // reasoning text or an empty content list are skipped and the printed usage
            // would be stale (or missing). Keep the last non-null value seen.
            usage = chunk.Usage ?? usage;

            var choice = chunk.Output.Choices[0];
            if (string.IsNullOrEmpty(choice.Message.ReasoningContent) == false)
            {
                // Reasoning chunk: print the prefix once, then stream the reasoning text.
                if (reasoning == false)
                {
                    Console.Write("Reasoning > ");
                    reasoning = true;
                }

                Console.Write(choice.Message.ReasoningContent);
                continue;
            }

            if (reasoning)
            {
                // First non-reasoning chunk after reasoning: switch to the answer section.
                reasoning = false;
                Console.WriteLine();
                Console.Write("Assistant > ");
            }

            if (choice.Message.Content.Count == 0)
            {
                continue;
            }

            Console.Write(choice.Message.Content[0].Text);
            reply.Append(choice.Message.Content[0].Text);
        }

        Console.WriteLine();

        // Append the assembled answer to the conversation history.
        messages.Add(MultimodalMessage.Assistant([MultimodalMessageContent.TextContent(reply.ToString())]));
        if (usage != null)
        {
            Console.WriteLine(
                $"Usage: in({usage.InputTokens})/out({usage.OutputTokens})/image({usage.ImageTokens})/reasoning({usage.OutputTokensDetails?.ReasoningTokens})/total({usage.TotalTokens})");
        }
    }
}

src/Cnblogs.DashScope.Core/IMultimodalParameters.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ public interface IMultimodalParameters
99
IIncrementalOutputParameter,
1010
IPenaltyParameter,
1111
IMaxTokenParameter,
12-
IStopTokenParameter
12+
IStopTokenParameter,
13+
IThinkingParameter
1314
{
1415
/// <summary>
1516
/// Allow higher resolution for inputs. When set to <c>true</c>, increases the maximum input token count from 1280 to 16384. Defaults to <c>false</c>.

src/Cnblogs.DashScope.Core/ITextGenerationParameters.cs

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ public interface ITextGenerationParameters
99
IProbabilityParameter,
1010
IPenaltyParameter,
1111
IMaxTokenParameter,
12-
IStopTokenParameter
12+
IStopTokenParameter,
13+
IThinkingParameter
1314
{
1415
/// <summary>
1516
/// The format of the result, must be <c>text</c> or <c>message</c>.
@@ -50,16 +51,6 @@ public interface ITextGenerationParameters
5051
/// </summary>
5152
TextGenerationSearchOptions? SearchOptions { get; set; }
5253

53-
/// <summary>
54-
/// Thinking option. Valid for supported models.(e.g. qwen3)
55-
/// </summary>
56-
bool? EnableThinking { get; }
57-
58-
/// <summary>
59-
/// Maximum length of thinking content. Valid for supported models.(e.g. qwen3)
60-
/// </summary>
61-
int? ThinkingBudget { get; set; }
62-
6354
/// <summary>
6455
/// Include log probabilities in response.
6556
/// </summary>
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
namespace Cnblogs.DashScope.Core;
2+
3+
/// <summary>
/// Request parameters that control model thinking.
/// </summary>
public interface IThinkingParameter
{
    /// <summary>
    /// Whether thinking is enabled. Only valid for models that support it (e.g. qwen3).
    /// </summary>
    bool? EnableThinking { get; }

    /// <summary>
    /// Maximum length of the thinking content. Only valid for models that support it (e.g. qwen3).
    /// </summary>
    int? ThinkingBudget { get; set; }
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
namespace Cnblogs.DashScope.Core;
2+
3+
/// <summary>
/// Breakdown of the input tokens of a multimodal request.
/// </summary>
/// <param name="ImageTokens">Number of input tokens attributed to images.</param>
/// <param name="TextTokens">Number of input tokens attributed to text.</param>
public record MultimodalInputTokenDetails(
    int? ImageTokens,
    int? TextTokens);
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
namespace Cnblogs.DashScope.Core;
2+
3+
/// <summary>
/// Breakdown of the output tokens of a multimodal response.
/// </summary>
/// <param name="ReasoningTokens">Number of output tokens attributed to reasoning.</param>
/// <param name="TextTokens">Number of output tokens attributed to text.</param>
public record MultimodalOutputTokenDetails(
    int? ReasoningTokens,
    int? TextTokens);

src/Cnblogs.DashScope.Core/MultimodalParameters.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,10 @@ public class MultimodalParameters : IMultimodalParameters
3737

3838
/// <inheritdoc />
3939
public TextGenerationStop? Stop { get; set; }
40+
41+
/// <inheritdoc />
42+
public bool? EnableThinking { get; set; }
43+
44+
/// <inheritdoc />
45+
public int? ThinkingBudget { get; set; }
4046
}

src/Cnblogs.DashScope.Core/MultimodalTokenUsage.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,24 @@ public class MultimodalTokenUsage
2929
/// The token usage of input video.
3030
/// </summary>
3131
public int? VideoTokens { get; set; }
32+
33+
/// <summary>
34+
/// Count of cached tokens.
35+
/// </summary>
36+
public int? CachedTokens { get; set; }
37+
38+
/// <summary>
39+
/// Count of total tokens.
40+
/// </summary>
41+
public int? TotalTokens { get; set; }
42+
43+
/// <summary>
44+
/// The details of input token usage.
45+
/// </summary>
46+
public MultimodalInputTokenDetails? InputTokensDetails { get; set; }
47+
48+
/// <summary>
49+
/// The details of output token usage.
50+
/// </summary>
51+
public MultimodalOutputTokenDetails? OutputTokensDetails { get; set; }
3252
}

0 commit comments

Comments
 (0)