
Commit 38ff3ae

Support gpustack (#142)
1 parent 2c940ca commit 38ff3ae

23 files changed: +5,136 −1

bin/console

Lines changed: 2 additions & 0 deletions
@@ -19,6 +19,8 @@ RubyLLM.configure do |config|
   config.bedrock_secret_key = ENV.fetch('AWS_SECRET_ACCESS_KEY', nil)
   config.bedrock_region = ENV.fetch('AWS_REGION', nil)
   config.bedrock_session_token = ENV.fetch('AWS_SESSION_TOKEN', nil)
+  config.gpustack_api_base = ENV.fetch('GPUSTACK_API_BASE', nil)
+  config.gpustack_api_key = ENV.fetch('GPUSTACK_API_KEY', nil)
 end
 
 IRB.start(__FILE__)
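
bin/console simply mirrors the public configuration surface. In an application, the same two settings would be supplied through RubyLLM.configure; a minimal sketch with placeholder values (the real base URL depends on where the GPUStack server is listening):

    require 'ruby_llm'

    RubyLLM.configure do |config|
      # Placeholder endpoint and key; point these at an actual GPUStack server.
      config.gpustack_api_base = 'http://gpustack.local/v1'
      config.gpustack_api_key  = 'example-key'
    end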

lib/ruby_llm.rb

Lines changed: 2 additions & 0 deletions
@@ -18,6 +18,7 @@
   'deepseek' => 'DeepSeek',
   'bedrock' => 'Bedrock',
   'openrouter' => 'OpenRouter',
+  'gpustack' => 'GPUStack',
   'pdf' => 'PDF'
 )
 loader.ignore("#{__dir__}/tasks")
@@ -83,6 +84,7 @@ def logger
 RubyLLM::Provider.register :bedrock, RubyLLM::Providers::Bedrock
 RubyLLM::Provider.register :openrouter, RubyLLM::Providers::OpenRouter
 RubyLLM::Provider.register :ollama, RubyLLM::Providers::Ollama
+RubyLLM::Provider.register :gpustack, RubyLLM::Providers::GPUStack
 
 if defined?(Rails::Railtie)
   require 'ruby_llm/railtie'
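
With the inflection entry (so Zeitwerk resolves gpustack.rb to GPUStack rather than Gpustack) and the registration in place, a GPUStack-hosted model can be requested explicitly. A usage sketch, assuming the configuration above and a model named qwen3 deployed on the server (the model name the spec cassettes below use):

    # Pin the chat to the :gpustack provider rather than relying on model lookup.
    chat = RubyLLM.chat(model: 'qwen3', provider: :gpustack)
    chat.ask('Hello!')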

lib/ruby_llm/configuration.rb

Lines changed: 2 additions & 0 deletions
@@ -24,6 +24,8 @@ class Configuration
   :bedrock_session_token,
   :openrouter_api_key,
   :ollama_api_base,
+  :gpustack_api_base,
+  :gpustack_api_key,
   # Default models
   :default_model,
   :default_embedding_model,
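
These are plain attr_accessors alongside the other provider credentials, so the global configuration object reflects whatever a configure block assigned, and the provider module below reads both keys from it. A one-line check, continuing the sketch above:

    RubyLLM.config.gpustack_api_base # => "http://gpustack.local/v1"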

lib/ruby_llm/providers/gpustack.rb

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    # GPUStack API integration based on Ollama.
+    module GPUStack
+      extend OpenAI
+      extend GPUStack::Chat
+      extend GPUStack::Models
+
+      module_function
+
+      def api_base(config)
+        config.gpustack_api_base
+      end
+
+      def headers(config)
+        {
+          'Authorization' => "Bearer #{config.gpustack_api_key}"
+        }
+      end
+
+      def slug
+        'gpustack'
+      end
+
+      def local?
+        true
+      end
+
+      def configuration_requirements
+        %i[gpustack_api_base gpustack_api_key]
+      end
+    end
+  end
+end
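
The module functions above can be exercised directly against a Configuration object, which is roughly what the provider plumbing does when building a request. An illustrative check with placeholder values:

    config = RubyLLM::Configuration.new
    config.gpustack_api_base = 'http://gpustack.local/v1' # placeholder
    config.gpustack_api_key  = 'example-key'              # placeholder

    provider = RubyLLM::Providers::GPUStack
    provider.api_base(config)           # => "http://gpustack.local/v1"
    provider.headers(config)            # => { "Authorization" => "Bearer example-key" }
    provider.slug                       # => "gpustack"
    provider.local?                     # => true
    provider.configuration_requirements # => [:gpustack_api_base, :gpustack_api_key]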

lib/ruby_llm/providers/gpustack/chat.rb

Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    module GPUStack
+      # Chat methods of the GPUStack API integration
+      module Chat
+        module_function
+
+        def format_role(role)
+          # GPUStack doesn't use the new OpenAI convention for system prompts
+          role.to_s
+        end
+      end
+    end
+  end
+end
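
The override matters because GPUStack inherits the rest of its chat formatting from the shared OpenAI module; the "new OpenAI convention" the comment refers to appears to be the "developer" role that newer OpenAI APIs substitute for :system. Assuming that reading, the behavior is:

    RubyLLM::Providers::GPUStack::Chat.format_role(:system)    # => "system" (not "developer")
    RubyLLM::Providers::GPUStack::Chat.format_role(:assistant) # => "assistant"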

lib/ruby_llm/providers/gpustack/models.rb

Lines changed: 55 additions & 0 deletions

@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    module GPUStack
+      # Models methods of the GPUStack API integration
+      module Models
+        module_function
+
+        def models_url
+          'models'
+        end
+
+        def parse_list_models_response(response, slug, _capabilities)
+          items = response.body['items'] || []
+          items.map do |model|
+            Model::Info.new(
+              id: model['name'],
+              created_at: model['created_at'] ? Time.parse(model['created_at']) : nil,
+              display_name: "#{model['source']}/#{model['name']}",
+              provider: slug,
+              type: determine_model_type(model),
+              metadata: {
+                description: model['description'],
+                source: model['source'],
+                huggingface_repo_id: model['huggingface_repo_id'],
+                ollama_library_model_name: model['ollama_library_model_name'],
+                backend: model['backend'],
+                meta: model['meta'],
+                categories: model['categories']
+              },
+              context_window: model.dig('meta', 'n_ctx'),
+              # Using context window as max tokens since it's not explicitly provided
+              max_tokens: model.dig('meta', 'n_ctx'),
+              supports_vision: model.dig('meta', 'support_vision') || false,
+              supports_functions: model.dig('meta', 'support_tool_calls') || false,
+              supports_json_mode: true, # Assuming all models support JSON mode
+              input_price_per_million: 0.0, # Price information not available in new format
+              output_price_per_million: 0.0 # Price information not available in new format
+            )
+          end
+        end
+
+        private
+
+        def determine_model_type(model)
+          return 'embedding' if model['categories']&.include?('embedding')
+          return 'chat' if model['categories']&.include?('llm')
+
+          'other'
+        end
+      end
+    end
+  end
+end
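
To make the field mapping concrete, here is a hypothetical GPUStack models payload (all values invented for illustration) trimmed to the keys the parser reads, called through the provider module, which extends GPUStack::Models. This assumes Model::Info exposes readers for the attributes passed above:

    body = {
      'items' => [{
        'name'       => 'qwen3',
        'source'     => 'huggingface',
        'created_at' => '2025-01-01T00:00:00Z',
        'categories' => ['llm'],
        'meta'       => { 'n_ctx' => 32_768, 'support_vision' => false, 'support_tool_calls' => true }
      }]
    }
    response = Struct.new(:body).new(body) # minimal stand-in for an HTTP response

    models = RubyLLM::Providers::GPUStack.parse_list_models_response(response, 'gpustack', nil)
    models.first.id             # => "qwen3"
    models.first.display_name   # => "huggingface/qwen3"
    models.first.type           # => "chat" (from the "llm" category)
    models.first.context_window # => 32768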

spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_gpustack_qwen3_can_handle_multi-turn_conversations.yml

Lines changed: 117 additions & 0 deletions
Some generated files are not rendered by default.

spec/fixtures/vcr_cassettes/chat_basic_chat_functionality_gpustack_qwen3_can_have_a_basic_conversation.yml

Lines changed: 49 additions & 0 deletions
Some generated files are not rendered by default.
