Skip to content

Commit 4a8a5f5

Browse files
arnodirlam authored and altxtech committed
Add models supports_video? helper; Docs: Clarify vision vs video support
1 parent a218e70 commit 4a8a5f5

File tree

4 files changed

+18
-3
lines changed

4 files changed

+18
-3
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,14 @@ RubyLLM gives you one beautiful API for all of them. Same interface whether you'
3434

3535
```ruby
3636
# Just ask questions
37-
chat = RubyLLM.chat
37+
chat = RubyLLM.chat(model: "gemini-2.0-flash")
3838
chat.ask "What's the best way to learn Ruby?"
3939
```
4040

4141
```ruby
4242
# Analyze any file type
4343
chat.ask "What's in this image?", with: "ruby_conf.jpg"
44+
chat.ask "What's happening in this video?", with: "presentation.mp4"
4445
chat.ask "Describe this meeting", with: "meeting.wav"
4546
chat.ask "Summarize this document", with: "contract.pdf"
4647
chat.ask "Explain this code", with: "app.rb"

lib/ruby_llm/model/info.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ def supports_vision?
5656
modalities.input.include?('image')
5757
end
5858

59+
def supports_video?
60+
modalities.input.include?('video')
61+
end
62+
5963
def supports_functions?
6064
function_calling?
6165
end

lib/ruby_llm/providers/gemini/capabilities.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ def supports_vision?(model_id)
5252
model_id.match?(/gemini|flash|pro|imagen/)
5353
end
5454

55+
def supports_video?(model_id)
56+
model_id.match?(/gemini/)
57+
end
58+
5559
def supports_functions?(model_id)
5660
return false if model_id.match?(/text-embedding|embedding-001|aqa|flash-lite|imagen|gemini-2\.0-flash-lite/)
5761

@@ -214,10 +218,10 @@ def modalities_for(model_id)
214218

215219
if supports_vision?(model_id)
216220
modalities[:input] << 'image'
217-
modalities[:input] << 'video'
218221
modalities[:input] << 'pdf'
219222
end
220223

224+
modalities[:input] << 'video' if supports_video?(model_id)
221225
modalities[:input] << 'audio' if model_id.match?(/audio/)
222226
modalities[:output] << 'embeddings' if model_id.match?(/embedding|gemini-embedding/)
223227
modalities[:output] = ['image'] if model_id.match?(/imagen/)

spec/ruby_llm/models_spec.rb

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,17 @@
3636

3737
# There should be models from at least OpenAI and Anthropic
3838
expect(provider_counts.keys).to include('openai', 'anthropic')
39+
end
3940

40-
# Select only models with vision support
41+
it 'filters by vision support' do
4142
vision_models = RubyLLM.models.select(&:supports_vision?)
4243
expect(vision_models).to all(have_attributes(supports_vision?: true))
4344
end
45+
46+
it 'filters by video support' do
47+
video_models = RubyLLM.models.select(&:supports_video?)
48+
expect(video_models).to all(have_attributes(supports_video?: true))
49+
end
4450
end
4551

4652
describe 'finding models' do

0 commit comments

Comments
 (0)