Skip to content

Commit 92d6572

Browse files
rlazo and thatfiredev authored
[ALF] Add LiveAPI video sample (#2724)
* [ALF] Add LiveAPI video sample * Actually runs this time * Point to the right screen * feat: Implement camera frame capture and logging - Modified StreamRealtimeVideoScreen to reduce the camera view to half the screen. - Implemented a frame analyzer in CameraView to capture frames once per second. - Added logging for the size of the captured frame's byte array. * Additional changes not captured by the previous commit * Capture audio * Reorg code * Permissions correctly requested * Format new files using ktfmt * Better naming * Suppress unnecessary lints * bump versions * additional lint fixes * Add entries to top level libs.versions.toml * Update libs.versions.toml Use the newest firebase-bom * Update firebaseBom and firebase-ai version references * Update firebase-ai/app/build.gradle.kts Co-authored-by: Rosário P. Fernandes <rosariopf@google.com> * Fix manifest * Update firebase-ai/app/src/main/java/com/google/firebase/quickstart/ai/FirebaseAISamples.kt Co-authored-by: Rosário P. Fernandes <rosariopf@google.com> * Remove duplicated entries Updated versions for firebaseBom and kotlin. * Fixed toml file * Yet another fix to the toml file * Fix error introduced during merge * Missing closing bracket --------- Co-authored-by: Rosário P. Fernandes <rosariopf@google.com>
1 parent 4018b71 commit 92d6572

File tree

13 files changed

+316
-86
lines changed

13 files changed

+316
-86
lines changed

firebase-ai/app/build.gradle.kts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ dependencies {
6767
// Webkit
6868
implementation(libs.androidx.webkit)
6969

70+
// CameraX (for video with the Gemini Live API)
71+
implementation(libs.androidx.camera.core)
72+
implementation(libs.androidx.camera.camera2)
73+
implementation(libs.androidx.camera.lifecycle)
74+
implementation(libs.androidx.camera.view)
75+
implementation(libs.androidx.camera.extensions)
76+
7077
// Material for XML-based theme
7178
implementation(libs.material)
7279

firebase-ai/app/src/main/AndroidManifest.xml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
<uses-permission android:name="android.permission.RECORD_AUDIO" />
77
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
88
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
9+
<uses-permission android:name="android.permission.CAMERA" />
10+
11+
<uses-feature android:name="android.hardware.camera" />
12+
<uses-feature android:name="android.hardware.microphone" />
13+
914
<application
1015
android:allowBackup="true"
1116
android:dataExtractionRules="@xml/data_extraction_rules"
@@ -29,4 +34,4 @@
2934
</activity>
3035
</application>
3136

32-
</manifest>
37+
</manifest>

firebase-ai/app/src/main/java/com/google/firebase/quickstart/ai/FirebaseAISamples.kt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,8 @@ val FIREBASE_AI_SAMPLES = listOf(
275275
description = "Use bidirectional streaming to get information about" +
276276
" weather conditions for a specific US city on a specific date",
277277
navRoute = "stream",
278+
backend = GenerativeBackend.vertexAI(),
279+
modelName = "gemini-2.0-flash-live-preview-04-09",
278280
categories = listOf(Category.LIVE_API, Category.AUDIO, Category.FUNCTION_CALLING),
279281
tools = listOf(
280282
Tool.functionDeclarations(
@@ -298,6 +300,36 @@ val FIREBASE_AI_SAMPLES = listOf(
298300
text("What was the weather in Boston, MA on October 17, 2024?")
299301
}
300302
),
303+
Sample(
304+
title = "Gemini Live (Video input)",
305+
description = "Use bidirectional streaming to chat with Gemini using your" +
306+
" phone's camera",
307+
navRoute = "streamVideo",
308+
backend = GenerativeBackend.vertexAI(),
309+
modelName = "gemini-2.0-flash-live-preview-04-09",
310+
categories = listOf(Category.LIVE_API, Category.VIDEO, Category.FUNCTION_CALLING),
311+
tools = listOf(
312+
Tool.functionDeclarations(
313+
listOf(
314+
FunctionDeclaration(
315+
"fetchWeather",
316+
"Get the weather conditions for a specific US city on a specific date.",
317+
mapOf(
318+
"city" to Schema.string("The US city of the location."),
319+
"state" to Schema.string("The US state of the location."),
320+
"date" to Schema.string(
321+
"The date for which to get the weather." +
322+
" Date must be in the format: YYYY-MM-DD."
323+
),
324+
),
325+
)
326+
)
327+
)
328+
),
329+
initialPrompt = content {
330+
text("What was the weather in Boston, MA on October 17, 2024?")
331+
}
332+
),
301333
Sample(
302334
title = "Weather Chat",
303335
description = "Use function calling to get the weather conditions" +

firebase-ai/app/src/main/java/com/google/firebase/quickstart/ai/MainActivity.kt

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.google.firebase.quickstart.ai
22

33
import android.Manifest
4+
import android.annotation.SuppressLint
45
import android.content.pm.PackageManager
56
import android.graphics.Bitmap
67
import android.graphics.BitmapFactory
@@ -31,6 +32,8 @@ import androidx.navigation.compose.rememberNavController
3132
import com.google.firebase.ai.type.toImagenInlineImage
3233
import com.google.firebase.quickstart.ai.feature.live.StreamRealtimeRoute
3334
import com.google.firebase.quickstart.ai.feature.live.StreamRealtimeScreen
35+
import com.google.firebase.quickstart.ai.feature.live.StreamRealtimeVideoRoute
36+
import com.google.firebase.quickstart.ai.feature.live.StreamRealtimeVideoScreen
3437
import com.google.firebase.quickstart.ai.feature.media.imagen.ImagenRoute
3538
import com.google.firebase.quickstart.ai.feature.media.imagen.ImagenScreen
3639
import com.google.firebase.quickstart.ai.feature.text.ChatRoute
@@ -44,10 +47,7 @@ class MainActivity : ComponentActivity() {
4447
@OptIn(ExperimentalMaterial3Api::class)
4548
override fun onCreate(savedInstanceState: Bundle?) {
4649
super.onCreate(savedInstanceState)
47-
if(ContextCompat.checkSelfPermission(this,
48-
Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
49-
ActivityCompat.requestPermissions(this, arrayOf(Manifest.permission.RECORD_AUDIO), 1)
50-
}
50+
5151
enableEdgeToEdge()
5252
catImage = BitmapFactory.decodeResource(applicationContext.resources, R.drawable.cat)
5353
setContent {
@@ -92,6 +92,9 @@ class MainActivity : ComponentActivity() {
9292
"stream" -> {
9393
navController.navigate(StreamRealtimeRoute(it.id))
9494
}
95+
"streamVideo" -> {
96+
navController.navigate(StreamRealtimeVideoRoute(it.id))
97+
}
9598
"text" -> {
9699
navController.navigate(TextGenRoute(it.id))
97100
}
@@ -107,10 +110,18 @@ class MainActivity : ComponentActivity() {
107110
composable<ImagenRoute> {
108111
ImagenScreen()
109112
}
110-
// Stream Realtime Samples
113+
// The permission is checked by the @RequiresPermission annotation on the
114+
// StreamRealtimeScreen composable.
115+
@SuppressLint("MissingPermission")
111116
composable<StreamRealtimeRoute> {
112117
StreamRealtimeScreen()
113118
}
119+
// The permission is checked by the @RequiresPermission annotation on the
120+
// StreamRealtimeVideoScreen composable.
121+
@SuppressLint("MissingPermission")
122+
composable<StreamRealtimeVideoRoute> {
123+
StreamRealtimeVideoScreen()
124+
}
114125
composable<TextGenRoute> {
115126
TextGenScreen()
116127
}
Lines changed: 36 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,33 @@
1-
package com.google.firebase.quickstart.ai.feature.media.imagen
1+
package com.google.firebase.quickstart.ai.feature.live
22

3-
import android.Manifest
3+
import android.annotation.SuppressLint
44
import android.graphics.Bitmap
5-
import androidx.annotation.RequiresPermission
65
import androidx.lifecycle.SavedStateHandle
76
import androidx.lifecycle.ViewModel
87
import androidx.lifecycle.viewModelScope
98
import androidx.navigation.toRoute
109
import com.google.firebase.Firebase
1110
import com.google.firebase.ai.FirebaseAI
12-
import com.google.firebase.ai.ImagenModel
13-
import com.google.firebase.ai.LiveGenerativeModel
14-
import com.google.firebase.ai.ai
1511
import com.google.firebase.ai.type.FunctionCallPart
1612
import com.google.firebase.ai.type.FunctionResponsePart
17-
import com.google.firebase.ai.type.GenerativeBackend
18-
import com.google.firebase.ai.type.ImagenAspectRatio
19-
import com.google.firebase.ai.type.ImagenImageFormat
20-
import com.google.firebase.ai.type.ImagenPersonFilterLevel
21-
import com.google.firebase.ai.type.ImagenSafetyFilterLevel
22-
import com.google.firebase.ai.type.ImagenSafetySettings
23-
import com.google.firebase.ai.type.InlineDataPart
24-
import com.google.firebase.ai.type.LiveServerContent
25-
import com.google.firebase.ai.type.LiveServerMessage
13+
import com.google.firebase.ai.type.InlineData
2614
import com.google.firebase.ai.type.LiveSession
2715
import com.google.firebase.ai.type.PublicPreviewAPI
2816
import com.google.firebase.ai.type.ResponseModality
2917
import com.google.firebase.ai.type.SpeechConfig
30-
import com.google.firebase.ai.type.TextPart
31-
import com.google.firebase.ai.type.Tool
3218
import com.google.firebase.ai.type.Voice
33-
import com.google.firebase.ai.type.asTextOrNull
34-
import com.google.firebase.ai.type.imagenGenerationConfig
3519
import com.google.firebase.ai.type.liveGenerationConfig
3620
import com.google.firebase.app
3721
import com.google.firebase.quickstart.ai.FIREBASE_AI_SAMPLES
38-
import com.google.firebase.quickstart.ai.feature.live.StreamRealtimeRoute
39-
import com.google.firebase.quickstart.ai.feature.text.functioncalling.WeatherRepository
4022
import com.google.firebase.quickstart.ai.feature.text.functioncalling.WeatherRepository.Companion.fetchWeather
41-
import kotlinx.coroutines.CoroutineScope
42-
import kotlinx.coroutines.Dispatchers
43-
import kotlinx.coroutines.flow.MutableStateFlow
44-
import kotlinx.coroutines.flow.StateFlow
23+
import java.io.ByteArrayOutputStream
4524
import kotlinx.coroutines.launch
4625
import kotlinx.coroutines.runBlocking
4726
import kotlinx.serialization.json.JsonObject
4827
import kotlinx.serialization.json.jsonPrimitive
4928

5029
@OptIn(PublicPreviewAPI::class)
51-
class BidiViewModel(
52-
savedStateHandle: SavedStateHandle
53-
) : ViewModel() {
30+
class BidiViewModel(savedStateHandle: SavedStateHandle) : ViewModel() {
5431
private val sampleId = savedStateHandle.toRoute<StreamRealtimeRoute>().sampleId
5532
private val sample = FIREBASE_AI_SAMPLES.first { it.id == sampleId }
5633

@@ -63,41 +40,54 @@ class BidiViewModel(
6340
// Change this to ContentModality.TEXT if you want text output.
6441
responseModality = ResponseModality.AUDIO
6542
}
43+
6644
@OptIn(PublicPreviewAPI::class)
67-
val liveModel = FirebaseAI.getInstance(Firebase.app, sample.backend).liveModel(
68-
"gemini-live-2.5-flash",
69-
generationConfig = liveGenerationConfig,
70-
tools = sample.tools
71-
)
72-
runBlocking {
73-
liveSession = liveModel.connect()
74-
}
45+
val liveModel =
46+
FirebaseAI.getInstance(Firebase.app, sample.backend)
47+
.liveModel(
48+
modelName = sample.modelName ?: "gemini-live-2.5-flash",
49+
generationConfig = liveGenerationConfig,
50+
tools = sample.tools,
51+
)
52+
runBlocking { liveSession = liveModel.connect() }
7553
}
7654

77-
fun handler(fetchWeatherCall: FunctionCallPart) : FunctionResponsePart {
78-
val response:JsonObject
55+
fun handler(fetchWeatherCall: FunctionCallPart): FunctionResponsePart {
56+
val response: JsonObject
7957
fetchWeatherCall.let {
8058
val city = it.args["city"]?.jsonPrimitive?.content
8159
val state = it.args["state"]?.jsonPrimitive?.content
8260
val date = it.args["date"]?.jsonPrimitive?.content
8361
runBlocking {
84-
response = if(!city.isNullOrEmpty() and !state.isNullOrEmpty() and date.isNullOrEmpty()) {
85-
fetchWeather(city!!, state!!, date!!)
86-
} else {
87-
JsonObject(emptyMap())
88-
}
62+
response =
63+
if (!city.isNullOrEmpty() and !state.isNullOrEmpty() and date.isNullOrEmpty()) {
64+
fetchWeather(city!!, state!!, date!!)
65+
} else {
66+
JsonObject(emptyMap())
67+
}
8968
}
9069
}
91-
return FunctionResponsePart("fetchWeather", response, fetchWeatherCall.id)
70+
return FunctionResponsePart("fetchWeather", response, fetchWeatherCall.id)
9271
}
93-
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
72+
73+
// The permission check is handled by the view that calls this function.
74+
@SuppressLint("MissingPermission")
9475
suspend fun startConversation() {
95-
liveSession.startAudioConversation(::handler)
76+
liveSession.startAudioConversation(::handler)
9677
}
9778

9879
fun endConversation() {
9980
liveSession.stopAudioConversation()
10081
}
10182

83+
fun sendVideoFrame(frame: Bitmap) {
84+
viewModelScope.launch {
85+
// Directly compress the Bitmap to a ByteArray
86+
val byteArrayOutputStream = ByteArrayOutputStream()
87+
frame.compress(Bitmap.CompressFormat.JPEG, 80, byteArrayOutputStream)
88+
val jpegBytes = byteArrayOutputStream.toByteArray()
10289

90+
liveSession.sendVideoRealtime(InlineData(jpegBytes, "image/jpeg"))
91+
}
92+
}
10393
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package com.google.firebase.quickstart.ai.feature.live
2+
3+
import android.annotation.SuppressLint
4+
import android.graphics.Bitmap
5+
import androidx.camera.core.CameraSelector
6+
import androidx.camera.core.ImageAnalysis
7+
import androidx.camera.core.ImageProxy
8+
import androidx.camera.core.Preview
9+
import androidx.camera.lifecycle.ProcessCameraProvider
10+
import androidx.camera.view.PreviewView
11+
import androidx.compose.runtime.Composable
12+
import androidx.compose.runtime.remember
13+
import androidx.compose.ui.Modifier
14+
import androidx.compose.ui.platform.LocalContext
15+
import androidx.compose.ui.platform.LocalLifecycleOwner
16+
import androidx.compose.ui.viewinterop.AndroidView
17+
import androidx.core.content.ContextCompat
18+
import androidx.lifecycle.LifecycleOwner
19+
import kotlin.time.Duration.Companion.seconds
20+
21+
@Composable
22+
fun CameraView(
23+
modifier: Modifier = Modifier,
24+
cameraSelector: CameraSelector = CameraSelector.DEFAULT_BACK_CAMERA,
25+
onFrameCaptured: (Bitmap) -> Unit,
26+
) {
27+
val context = LocalContext.current
28+
val lifecycleOwner = LocalLifecycleOwner.current
29+
val cameraProviderFuture = remember { ProcessCameraProvider.getInstance(context) }
30+
31+
AndroidView(
32+
factory = { ctx ->
33+
val previewView = PreviewView(ctx)
34+
val executor = ContextCompat.getMainExecutor(ctx)
35+
cameraProviderFuture.addListener(
36+
{
37+
val cameraProvider = cameraProviderFuture.get()
38+
bindPreview(
39+
lifecycleOwner,
40+
previewView,
41+
cameraProvider,
42+
cameraSelector,
43+
onFrameCaptured,
44+
)
45+
},
46+
executor,
47+
)
48+
previewView
49+
},
50+
modifier = modifier,
51+
)
52+
}
53+
54+
private fun bindPreview(
55+
lifecycleOwner: LifecycleOwner,
56+
previewView: PreviewView,
57+
cameraProvider: ProcessCameraProvider,
58+
cameraSelector: CameraSelector,
59+
onFrameCaptured: (Bitmap) -> Unit,
60+
) {
61+
val preview =
62+
Preview.Builder().build().also { it.setSurfaceProvider(previewView.surfaceProvider) }
63+
64+
val imageAnalysis =
65+
ImageAnalysis.Builder()
66+
.setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST)
67+
.build()
68+
.also {
69+
it.setAnalyzer(
70+
ContextCompat.getMainExecutor(previewView.context),
71+
SnapshotFrameAnalyzer(onFrameCaptured),
72+
)
73+
}
74+
75+
cameraProvider.unbindAll()
76+
cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector, preview, imageAnalysis)
77+
}
78+
79+
// Calls the [onFrameCaptured] callback with the captured frame every second.
80+
private class SnapshotFrameAnalyzer(private val onFrameCaptured: (Bitmap) -> Unit) :
81+
ImageAnalysis.Analyzer {
82+
private var lastFrameTimestamp = 0L
83+
private val interval = 1.seconds // 1 second
84+
85+
@SuppressLint("UnsafeOptInUsageError")
86+
override fun analyze(image: ImageProxy) {
87+
val currentTimestamp = System.currentTimeMillis()
88+
if (lastFrameTimestamp == 0L) {
89+
lastFrameTimestamp = currentTimestamp
90+
}
91+
92+
if (currentTimestamp - lastFrameTimestamp >= interval.inWholeMilliseconds) {
93+
onFrameCaptured(image.toBitmap())
94+
lastFrameTimestamp = currentTimestamp
95+
}
96+
image.close()
97+
}
98+
}

firebase-ai/app/src/main/java/com/google/firebase/quickstart/ai/feature/live/StreamRealtimeScreen.kt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@ import androidx.compose.runtime.remember
3232
import androidx.compose.runtime.mutableStateOf
3333

3434
import androidx.lifecycle.viewmodel.compose.viewModel
35-
import com.google.firebase.quickstart.ai.feature.media.imagen.BidiViewModel
36-
import com.google.firebase.quickstart.ai.feature.media.imagen.ImagenViewModel
3735
import kotlinx.coroutines.CoroutineScope
3836
import kotlinx.coroutines.Dispatchers
3937
import kotlinx.coroutines.launch

0 commit comments

Comments
 (0)