From 690d31870d6c8e4341958ad13c1f5308df6367ab Mon Sep 17 00:00:00 2001 From: Stefan Date: Wed, 12 Jun 2024 11:08:29 +0200 Subject: [PATCH] Clean up the project to be a generic starter template --- app/api/assemblyToken/route.ts | 20 ----- app/api/lemurRequest/route.ts | 31 -------- components/CallLayout.tsx | 131 +-------------------------------- helpers/createMicrophone.ts | 47 ------------ helpers/createTranscriber.ts | 74 ------------------- helpers/getAssemblyToken.ts | 11 --- helpers/mergeBuffers.ts | 6 -- package.json | 1 - yarn.lock | 9 +-- 9 files changed, 2 insertions(+), 328 deletions(-) delete mode 100644 app/api/assemblyToken/route.ts delete mode 100644 app/api/lemurRequest/route.ts delete mode 100644 helpers/createMicrophone.ts delete mode 100644 helpers/createTranscriber.ts delete mode 100644 helpers/getAssemblyToken.ts delete mode 100644 helpers/mergeBuffers.ts diff --git a/app/api/assemblyToken/route.ts b/app/api/assemblyToken/route.ts deleted file mode 100644 index ba87c27..0000000 --- a/app/api/assemblyToken/route.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { AssemblyAI } from 'assemblyai'; - -export async function POST() { - const apiKey = process.env.ASSEMBLY_API_KEY; - if (!apiKey) { - return Response.error(); - } - - const assemblyClient = new AssemblyAI({ apiKey: apiKey }); - - const token = await assemblyClient.realtime.createTemporaryToken({ - expires_in: 3_600_000_000, - }); - - const response = { - token: token, - }; - - return Response.json(response); -} diff --git a/app/api/lemurRequest/route.ts b/app/api/lemurRequest/route.ts deleted file mode 100644 index be55b7c..0000000 --- a/app/api/lemurRequest/route.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { AssemblyAI } from 'assemblyai'; - -export async function POST(request: Request) { - const apiKey = process.env.ASSEMBLY_API_KEY; - if (!apiKey) { - return Response.error(); - } - - const client = new AssemblyAI({ apiKey: apiKey }); - const body = await request.json(); - - const prompt = body?.prompt; - - if (!prompt) { - return Response.error(); - } - const initialPrompt = - 'You act as an assistant during a video call. You get a question and I want you to answer it directly without repeating it. If you do not know the answer, clearly state that.'; - const lemurResponse = await client.lemur.task({ - prompt: initialPrompt, - // My first prompt idea: 'You act as an assistant during a video call. You get a question and I want you to answer it directly without repeating it. If you do not know the answer, clearly state that.', - input_text: prompt, - }); - - const response = { - prompt: prompt, - response: lemurResponse.response, - }; - - return Response.json(response); -} diff --git a/components/CallLayout.tsx b/components/CallLayout.tsx index dca0f17..ee96e8d 100644 --- a/components/CallLayout.tsx +++ b/components/CallLayout.tsx @@ -1,8 +1,5 @@ 'use client'; -import Image from 'next/image'; -import { createMicrophone } from '@/helpers/createMicrophone'; -import { createTranscriber } from '@/helpers/createTranscriber'; import { CallingState } from '@stream-io/video-client'; import { useCallStateHooks, @@ -11,86 +8,12 @@ import { CallControls, } from '@stream-io/video-react-sdk'; import '@stream-io/video-react-sdk/dist/css/styles.css'; -import { useCallback, useEffect, useState } from 'react'; -import robotImage from '../assets/robot.png'; -import llamaImage from '../assets/llama.png'; -import { RealtimeTranscriber } from 'assemblyai'; export default function CallLayout(): JSX.Element { - // Text to display what is transcribed from AssemblyAI - const [transcribedText, setTranscribedText] = useState(''); - const [robotActive, setRobotActive] = useState(false); - const [llamaActive, setLlamaActive] = useState(false); - const [llamaResponse, setLlamaResponse] = useState(''); - const [transcriber, setTranscriber] = useState< - RealtimeTranscriber | undefined - >(undefined); - const [mic, setMic] = useState< - | { - startRecording(onAudioCallback: any): Promise; - stopRecording(): void; - } - | undefined - >(undefined); - // Collecting data from the Stream SDK using hooks - const { useCallCallingState, useParticipantCount, useMicrophoneState } = - useCallStateHooks(); + const { useCallCallingState, useParticipantCount } = useCallStateHooks(); const participantCount = useParticipantCount(); const callingState = useCallCallingState(); - const { mediaStream } = useMicrophoneState(); - - const processPrompt = useCallback(async function processPrompt( - prompt: string - ) { - const response = await fetch('/api/lemurRequest', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ prompt: prompt }), - }); - - const responseBody = await response.json(); - const lemurResponse = responseBody.response; - console.log(lemurResponse); - setLlamaResponse(lemurResponse); - - setTimeout(() => { - setLlamaResponse(''); - setLlamaActive(false); - setTranscribedText(''); - }, 7000); - }, - []); - - const initializeAssemblyAI = useCallback( - async function initializeAssemblyAI() { - const transcriber = await createTranscriber( - setTranscribedText, - setLlamaActive, - processPrompt - ); - - if (!transcriber) { - console.error('Transcriber is not created'); - return; - } - await transcriber.connect(); - - if (!mediaStream) { - console.error('No media stream found'); - return; - } - const mic = createMicrophone(mediaStream); - console.log('Mic: ', mic, ', starting recording'); - mic.startRecording((audioData: any) => { - // console.log('[Option 2] Audio data: ', audioData); - transcriber.sendAudio(audioData); - }); - setMic(mic); - setTranscriber(transcriber); - }, - [mediaStream, processPrompt] - ); if (callingState !== CallingState.JOINED) { return ( @@ -105,62 +28,10 @@ export default function CallLayout(): JSX.Element {

Participants: {participantCount}

- {llamaResponse && ( -
- {llamaResponse} -
- )} -
-

- {transcribedText} -

-
-
- llama -
-
); - - async function switchRobot(isActive: boolean) { - if (isActive) { - console.log('Robot is active'); - mic?.stopRecording(); - await transcriber?.close(false); - setMic(undefined); - setTranscriber(undefined); - setRobotActive(false); - } else { - console.log('Robot is inactive'); - await initializeAssemblyAI(); - console.log('Initialized Assembly AI'); - setRobotActive(true); - } - } } diff --git a/helpers/createMicrophone.ts b/helpers/createMicrophone.ts deleted file mode 100644 index 0fd80bc..0000000 --- a/helpers/createMicrophone.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { mergeBuffers } from './mergeBuffers'; - -export function createMicrophone(stream: MediaStream) { - let audioWorkletNode; - let audioContext: AudioContext; - let source; - let audioBufferQueue = new Int16Array(0); - return { - async startRecording(onAudioCallback: any) { - audioContext = new AudioContext({ - sampleRate: 16_000, - latencyHint: 'balanced', - }); - source = audioContext.createMediaStreamSource(stream); - - await audioContext.audioWorklet.addModule('audio-processor.js'); - audioWorkletNode = new AudioWorkletNode(audioContext, 'audio-processor'); - - source.connect(audioWorkletNode); - audioWorkletNode.connect(audioContext.destination); - audioWorkletNode.port.onmessage = (event) => { - const currentBuffer = new Int16Array(event.data.audio_data); - audioBufferQueue = mergeBuffers(audioBufferQueue, currentBuffer); - - const bufferDuration = - (audioBufferQueue.length / audioContext.sampleRate) * 1000; - - // wait until we have 100ms of audio data - if (bufferDuration >= 100) { - const totalSamples = Math.floor(audioContext.sampleRate * 0.1); - - const finalBuffer = new Uint8Array( - audioBufferQueue.subarray(0, totalSamples).buffer - ); - - audioBufferQueue = audioBufferQueue.subarray(totalSamples); - if (onAudioCallback) onAudioCallback(finalBuffer); - } - }; - }, - stopRecording() { - stream?.getTracks().forEach((track) => track.stop()); - audioContext?.close(); - audioBufferQueue = new Int16Array(0); - }, - }; -} diff --git a/helpers/createTranscriber.ts b/helpers/createTranscriber.ts deleted file mode 100644 index d3232dc..0000000 --- a/helpers/createTranscriber.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { RealtimeTranscriber, RealtimeTranscript } from 'assemblyai'; -import { getAssemblyToken } from './getAssemblyToken'; -import { Dispatch, SetStateAction } from 'react'; - -export async function createTranscriber( - setTranscribedText: Dispatch>, - setLlamaActive: Dispatch>, - processPrompt: (prompt: string) => void -): Promise { - const token = await getAssemblyToken(); - console.log('Assembly token: ', token); - if (!token) { - console.error('No token found'); - return; - } - const transcriber = new RealtimeTranscriber({ - sampleRate: 16_000, - token: token, - wordBoost: ['Llama'], - endUtteranceSilenceThreshold: 1000, - // encoding: 'pcm_mulaw', - }); - - transcriber.on('open', ({ sessionId }) => { - console.log(`Transcriber opened with session ID: ${sessionId}`); - }); - - transcriber.on('error', (error: Error) => { - console.error('Transcriber error:', error); - // TODO: close transcriber - // await transcriber.close(); - }); - - transcriber.on('close', (code: number, reason: string) => { - console.log(`Transcriber closed with code ${code} and reason: ${reason}`); - // TODO: clean up - // transcriber = null; - }); - - const texts: any = {}; - transcriber.on('transcript', (transcript: RealtimeTranscript) => { - if (!transcript.text) { - // console.error('Transcript is empty'); - return; - } - - // Detect if we're asking something for the LLM - setLlamaActive(transcript.text.toLowerCase().indexOf('llama') > 0); - - if (transcript.message_type === 'PartialTranscript') { - // console.log('[Transcript] Partial:', transcript.text); - let msg = ''; - texts[transcript.audio_start] = transcript.text; - const keys = Object.keys(texts); - // keys.sort((a, b) => a - b); - for (const key of keys) { - if (texts[key]) { - msg += ` ${texts[key]}`; - } - } - console.log('[Transcript] Msg: ', msg); - setTranscribedText(transcript.text); - } else { - console.log('[Transcript] Final:', transcript.text); - setTranscribedText(transcript.text); - if (transcript.text.toLowerCase().indexOf('llama') > 0) { - console.log('Setting prompt to: ', transcript.text); - processPrompt(transcript.text); - } - } - }); - - return transcriber; -} diff --git a/helpers/getAssemblyToken.ts b/helpers/getAssemblyToken.ts deleted file mode 100644 index a41426d..0000000 --- a/helpers/getAssemblyToken.ts +++ /dev/null @@ -1,11 +0,0 @@ -export async function getAssemblyToken(): Promise { - const response = await fetch('/api/assemblyToken', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - cache: 'no-store', - }); - - const responseBody = await response.json(); - const token = responseBody.token; - return token; -} diff --git a/helpers/mergeBuffers.ts b/helpers/mergeBuffers.ts deleted file mode 100644 index 34c563c..0000000 --- a/helpers/mergeBuffers.ts +++ /dev/null @@ -1,6 +0,0 @@ -export function mergeBuffers(lhs: any, rhs: any) { - const mergedBuffer = new Int16Array(lhs.length + rhs.length); - mergedBuffer.set(lhs, 0); - mergedBuffer.set(rhs, lhs.length); - return mergedBuffer; -} diff --git a/package.json b/package.json index 7e87777..9ecd109 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,6 @@ "dependencies": { "@stream-io/node-sdk": "^0.1.13", "@stream-io/video-react-sdk": "^0.6.10", - "assemblyai": "^4.4.1", "next": "14.2.2", "react": "^18", "react-dom": "^18" diff --git a/yarn.lock b/yarn.lock index 836fec1..4ca16f7 100644 --- a/yarn.lock +++ b/yarn.lock @@ -605,13 +605,6 @@ arraybuffer.prototype.slice@^1.0.3: is-array-buffer "^3.0.4" is-shared-array-buffer "^1.0.2" -assemblyai@^4.4.1: - version "4.4.1" - resolved "https://registry.yarnpkg.com/assemblyai/-/assemblyai-4.4.1.tgz#9534a604cc189377fcea6d8149427483367c5da5" - integrity sha512-GzpbSRhZ0VlHNTQTOKxOrod3Ckw4AZVQQlnE5sJeCuAhTXJLLi5SE6U4vOapTsxf3lO7+93tdNTlWBrkwYO4Nw== - dependencies: - ws "^8.16.0" - ast-types-flow@^0.0.8: version "0.0.8" resolved "https://registry.yarnpkg.com/ast-types-flow/-/ast-types-flow-0.0.8.tgz#0a85e1c92695769ac13a428bb653e7538bea27d6" @@ -3039,7 +3032,7 @@ wrappy@1: resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f" integrity sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ== -ws@^8.14.2, ws@^8.16.0: +ws@^8.14.2: version "8.16.0" resolved "https://registry.yarnpkg.com/ws/-/ws-8.16.0.tgz#d1cd774f36fbc07165066a60e40323eab6446fd4" integrity sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==