|
| 1 | +/** |
| 2 | + * System context generator for Meeting Minutes real-time translation |
| 3 | + * Analyzes transcript history to generate context for improved translation accuracy |
| 4 | + */ |
| 5 | + |
| 6 | +import { MODELS } from '../../hooks/useModel'; |
| 7 | +import { Transcript, Model } from 'generative-ai-use-cases'; |
| 8 | + |
/**
 * Minimal view of a real-time segment: only the fields that the context
 * generation helpers in this module actually read.
 */
interface RealtimeSegmentForContext {
  /** Used to order segments (ascending) before their text is concatenated. */
  startTime: number;
  /** Segments with this flag set are skipped when transcript text is built. */
  isPartial: boolean;
  /** Transcript entries of the segment; each entry's `transcript` string is joined. */
  transcripts: Array<Transcript>;
}
| 15 | + |
// Human-readable language names used when building context-generation prompts.
// Full locale tags are listed first; bare language subtags act as fallbacks
// for regions that are not listed explicitly.
const LANGUAGE_NAME_MAPPING: { [key: string]: string } = {
  'ja-JP': 'Japanese',
  'en-US': 'English',
  'en-GB': 'English',
  'zh-CN': 'Chinese',
  'zh-TW': 'Chinese',
  'ko-KR': 'Korean',
  'th-TH': 'Thai',
  'vi-VN': 'Vietnamese',
  // Language family fallbacks
  ja: 'Japanese',
  en: 'English',
  zh: 'Chinese',
  ko: 'Korean',
  th: 'Thai',
  vi: 'Vietnamese',
};

/**
 * Look up a code among the mapping's own keys only.
 *
 * A bare `LANGUAGE_NAME_MAPPING[code]` also resolves inherited members such as
 * `constructor`, `toString` or `__proto__`, which are truthy and are not
 * strings; restricting the lookup to own properties avoids returning them.
 *
 * @param code - Candidate key
 * @returns The mapped language name, or `undefined` when the key is not listed
 */
const lookupLanguageName = (code: string): string | undefined =>
  Object.prototype.hasOwnProperty.call(LANGUAGE_NAME_MAPPING, code)
    ? LANGUAGE_NAME_MAPPING[code]
    : undefined;

/**
 * Get language name from language code
 *
 * Resolution order: exact match on the full code, then match on the part
 * before the first '-', then the code itself unchanged.
 *
 * @param languageCode - Language code (e.g., 'ja-JP')
 * @returns Language name (e.g., 'Japanese'), or `languageCode` as-is when no
 *          mapping exists so the caller can still pass it along
 */
export const getLanguageNameFromCode = (languageCode: string): string => {
  // Part before the first '-' (e.g., 'en-AU' -> 'en'); split always yields at
  // least one element, the default only satisfies strict index checking.
  const [languageFamily = languageCode] = languageCode.split('-');

  return (
    lookupLanguageName(languageCode) ??
    lookupLanguageName(languageFamily) ??
    languageCode
  );
};
| 56 | + |
/**
 * Build one plain-text transcript out of finalized segments.
 *
 * Partial segments and segments without any transcript entries are ignored.
 * The remaining segments are ordered by ascending `startTime`; the input
 * array itself is left untouched because ordering happens on a filtered copy.
 *
 * @param segments - Array of realtime segments
 * @returns Space-separated transcript text with surrounding whitespace trimmed
 */
export const extractTranscriptText = (
  segments: RealtimeSegmentForContext[]
): string => {
  // filter() hands back a fresh array, so the in-place sort below is safe.
  const finalized = segments.filter(
    (candidate) => !candidate.isPartial && candidate.transcripts.length > 0
  );
  finalized.sort((earlier, later) => earlier.startTime - later.startTime);

  const pieces: string[] = [];
  for (const segment of finalized) {
    const segmentText = segment.transcripts
      .map((entry) => entry.transcript)
      .join(' ');
    pieces.push(segmentText);
  }

  return pieces.join(' ').trim();
};
| 74 | + |
/**
 * Decide whether a context-generation run is worthwhile right now.
 *
 * All of the following must hold: translation is enabled, a recording is in
 * progress, at least one segment exists, and the extracted transcript text is
 * at least 50 characters long.
 *
 * @param isTranslationEnabled - Whether real-time translation is enabled
 * @param isRecording - Whether currently recording
 * @param segments - Array of segments
 * @returns True if context generation should proceed
 */
export const shouldGenerateContext = (
  isTranslationEnabled: boolean,
  isRecording: boolean,
  segments: RealtimeSegmentForContext[]
): boolean =>
  isTranslationEnabled &&
  isRecording &&
  segments.length > 0 &&
  // Only reached when the cheap checks pass; 50 is the default minimum length.
  extractTranscriptText(segments).length >= 50;
| 94 | + |
/**
 * Build the system prompt that asks the model for a short meeting context.
 *
 * The prompt consists of four newline-separated lines; the last one instructs
 * the model to answer in the given language.
 *
 * @param targetLanguageName - Target language name
 * @returns System prompt string
 */
export const createContextGenerationPrompt = (
  targetLanguageName: string
): string => {
  const promptLines = [
    'You are an AI assistant that analyzes meeting transcripts to generate context for translation improvement.',
    'Based on the provided transcript, generate a brief context (2-3 sentences) about what kind of meeting this is, the main topics being discussed, and any technical terms or domain-specific language being used.',
    'Focus on information that would help improve translation accuracy.',
    'Respond in ' + targetLanguageName + '.',
  ];

  return promptLines.join('\n');
};
| 108 | + |
/**
 * Ask the model for a short context description of the meeting so far.
 *
 * Returns `null` without calling `predict` when the extracted transcript is
 * shorter than 50 characters, when `MODELS.modelIds` has no first entry, or
 * when that model id cannot be resolved. Any thrown error is logged and also
 * turned into `null`, so this function does not reject.
 *
 * @param segments - Array of realtime segments
 * @param targetLanguage - Target language code for context generation
 * @param predict - Prediction function from useChatApi
 * @returns Promise resolving to the trimmed generated context, or null if
 *          generation was skipped or failed
 */
export const generateSystemContext = async (
  segments: RealtimeSegmentForContext[],
  targetLanguage: string,
  predict: (params: {
    model: Model;
    messages: Array<{ role: 'system' | 'user'; content: string }>;
    id: string;
  }) => Promise<string>
): Promise<string | null> => {
  try {
    // Skip the request entirely while there is too little text to analyze.
    const fullTranscript = extractTranscriptText(segments);
    const hasEnoughText = fullTranscript.length >= 50; // Default minimum length
    if (!hasEnoughText) {
      return null;
    }

    // The first configured model id is the one used for context generation.
    const candidateModelId = MODELS.modelIds[0];
    if (!candidateModelId) {
      console.error('No models available for system context generation');
      return null;
    }

    // NOTE(review): '../../hooks/useModel' is already imported statically at
    // the top of this file for MODELS, so this dynamic import looks redundant
    // — confirm whether it can become a static import.
    const useModelModule = await import('../../hooks/useModel');
    const resolvedModel = useModelModule.findModelByModelId(candidateModelId);
    if (!resolvedModel) {
      console.error('Model not found:', candidateModelId);
      return null;
    }

    const languageName = getLanguageNameFromCode(targetLanguage);

    const generated = await predict({
      model: resolvedModel,
      messages: [
        {
          role: 'system',
          content: createContextGenerationPrompt(languageName),
        },
        {
          role: 'user',
          content: `Please analyze this meeting transcript and provide context for translation improvement:\n\n${fullTranscript}`,
        },
      ],
      id: '/meeting-context',
    });

    return generated.trim();
  } catch (error) {
    // Best-effort feature: report the failure and let the caller carry on.
    console.error('Failed to generate system context:', error);
    return null;
  }
};
| 179 | + |
/**
 * Bind a target language and prediction function into a reusable generator.
 *
 * The returned function only needs the segments and forwards everything to
 * `generateSystemContext`.
 *
 * @param targetLanguage - Target language code
 * @param predict - Prediction function
 * @returns Configured context generation function
 */
export const createContextGenerator =
  (
    targetLanguage: string,
    predict: (params: {
      model: Model;
      messages: Array<{ role: 'system' | 'user'; content: string }>;
      id: string;
    }) => Promise<string>
  ) =>
  async (segments: RealtimeSegmentForContext[]): Promise<string | null> =>
    generateSystemContext(segments, targetLanguage, predict);
| 200 | + |
/**
 * Get context from recent segments for translation
 *
 * Partial segments and segments without transcript entries are ignored. The
 * remaining segments are ordered by ascending `startTime` and only the last
 * `maxSegments` of them contribute to the result. The input array is not
 * mutated.
 *
 * @param segments - Array of realtime segments
 * @param maxSegments - Maximum number of recent segments to include
 *                      (default: 10). Zero, negative or NaN yields an empty
 *                      string.
 * @returns Recent segments context as string
 */
export const getRecentSegmentsContext = (
  segments: RealtimeSegmentForContext[],
  maxSegments: number = 10
): string => {
  // slice(-0) is slice(0) and would return every segment, and a negative
  // limit would flip into a positive start index; treat both (and NaN) as
  // "include nothing".
  if (!(maxSegments > 0)) {
    return '';
  }

  const recentSegments = segments
    .filter((segment) => !segment.isPartial && segment.transcripts.length > 0)
    .sort((a, b) => a.startTime - b.startTime)
    .slice(-maxSegments); // Get last N segments

  return recentSegments
    .map((segment) =>
      segment.transcripts.map((transcript) => transcript.transcript).join(' ')
    )
    .join(' ')
    .trim();
};
0 commit comments