Skip to content

Commit ee21b20

Browse files
Sugi275 and sugusugiaws authored
議事録生成機能に含まれる、リアルタイム翻訳機能の改善 (#1289)
Co-authored-by: sugusugi <sugusugi@amazon.co.jp>
1 parent b61a3a6 commit ee21b20

File tree

11 files changed

+1672
-380
lines changed

11 files changed

+1672
-380
lines changed

packages/web/public/locales/translation/en.yaml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ chat:
4040
title: Chat
4141
view_prompt_examples: View Prompt Examples
4242
common:
43+
arrow:
4344
cancel: Cancel
4445
clear: Clear
4546
close: Close
@@ -679,6 +680,7 @@ mcp_chat:
679680
title: MCP Chat
680681
meetingMinutes:
681682
auto_generate: Auto Generate
683+
bidirectional: Bidirectional Translation
682684
clear_minutes: Clear Minutes
683685
custom_prompt: Custom Prompt
684686
custom_prompt_placeholder: Enter your custom prompt for generating meeting minutes...
@@ -695,6 +697,8 @@ meetingMinutes:
695697
generation_frequency: Generation Frequency
696698
generation_success: Meeting minutes generated successfully
697699
language: Transcription Language
700+
language_1: Language 1
701+
language_2: Language 2
698702
language_auto: Auto Detect
699703
language_chinese: Chinese
700704
language_english: English
@@ -720,8 +724,14 @@ meetingMinutes:
720724
style_newspaper: Newspaper
721725
style_summary: Summary
722726
style_transcription: Transcription
727+
target_language: Real-time Translation Language
723728
title: Meeting Minutes Generator
724729
transcript: Transcript
730+
transcription_language: Transcription Language
731+
translation_language: Translation Language
732+
translation_model: Real-time Translation Model
733+
translation_type: Translation Type
734+
unidirectional: Unidirectional Translation
725735
model:
726736
parameters:
727737
reasoning_budget: Token budget for extended thinking
@@ -886,20 +896,21 @@ transcribe:
886896
stop_recording: Stop Recording
887897
supported_files: mp3, mp4, wav, flac, ogg, amr, webm, m4a files are available
888898
title: Speech Recognition
899+
voice_transcription: Voice Transcription
889900
translate:
890901
additional_context: Additional context
891902
additional_context_placeholder: You can enter additional points to consider (e.g., casualness, etc.)
892903
auto_detect_language: Auto detect language
893904
auto_translate: Auto translate
894905
contextHelp: Context information helps improve translation accuracy by providing background information about the meeting.
895906
continue_output: Continue output
907+
detectedLanguage: Detected Language
896908
enter_text: Enter text
897-
model: Real-time Translation Model
898909
realtimeTranslation: Real-time Translation
910+
realtime_translation: Real-time Translation
899911
result_placeholder: Translation results will be displayed here
900912
systemGeneratedContext: System-generated Context
901913
systemGeneratedContextPlaceholder: Auto-generated context based on the meeting content
902-
target_language: Real-time Translation Language
903914
text_to_translate: Text to translate
904915
title: Translate
905916
translating: Translating...

packages/web/public/locales/translation/ja.yaml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ chat:
3838
title: チャット
3939
view_prompt_examples: プロンプト例を見る
4040
common:
41+
arrow:
4142
cancel: キャンセル
4243
clear: クリア
4344
close: 閉じる
@@ -535,6 +536,7 @@ mcp_chat:
535536
title: MCP チャット
536537
meetingMinutes:
537538
auto_generate: 自動生成
539+
bidirectional: 双方向翻訳
538540
clear_minutes: 議事録をクリア
539541
custom_prompt: カスタムプロンプト
540542
custom_prompt_placeholder: 議事録生成用のカスタムプロンプトを入力してください...
@@ -552,6 +554,8 @@ meetingMinutes:
552554
generation_frequency: 生成頻度
553555
generation_success: 議事録が正常に生成されました
554556
language: 文字起こし言語
557+
language_1: 言語1
558+
language_2: 言語2
555559
language_auto: 自動検出
556560
language_chinese: 中国語
557561
language_english: 英語
@@ -577,8 +581,14 @@ meetingMinutes:
577581
style_newspaper: 新聞
578582
style_summary: 要約
579583
style_transcription: 文字起こし
584+
target_language: 翻訳言語
580585
title: 議事録生成
581586
transcript: 文字起こし
587+
transcription_language: 文字起こし言語
588+
translation_language: 翻訳言語
589+
translation_model: リアルタイム翻訳モデル
590+
translation_type: 翻訳方式
591+
unidirectional: 片方向翻訳
582592
model:
583593
parameters:
584594
reasoning_budget: 深く考えるトークン数上限
@@ -718,20 +728,21 @@ transcribe:
718728
stop_recording: 録音を停止する
719729
supported_files: mp3, mp4, wav, flac, ogg, amr, webm, m4a ファイルが利用可能です
720730
title: 音声認識
731+
voice_transcription: 音声文字起こし
721732
translate:
722733
additional_context: 追加コンテキスト
723734
additional_context_placeholder: 追加で考慮してほしい点を入力することができます(カジュアルさ等)
724735
auto_detect_language: 言語を自動検出
725736
auto_translate: 自動翻訳
726737
contextHelp: コンテキスト情報は会議の背景情報を提供することで翻訳精度を向上させます。
727738
continue_output: 続きを出力
739+
detectedLanguage: 検出言語
728740
enter_text: 入力してください
729-
model: リアルタイム翻訳モデル
730741
realtimeTranslation: リアルタイム翻訳
742+
realtime_translation: リアルタイム翻訳
731743
result_placeholder: 翻訳結果がここに表示されます
732744
systemGeneratedContext: システム自動コンテキスト
733745
systemGeneratedContextPlaceholder: 会議内容に基づいて自動生成されたコンテキスト
734-
target_language: リアルタイム翻訳言語
735746
text_to_translate: 翻訳したい文章
736747
title: 翻訳
737748
translating: 翻訳中...
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
/**
2+
* System context generator for Meeting Minutes real-time translation
3+
* Analyzes transcript history to generate context for improved translation accuracy
4+
*/
5+
6+
import { MODELS, findModelByModelId } from '../../hooks/useModel';
import { Transcript, Model } from 'generative-ai-use-cases';
8+
9+
/**
 * Minimal view of a real-time segment: only the fields the helpers in this
 * module read when building translation context.
 */
interface RealtimeSegmentForContext {
  /** Sort key; the text helpers order segments by ascending startTime. */
  startTime: number;
  /** Segments with this flag set are skipped by the text helpers. */
  isPartial: boolean;
  /** Transcript entries; each entry's `transcript` string is concatenated. */
  transcripts: Array<Transcript>;
}
15+
16+
// Language names used when generating context, keyed by exact locale code
// first and by bare language family as a fallback.
const LANGUAGE_NAME_MAPPING: { [key: string]: string } = {
  'ja-JP': 'Japanese',
  'en-US': 'English',
  'en-GB': 'English',
  'zh-CN': 'Chinese',
  'zh-TW': 'Chinese',
  'ko-KR': 'Korean',
  'th-TH': 'Thai',
  'vi-VN': 'Vietnamese',
  // Language family fallbacks
  ja: 'Japanese',
  en: 'English',
  zh: 'Chinese',
  ko: 'Korean',
  th: 'Thai',
  vi: 'Vietnamese',
};

/**
 * Get language name from language code.
 *
 * Lookup order: the exact code (e.g. 'ja-JP'), then the part before the
 * first '-' (e.g. 'en-AU' -> 'en'). Only the mapping's own keys are
 * consulted, so inherited Object.prototype members such as 'constructor'
 * or 'toString' are never mistaken for a mapping entry.
 *
 * @param languageCode - Language code (e.g., 'ja-JP')
 * @returns Language name (e.g., 'Japanese'), or `languageCode` unchanged
 *   when neither the exact code nor its language family is mapped
 */
export const getLanguageNameFromCode = (languageCode: string): string => {
  // Own-property lookup: a bare `MAPPING[key]` would also resolve keys
  // inherited from Object.prototype and return a non-string value.
  const lookup = (key: string): string | undefined =>
    Object.prototype.hasOwnProperty.call(LANGUAGE_NAME_MAPPING, key)
      ? LANGUAGE_NAME_MAPPING[key]
      : undefined;

  const languageFamily = languageCode.split('-')[0] ?? languageCode;

  // Unknown codes are passed through as-is so the caller still has the raw
  // code to hand to the model.
  return lookup(languageCode) ?? lookup(languageFamily) ?? languageCode;
};
56+
57+
/**
 * Build one plain-text string from the finalized segments.
 *
 * Partial segments and segments without transcripts are dropped, the rest
 * are ordered by ascending startTime, and every transcript string is joined
 * with single spaces. The input array itself is not reordered.
 *
 * @param segments - Array of realtime segments
 * @returns Space-joined transcript text with surrounding whitespace trimmed
 */
export const extractTranscriptText = (
  segments: RealtimeSegmentForContext[]
): string => {
  // filter() hands back a fresh array, so sorting it leaves `segments` intact.
  const finalized = segments.filter(
    ({ isPartial, transcripts }) => !isPartial && transcripts.length > 0
  );
  finalized.sort((earlier, later) => earlier.startTime - later.startTime);

  const pieces: string[] = [];
  for (const { transcripts } of finalized) {
    pieces.push(transcripts.map((entry) => entry.transcript).join(' '));
  }

  return pieces.join(' ').trim();
};
74+
75+
/**
 * Decide whether it is worth generating context right now.
 *
 * Generation proceeds only while translation is enabled and recording is in
 * progress, and only once the finalized transcript text is at least 50
 * characters long.
 *
 * @param isTranslationEnabled - Whether real-time translation is enabled
 * @param isRecording - Whether currently recording
 * @param segments - Array of segments
 * @returns True if context generation should proceed
 */
export const shouldGenerateContext = (
  isTranslationEnabled: boolean,
  isRecording: boolean,
  segments: RealtimeSegmentForContext[]
): boolean => {
  const isActive = isTranslationEnabled && isRecording;
  if (!isActive || segments.length === 0) {
    return false;
  }

  // Default minimum length
  return extractTranscriptText(segments).length >= 50;
};
94+
95+
/**
 * Build the system prompt that asks the model to summarize the meeting
 * context for translation purposes.
 *
 * @param targetLanguageName - Language name the model is told to respond in
 * @returns The prompt: four instruction lines separated by newlines
 */
export const createContextGenerationPrompt = (
  targetLanguageName: string
): string => {
  const instructionLines = [
    'You are an AI assistant that analyzes meeting transcripts to generate context for translation improvement.',
    'Based on the provided transcript, generate a brief context (2-3 sentences) about what kind of meeting this is, the main topics being discussed, and any technical terms or domain-specific language being used.',
    'Focus on information that would help improve translation accuracy.',
    `Respond in ${targetLanguageName}.`,
  ];

  return instructionLines.join('\n');
};
108+
109+
/**
 * Generate system context from transcript segments.
 *
 * Steps: collect the finalized transcript text, pick the first configured
 * model, ask it (via `predict`) for a short description of the meeting, and
 * return the trimmed answer. Every failure path logs and resolves to null
 * instead of throwing.
 *
 * @param segments - Array of realtime segments
 * @param targetLanguage - Target language code for context generation
 * @param predict - Prediction function (from useChatApi, per the original
 *   documentation); called once with the model, the system/user messages
 *   and the id '/meeting-context'
 * @returns Promise resolving to the generated context, or null when the
 *   transcript is shorter than 50 characters, no model can be resolved, the
 *   model returns only whitespace, or an error is thrown
 */
export const generateSystemContext = async (
  segments: RealtimeSegmentForContext[],
  targetLanguage: string,
  predict: (params: {
    model: Model;
    messages: Array<{ role: 'system' | 'user'; content: string }>;
    id: string;
  }) => Promise<string>
): Promise<string | null> => {
  try {
    // Extract and validate transcript text
    const transcriptText = extractTranscriptText(segments);
    if (transcriptText.length < 50) {
      // Default minimum length
      return null;
    }

    // Get first available model
    const { modelIds } = MODELS;
    const firstModelId = modelIds[0];

    if (!firstModelId) {
      console.error('No models available for system context generation');
      return null;
    }

    // Resolved through the static import: the module is already loaded for
    // MODELS, so a dynamic import() here only added an extra await.
    const model = findModelByModelId(firstModelId);

    if (!model) {
      console.error('Model not found:', firstModelId);
      return null;
    }

    // Prepare messages for context generation
    const targetLanguageName = getLanguageNameFromCode(targetLanguage);
    const messages: Array<{ role: 'system' | 'user'; content: string }> = [
      {
        role: 'system',
        content: createContextGenerationPrompt(targetLanguageName),
      },
      {
        role: 'user',
        content: `Please analyze this meeting transcript and provide context for translation improvement:\n\n${transcriptText}`,
      },
    ];

    // Generate context
    const result = await predict({
      model,
      messages,
      id: '/meeting-context',
    });

    // A blank reply carries no context; report it as "nothing generated"
    // rather than handing callers an empty string.
    const context = result.trim();
    return context.length > 0 ? context : null;
  } catch (error) {
    console.error('Failed to generate system context:', error);
    return null;
  }
};
179+
180+
/**
 * Bind a target language and prediction function once, yielding a generator
 * that only needs the segments on each call.
 *
 * @param targetLanguage - Target language code
 * @param predict - Prediction function
 * @returns Async function that forwards its segments, together with the
 *   bound arguments, to generateSystemContext
 */
export const createContextGenerator = (
  targetLanguage: string,
  predict: (params: {
    model: Model;
    messages: Array<{ role: 'system' | 'user'; content: string }>;
    id: string;
  }) => Promise<string>
) => {
  const generateForSegments = async (
    segments: RealtimeSegmentForContext[]
  ): Promise<string | null> =>
    generateSystemContext(segments, targetLanguage, predict);

  return generateForSegments;
};
200+
201+
/**
 * Get context from recent segments for translation.
 *
 * Partial segments and segments without transcripts are ignored; the
 * remaining ones are ordered by ascending startTime and only the last
 * `maxSegments` of them contribute text. The input array is not reordered.
 *
 * @param segments - Array of realtime segments
 * @param maxSegments - Maximum number of recent segments to include
 *   (default: 10). Fractions are rounded down; zero, negative or NaN values
 *   select no segments.
 * @returns Space-joined transcript text of the selected segments, trimmed;
 *   an empty string when nothing is selected
 */
export const getRecentSegmentsContext = (
  segments: RealtimeSegmentForContext[],
  maxSegments: number = 10
): string => {
  // Guard the window size: slice(-0) is slice(0) and would return every
  // segment, and a negative limit would flip into a positive start index.
  const limit = Math.floor(maxSegments);
  if (!(limit > 0)) {
    return '';
  }

  const recentSegments = segments
    .filter((segment) => !segment.isPartial && segment.transcripts.length > 0)
    .sort((a, b) => a.startTime - b.startTime)
    .slice(-limit); // Get last N segments

  return recentSegments
    .map((segment) =>
      segment.transcripts.map((transcript) => transcript.transcript).join(' ')
    )
    .join(' ')
    .trim();
};

0 commit comments

Comments
 (0)