Spaces:
Runtime error
Runtime error
allow user to enable / disable the multilingual feature on Whisper
Browse files
app.py
CHANGED
|
@@ -143,7 +143,7 @@ def get_diarization_pipe():
|
|
| 143 |
|
| 144 |
|
| 145 |
# —————— Whisper Transcription ——————
|
| 146 |
-
def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path):
|
| 147 |
"""
|
| 148 |
Generator-based streaming transcription with accumulation using Faster-Whisper on CPU.
|
| 149 |
Yields (accumulated_text, diar_html) tuples for Gradio streaming.
|
|
@@ -172,6 +172,7 @@ def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path):
|
|
| 172 |
language=None if language == "auto" else language,
|
| 173 |
vad_filter=True,
|
| 174 |
batch_size=16,
|
|
|
|
| 175 |
)
|
| 176 |
os.unlink(tmp.name)
|
| 177 |
text = converter.convert("".join(s.text for s in segments).strip())
|
|
@@ -182,7 +183,7 @@ def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path):
|
|
| 182 |
|
| 183 |
|
| 184 |
@spaces.GPU
|
| 185 |
-
def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path):
|
| 186 |
"""
|
| 187 |
Generator-based streaming transcription with accumulation using Faster-Whisper on CUDA.
|
| 188 |
Yields (accumulated_text, diar_html) tuples for Gradio streaming.
|
|
@@ -212,6 +213,7 @@ def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path):
|
|
| 212 |
language=None if language == "auto" else language,
|
| 213 |
vad_filter=True,
|
| 214 |
batch_size=16,
|
|
|
|
| 215 |
)
|
| 216 |
os.unlink(tmp.name)
|
| 217 |
text = converter.convert("".join(s.text for s in segments).strip())
|
|
@@ -219,12 +221,12 @@ def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path):
|
|
| 219 |
yield "", format_diarization_html(snippets)
|
| 220 |
return
|
| 221 |
|
| 222 |
-
def transcribe_fwhisper_stream(model_id, language, audio_path, device_sel):
|
| 223 |
"""Dispatch to CPU or GPU streaming generators, preserving two-value yields."""
|
| 224 |
if device_sel == "GPU" and torch.cuda.is_available():
|
| 225 |
-
yield from _transcribe_fwhisper_gpu_stream(model_id, language, audio_path)
|
| 226 |
else:
|
| 227 |
-
yield from _transcribe_fwhisper_cpu_stream(model_id, language, audio_path)
|
| 228 |
|
| 229 |
# —————— SenseVoice Transcription ——————
|
| 230 |
def _transcribe_sense_cpu_stream(model_id: str, language: str, audio_path: str,
|
|
@@ -324,6 +326,7 @@ with Demo:
|
|
| 324 |
whisper_dd = gr.Dropdown(choices=WHISPER_MODELS, value=WHISPER_MODELS[0], label="Whisper Model")
|
| 325 |
whisper_lang = gr.Dropdown(choices=WHISPER_LANGUAGES, value="auto", label="Whisper Language")
|
| 326 |
device_radio = gr.Radio(choices=["GPU","CPU"], value="GPU", label="Device")
|
|
|
|
| 327 |
btn_w = gr.Button("Transcribe with Faster-Whisper")
|
| 328 |
|
| 329 |
with gr.Column():
|
|
@@ -353,7 +356,7 @@ with Demo:
|
|
| 353 |
# wire the callbacks into those shared boxes
|
| 354 |
btn_w.click(
|
| 355 |
fn=transcribe_fwhisper_stream,
|
| 356 |
-
inputs=[whisper_dd, whisper_lang, audio_input, device_radio],
|
| 357 |
outputs=[out_w, out_w_d]
|
| 358 |
)
|
| 359 |
btn_s.click(
|
|
|
|
| 143 |
|
| 144 |
|
| 145 |
# —————— Whisper Transcription ——————
|
| 146 |
+
def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path, whisper_multilingual_en):
|
| 147 |
"""
|
| 148 |
Generator-based streaming transcription with accumulation using Faster-Whisper on CPU.
|
| 149 |
Yields (accumulated_text, diar_html) tuples for Gradio streaming.
|
|
|
|
| 172 |
language=None if language == "auto" else language,
|
| 173 |
vad_filter=True,
|
| 174 |
batch_size=16,
|
| 175 |
+
multilingual=whisper_multilingual_en,
|
| 176 |
)
|
| 177 |
os.unlink(tmp.name)
|
| 178 |
text = converter.convert("".join(s.text for s in segments).strip())
|
|
|
|
| 183 |
|
| 184 |
|
| 185 |
@spaces.GPU
|
| 186 |
+
def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path, whisper_multilingual_en):
|
| 187 |
"""
|
| 188 |
Generator-based streaming transcription with accumulation using Faster-Whisper on CUDA.
|
| 189 |
Yields (accumulated_text, diar_html) tuples for Gradio streaming.
|
|
|
|
| 213 |
language=None if language == "auto" else language,
|
| 214 |
vad_filter=True,
|
| 215 |
batch_size=16,
|
| 216 |
+
multilingual=whisper_multilingual_en,
|
| 217 |
)
|
| 218 |
os.unlink(tmp.name)
|
| 219 |
text = converter.convert("".join(s.text for s in segments).strip())
|
|
|
|
| 221 |
yield "", format_diarization_html(snippets)
|
| 222 |
return
|
| 223 |
|
| 224 |
+
def transcribe_fwhisper_stream(model_id, language, audio_path, device_sel, whisper_multilingual_en):
|
| 225 |
"""Dispatch to CPU or GPU streaming generators, preserving two-value yields."""
|
| 226 |
if device_sel == "GPU" and torch.cuda.is_available():
|
| 227 |
+
yield from _transcribe_fwhisper_gpu_stream(model_id, language, audio_path, whisper_multilingual_en)
|
| 228 |
else:
|
| 229 |
+
yield from _transcribe_fwhisper_cpu_stream(model_id, language, audio_path, whisper_multilingual_en)
|
| 230 |
|
| 231 |
# —————— SenseVoice Transcription ——————
|
| 232 |
def _transcribe_sense_cpu_stream(model_id: str, language: str, audio_path: str,
|
|
|
|
| 326 |
whisper_dd = gr.Dropdown(choices=WHISPER_MODELS, value=WHISPER_MODELS[0], label="Whisper Model")
|
| 327 |
whisper_lang = gr.Dropdown(choices=WHISPER_LANGUAGES, value="auto", label="Whisper Language")
|
| 328 |
device_radio = gr.Radio(choices=["GPU","CPU"], value="GPU", label="Device")
|
| 329 |
+
whisper_multilingual_en = gr.Checkbox(label="Multilingual", value=True)
|
| 330 |
btn_w = gr.Button("Transcribe with Faster-Whisper")
|
| 331 |
|
| 332 |
with gr.Column():
|
|
|
|
| 356 |
# wire the callbacks into those shared boxes
|
| 357 |
btn_w.click(
|
| 358 |
fn=transcribe_fwhisper_stream,
|
| 359 |
+
inputs=[whisper_dd, whisper_lang, audio_input, device_radio, whisper_multilingual_en],
|
| 360 |
outputs=[out_w, out_w_d]
|
| 361 |
)
|
| 362 |
btn_s.click(
|