# audiototext / app.py
# zhuohany1206's picture
# 提交信息 (commit message)
# b18d8c0
import gradio as gr
import torch
import soundfile as sf
from transformers import AutoProcessor
from transformers.models.qwen2.modeling_qwen2 import Qwen2ForCausalLM
# Qwen2-Audio checkpoints bundle an audio encoder with the language model, so
# they must be loaded through the dedicated audio class. The text-only
# Qwen2ForCausalLM has no audio tower and cannot consume audio features.
from transformers import Qwen2AudioForConditionalGeneration  # noqa: E402

# Name of the checkpoint passed to from_pretrained.
MODEL_NAME = "Qwen/Qwen2-Audio-7B"
# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Load the processor and the model once at import time so that every request
# served by the UI reuses the same instances.
processor = AutoProcessor.from_pretrained(MODEL_NAME)
model = Qwen2AudioForConditionalGeneration.from_pretrained(MODEL_NAME)
model.to(DEVICE)
# Inference only: put the model in evaluation mode.
model.eval()
# Text prompt sent together with the audio. The <|audio_bos|><|AUDIO|><|audio_eos|>
# placeholder marks where the processor splices in the audio features.
# NOTE(review): the instruction wording follows the Qwen2-Audio ASR evaluation
# setup as far as I recall -- confirm against the model card before relying on it.
_ASR_PROMPT = (
    "<|audio_bos|><|AUDIO|><|audio_eos|>"
    "Detect the language and recognize the speech:"
)


def _resolve_audio_path(audio_file):
    """Return something ``sf.read`` can open for a Gradio upload value."""
    # Plain paths (str / os.PathLike) are used directly; checking first matters
    # because pathlib.Path also has a ``.name`` attribute (the basename only).
    if isinstance(audio_file, str) or hasattr(audio_file, "__fspath__"):
        return audio_file
    # Temp-file style wrappers expose the on-disk location as ``.name``.
    return getattr(audio_file, "name", audio_file)


def _resample_linear(wave, src_rate, dst_rate):
    """Resample a mono float32 waveform by linear interpolation.

    This has no anti-aliasing filter; it is a dependency-free fallback so that
    uploads recorded at another rate are still accepted by the processor.
    """
    if src_rate == dst_rate:
        return wave
    target_len = max(1, int(round(len(wave) * dst_rate / src_rate)))
    # interpolate() expects (batch, channels, length) for 1-D signals.
    signal = torch.from_numpy(wave).reshape(1, 1, -1)
    resampled = torch.nn.functional.interpolate(
        signal, size=target_len, mode="linear", align_corners=False
    )
    return resampled.reshape(-1).numpy()


def transcribe(audio_file, prompt=_ASR_PROMPT, max_new_tokens=512):
    """Transcribe an uploaded audio file to text with the loaded model.

    Args:
        audio_file: The upload as delivered by the UI: either a filesystem
            path or an object whose ``name`` attribute is that path.
        prompt: Text prompt paired with the audio; it must contain the audio
            placeholder expected by the processor.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text, or a message starting with "出错了: " when no audio
        was supplied or any step of the pipeline failed.
    """
    if audio_file is None:
        return "出错了: 未提供音频文件"
    try:
        # Read as float32 with an explicit channel axis so mono and
        # multi-channel files share one code path.
        samples, rate = sf.read(
            _resolve_audio_path(audio_file), dtype="float32", always_2d=True
        )
        if samples.shape[0] == 0:
            return "出错了: 音频文件为空"
        # Average the channels to get a single mono waveform.
        mono = samples.mean(axis=1)
        # The feature extractor only accepts its own sampling rate.
        target_rate = processor.feature_extractor.sampling_rate
        mono = _resample_linear(mono, rate, target_rate)
        inputs = processor(
            text=prompt,
            audios=mono,
            sampling_rate=target_rate,
            return_tensors="pt",
            padding=True,
        ).to(DEVICE)
        # Bound only the newly generated tokens: the prompt (including the
        # expanded audio placeholders) would otherwise eat into the budget.
        generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
        # generate() returns prompt + continuation; keep the continuation only.
        prompt_len = inputs["input_ids"].shape[1]
        new_token_ids = generated_ids[:, prompt_len:]
        texts = processor.batch_decode(new_token_ids, skip_special_tokens=True)
        return texts[0].strip()
    except Exception as e:
        # UI boundary: surface the failure in the result box instead of crashing.
        return f"出错了: {e}"
# Gradio UI: an audio upload widget and a result box side by side, plus a
# button that feeds the upload to transcribe() and shows the returned text.
# NOTE(review): `source=` and `type="file"` look like the older Gradio
# component API -- confirm they are accepted by the Gradio version in use.
demo = gr.Blocks()
with demo:
    gr.Markdown("## Qwen2-Audio 7B 音频转文字 Demo")
    with gr.Row():
        audio_input = gr.Audio(
            source="upload",
            type="file",
            label="上传音频文件",
        )
        output_text = gr.Textbox(label="识别结果")
    transcribe_btn = gr.Button("开始转文字")
    transcribe_btn.click(
        fn=transcribe,
        inputs=[audio_input],
        outputs=[output_text],
    )

# Start the web server.
demo.launch()