"""Gradio demo that compares two uploaded voice clips.

The two clips are handed to ``infer_contrast.run``; the score it returns is
compared against ``THRESHOLD`` and rendered through one of two result
templates.
"""
import argparse
import functools

import gradio as gr
import numpy as np
import torch

from infer_contrast import run
from utils.reader import load_audio
from utils.utility import add_arguments, print_arguments

# Markup prepended to both result templates.
STYLE = """ """

# Result template used when the score reaches THRESHOLD.  The single
# ``{:.1f}`` placeholder receives the score expressed as a percentage.
OUTPUT_OK = (
    STYLE
    + """

Speaker1 和 Speaker2

是同一人

相似度为:

{:.1f}%

(相似度为70%以上才能被认为是同一人)
"""
)

# Result template used when the score is below THRESHOLD.
OUTPUT_FAIL = (
    STYLE
    + """

Speaker1 和 Speaker2

不是同一人

相似度为:

{:.1f}%

(相似度为70%以上才能被认为是同一人)
"""
)

# Minimum score for the "same speaker" verdict.
# NOTE: the templates above spell this value out as "70%"; keep them in sync
# when changing it.
THRESHOLD = 0.70


def voiceRecognition(audio1, audio2):
    """Compare two audio clips and return the formatted verdict.

    Args:
        audio1: File path of the first clip (the inputs below are declared
            with ``type="filepath"``).
        audio2: File path of the second clip.

    Returns:
        ``OUTPUT_OK`` or ``OUTPUT_FAIL`` with the score filled in as a
        percentage, depending on whether the score reaches ``THRESHOLD``.

    Raises:
        ValueError: If either clip is missing (``None`` or empty).
    """
    # Fail with an explicit message instead of letting the missing value
    # propagate into ``run``.
    if not audio1 or not audio2:
        raise ValueError("Both Speaker1 and Speaker2 audio files are required.")

    score = run(audio1, audio2)
    template = OUTPUT_OK if score >= THRESHOLD else OUTPUT_FAIL
    return template.format(score * 100)


title = "Voice Recognition"
description = "This voice recognition demo(Chinese Format) is a simple implementation based on ResNet. It used ArcFace Loss and an open source Chinese voice corpus - zhvoice."

inputs = [
    gr.inputs.Audio(source="upload", type="filepath", label="Speaker1"),
    gr.inputs.Audio(source="upload", type="filepath", label="Speaker2"),
]
output = gr.outputs.HTML(label="")

# Footer text shown under the demo.  The line breaks are written as escapes
# because a single-quoted string literal cannot span physical lines.
article = (
    "\n\n"
    "💻 Code Repository | "
    "🎙️ zhvoice Dataset | "
    "\n\n"
)

# Each entry is a [Speaker1, Speaker2] pair of sample files.
examples = [
    ["samples/李云龙1.wav", "samples/李云龙2.wav"],
    ["samples/马保国1.wav", "samples/马保国2.wav"],
    ["samples/周杰伦1.wav", "samples/周杰伦2.wav"],
    ["samples/海绵宝宝1.wav", "samples/派大星.wav"],
    ["samples/海绵宝宝1.wav", "samples/海绵宝宝2.wav"],
    ["samples/周星驰.wav", "samples/吴孟达.wav"],
]

interface = gr.Interface(
    fn=voiceRecognition,
    inputs=inputs,
    outputs=output,
    title=title,
    description=description,
    examples=examples,
    examples_per_page=3,
    article=article,
    enable_queue=True,
)

interface.launch(debug=True, share=True)