import os

import gradio as gr
import torch
from transformers import pipeline

# Load the detector model; use the GPU when available, otherwise fall back to CPU.
pipe = pipeline(
    'audio-classification',
    model='mrfakename/styletts2-detector',
    device='cuda' if torch.cuda.is_available() else 'cpu',
)
# pipe_turbo = pipeline('audio-classification', model='mrfakename/styletts2-detector-turbo', device='cuda' if torch.cuda.is_available() else 'cpu', token=os.getenv('HF_TOKEN'))

ABOUT = """
# 🤔 Did StyleTTS 2 Generate It?

[Model](https://huggingface.co/mrfakename/styletts2-detector)

An audio classification model based on Whisper that detects StyleTTS 2 audio. Please share incorrect results in the Community tab!

**NOTE: Not affiliated with the author(s) of StyleTTS 2 in any way.**
"""

DISCLAIMER = """
## Disclaimer

The author(s) of this model cannot guarantee complete accuracy; false positives and false negatives may occur. This model should not replace other precautions, such as invisible watermarking or audio watermarking. It was trained on outputs from the StyleTTS 2 base model, not fine-tunes, and may not identify fine-tunes correctly. The author(s) of this model disclaim all liability related to or in connection with its use.
"""


def classify(audio, model="default"):
    # Run the selected pipeline and map each predicted label to its score.
    if model == "turbo":
        result = pipe_turbo(audio)
    else:
        result = pipe(audio)
    res = {}
    for r in result:
        res[r['label']] = r['score']
    return res


with gr.Blocks() as demo:
    gr.Markdown(ABOUT)
    aud = gr.Audio(label="Upload audio...", interactive=True, type="filepath")
    # model = gr.Radio(["default", "turbo"], label="Model", info="Which model do you want to use? Default is lightweight and efficient; Turbo is more robust and powerful.", value="default", interactive=True)
    btn = gr.Button("Classify", variant="primary")
    res = gr.Label(label="Results...")
    # btn.click(classify, inputs=[aud, model], outputs=res)
    btn.click(classify, inputs=[aud], outputs=res)
    gr.Markdown(DISCLAIMER)

demo.queue(default_concurrency_limit=20, max_size=20, api_open=False).launch(show_api=False)
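
# A minimal usage sketch of the detector pipeline without the Gradio UI
# (assumption: a local audio file named 'sample.wav' exists; the file name is
# hypothetical and not part of this app):
#
#   from transformers import pipeline
#   detector = pipeline('audio-classification', model='mrfakename/styletts2-detector')
#   print(detector('sample.wav'))  # -> list of {'label': ..., 'score': ...} dicts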