# Spaces: Running  (page-status text captured with the source; not part of the program)
import gradio as gr
from gradio_client import Client  # Assumption: the gradio_client library is available.
from transformers import pipeline
# Load the image-recognition pipeline once at module import so that every
# request handled below reuses the same model object.
image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
def generate_voice(prompt, *, steps=100, guidance_scale=1):
    """Generate audio for ``prompt`` by calling the remote Tango Space.

    Args:
        prompt: Text sent to the Space; in this app it is the top
            image-classification label.
        steps: Value forwarded as the Space's "Steps" input.
        guidance_scale: Value forwarded as the Space's "Guidance Scale" input.

    Returns:
        Whatever ``Client.predict`` returns for the ``/predict`` endpoint,
        unmodified.
        NOTE(review): presumably a file path or URL of the generated audio
        that ``gr.Audio`` can play -- confirm against the Space's API page.

    Raises:
        ValueError: If ``prompt`` is not a non-empty string.
    """
    # Fail fast locally instead of spending a remote generation run on a
    # prompt that cannot describe anything.
    if not isinstance(prompt, str) or not prompt.strip():
        raise ValueError("prompt must be a non-empty string")

    # Generate the audio through the Tango API.
    client = Client("https://declare-lab-tango.hf.space/")
    result = client.predict(
        prompt,  # the image-classification result is used as the prompt
        steps,  # Steps
        guidance_scale,  # Guidance Scale
        api_name="/predict",  # API endpoint path
    )
    # The raw API result is passed through as-is; extract a specific field
    # here (e.g. an audio file URL or data) if the endpoint's output changes.
    return result
def classify_and_generate_voice(uploaded_image):
    """Classify an uploaded image and generate audio for its top label.

    Args:
        uploaded_image: The image to classify, handed straight to
            ``image_model``.

    Returns:
        A ``(top_prediction, voice_result)`` tuple: the ``'label'`` of the
        first prediction and whatever ``generate_voice`` returns for it.

    Raises:
        ValueError: If ``uploaded_image`` is ``None`` (nothing was uploaded).
    """
    # Reject a missing image up front so the user gets a clear message rather
    # than an opaque error from inside the classification pipeline.
    if uploaded_image is None:
        raise ValueError("No image provided: please upload an image first.")

    # Image classification.
    predictions = image_model(uploaded_image)
    top_prediction = predictions[0]['label']  # the most probable classification result

    # Audio generation, using the label text as the prompt.
    voice_result = generate_voice(top_prediction)

    # Hand the label and the audio result back to the Gradio interface
    # unchanged (no field such as voice_result['url'] is extracted here).
    return top_prediction, voice_result
# Build the Gradio interface: one image in, predicted label and generated
# audio out.
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio()],
    title="msVision_3",
    description="이미지를 업로드하면, 사물을 인식하고 해당하는 음성을 생성합니다.(recognizes the object and generate voice)",
    # NOTE(review): relative paths -- presumably these example images ship
    # alongside this script; confirm they exist in the deployment.
    examples=["dog.jpg", "cat.jpg"],
)

# Run the interface only when this file is executed as a script, so importing
# the module (e.g. from tests or tooling) does not start a web server.
if __name__ == "__main__":
    iface.launch()