# Spaces: Running
import gradio as gr | |
from transformers import pipeline | |
from gradio_client import Client | |
# Load the image-recognition pipeline once, at module import, so every request
# reuses the same model instance.
image_model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)
def generate_music(prompt, duration=5, guidance_scale=0, seed=5, n_waveforms=1):
    """Generate audio for a text prompt through the remote ``/text2audio`` endpoint.

    The generation settings used to be hard-coded; they are now keyword
    parameters whose defaults reproduce the previous behaviour exactly.

    Args:
        prompt: Main text input describing the audio to generate.
        duration: Duration in seconds.
        guidance_scale: Guidance scale forwarded to the endpoint.
        seed: Seed used for generation.
        n_waveforms: Number of waveforms to generate.

    Returns:
        The value returned by ``client.predict``, unmodified.
        NOTE(review): presumably a path/URL to the generated media, since the
        caller hands it to ``gr.Audio`` -- confirm against the endpoint.
    """
    # A fresh client is created per call, as before.
    client = Client("https://haoheliu-audioldm-48k-text-to-hifiaudio-generation.hf.space/")

    # Arguments are positional and must stay in the order the endpoint expects.
    result = client.predict(
        prompt,
        duration,
        guidance_scale,
        seed,
        n_waveforms,
        api_name="/text2audio",
    )

    # Kept from the original: echo the raw result to stdout for inspection.
    print(result)
    return result
# Example usage.
# NOTE(review): this runs at import time, so every start-up performs one remote
# generation request before the UI is built -- confirm that this is intended.
prompt = "A serene and peaceful melody to relax."
music_result = generate_music(prompt=prompt)
def generate_voice(prompt, steps=100, guidance_scale=1):
    """Generate voice audio for a text prompt using the Tango API.

    The step count and guidance scale used to be hard-coded; they are now
    keyword parameters whose defaults reproduce the previous behaviour exactly.

    Args:
        prompt: Text prompt; the caller passes the image-classification result.
        steps: Steps value forwarded to the endpoint.
        guidance_scale: Guidance scale forwarded to the endpoint.

    Returns:
        The value returned by ``client.predict``, unmodified. No post-processing
        is done here.
        NOTE(review): presumably a path/URL to the generated audio, since the
        caller hands it to ``gr.Audio`` -- confirm against the endpoint.
    """
    # A fresh client is created per call, as before.
    client = Client("https://declare-lab-tango.hf.space/")

    # Arguments are positional and must stay in the order the endpoint expects.
    result = client.predict(
        prompt,
        steps,
        guidance_scale,
        api_name="/predict",  # API endpoint path
    )

    # Any extraction of a file URL or raw data from the result would go here.
    return result
def classify_and_generate_voice(uploaded_image):
    """Classify an uploaded image, then generate voice and music from its label.

    Args:
        uploaded_image: The image supplied by the Gradio ``gr.Image`` input.

    Returns:
        A ``(label, voice_result, music_result)`` tuple, in the order of the
        interface's three output components.

    Raises:
        gr.Error: If no image was supplied.
    """
    # Submitting the form without an image yields None; report that clearly
    # instead of letting the model fail with an opaque error.
    if uploaded_image is None:
        raise gr.Error("Please upload an image first.")

    # Image classification: keep the label of the first prediction.
    # NOTE(review): assumes the pipeline returns predictions sorted by score,
    # highest first -- confirm.
    predictions = image_model(uploaded_image)
    top_prediction = predictions[0]['label']

    # Voice generation.
    voice_result = generate_voice("this is " + top_prediction)

    # Music generation. The original concatenation produced "...drums.<label>."
    # with no separator; a space is inserted so the label reads as its own sentence.
    music_result = generate_music(f"The rnb beat of 85BPM drums. {top_prediction}.")

    # The raw voice and music results are handed to the Gradio interface as-is.
    return top_prediction, voice_result, music_result
# Build the Gradio interface: one image in; a label and two audio outputs, matching
# the (label, voice, music) tuple returned by classify_and_generate_voice.
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio(), gr.Audio()],
    title="msVision_3",
    # User-facing description. The Korean text had been corrupted into mojibake
    # (UTF-8 decoded with the wrong code page); it is restored here and matches
    # the English gloss that follows it.
    description="이미지를 업로드하면, 사물을 인식하고 해당하는 음성 및 음악을 생성합니다.(recognizes object and generate Voice&Music)",
    # NOTE(review): assumes these example files exist next to the script -- confirm.
    examples=["dog.jpg", "cafe.jpg", "seoul.png"],
)

# Run the interface.
iface.launch()