Spaces:
Running
Running
File size: 2,506 Bytes
2bd9468 7047a68 dedab71 9902a40 8770d52 2bd9468 ba5f8a7 668d494 0f49e19 3bf7aef 668d494 0f49e19 668d494 0f49e19 3bf7aef 668d494 0f49e19 3b24c11 46ad89d 668d494 3377e03 a6d7b81 1edfb40 a6d7b81 87c119f 3377e03 8770d52 a6d7b81 3377e03 59ff24b ebcd803 59ff24b ebcd803 a6d7b81 7d07c61 ad7babb a6d7b81 23708c8 3377e03 23708c8 7d07c61 872e164 5174dc4 0ad2ed2 23708c8 2bd9468 3382a71 a6d7b81 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import gradio as gr
from transformers import pipeline
from gradio_client import Client
# Load the image-recognition pipeline (runs once, when the module is imported).
image_model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)
def generate_music(prompt):
    """Send *prompt* to the remote AudioLDM Space and return its prediction.

    Args:
        prompt: Text passed as the first input of the ``/text2audio`` endpoint.

    Returns:
        Whatever ``Client.predict`` returns for that endpoint. The value is
        also printed to stdout before being returned.
    """
    # Positional inputs for the endpoint. The labels below follow the
    # author's notes -- NOTE(review): confirm them against the Space's API page.
    endpoint_inputs = (
        prompt,  # main text input for the music generation
        5,  # duration in seconds
        0,  # guidance scale
        5,  # seed for generating music
        1,  # number of waveforms to generate
    )

    # A fresh client is created for the API endpoint on every call.
    audioldm = Client("https://haoheliu-audioldm-48k-text-to-hifiaudio-generation.hf.space/")
    generated = audioldm.predict(*endpoint_inputs, api_name="/text2audio")

    # The raw result is echoed and handed back without post-processing.
    print(generated)
    return generated
# Example usage. This runs at import time and performs a real request against
# the remote Space, so a failure here must not stop the rest of the module
# (including the Gradio app) from loading.
prompt = "A serene and peaceful melody to relax."
try:
    music_result = generate_music(prompt)
except Exception as exc:  # top-level boundary: report the problem and carry on
    print(f"Example music generation failed: {exc!r}")
    music_result = None
def generate_voice(prompt):
    """Send *prompt* to the remote Tango Space and return its prediction.

    Args:
        prompt: Text passed as the first input of the ``/predict`` endpoint.

    Returns:
        Whatever ``Client.predict`` returns for that endpoint, unmodified.
    """
    # Use the Tango API to generate the voice audio.
    tango = Client("https://declare-lab-tango.hf.space/")

    endpoint_inputs = (
        prompt,  # prompt text (the image-classification result is used as the prompt)
        100,  # steps
        1,  # guidance scale
    )

    # The prediction is returned as-is; nothing (e.g. an audio file URL or
    # raw data) is extracted from it here.
    return tango.predict(*endpoint_inputs, api_name="/predict")  # API endpoint path
def classify_and_generate_voice(uploaded_image):
    """Classify an image, then generate voice and music from its top label.

    Args:
        uploaded_image: Image handed to ``image_model``.

    Returns:
        A ``(label, voice_result, music_result)`` tuple: the label of the
        first prediction, followed by the raw return values of
        ``generate_voice`` and ``generate_music``.
    """
    # Image classification; the first entry is taken as the most probable class.
    predictions = image_model(uploaded_image)
    top_prediction = predictions[0]["label"]

    # Voice generation.
    voice_result = generate_voice(f"this is {top_prediction}")

    # Music generation. A space follows the fixed sentence so the label is not
    # glued directly onto "drums.".
    music_result = generate_music(f"The rnb beat of 85BPM drums. {top_prediction}.")

    # The raw voice and music results go straight to the Gradio interface;
    # nothing such as voice_result['url'] or voice_result['audio_data'] is
    # unpacked here.
    return top_prediction, voice_result, music_result
# Build the Gradio interface: one image in; a label and two audio clips out.
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio(), gr.Audio()],
    title="msVision_3",
    # User-facing description: Korean text followed by an English gloss,
    # kept as one logical string via implicit literal concatenation.
    description=(
        "이미지를 업로드하면, 사물을 인식하고 해당하는 음성 및 음악을 생성합니다."
        "(recognizes object and generate Voice&Music)"
    ),
    examples=["dog.jpg", "cafe.jpg", "seoul.png"],
)

# Run the interface.
iface.launch()
|