Spaces:
Runtime error
Runtime error
File size: 2,616 Bytes
2bd9468 a6d7b81 dedab71 9902a40 8770d52 2bd9468 ba5f8a7 ebcd803 ba5f8a7 59ff24b ba5f8a7 db7dc29 b8bb042 ba5f8a7 ebcd803 3ba852d ba5f8a7 ebcd803 3377e03 a6d7b81 87c119f 3377e03 8770d52 a6d7b81 3377e03 59ff24b ebcd803 59ff24b a6d7b81 ebcd803 a6d7b81 1e442f4 ad7babb a6d7b81 23708c8 3377e03 23708c8 4d95222 872e164 5174dc4 23708c8 2bd9468 ad7babb 3382a71 a6d7b81 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import gradio as gr
from transformers import pipeline
from gradio_client import Client  # Assumption: the gradio_client library is available.
# Load the image-classification pipeline once, at import time, so every
# request reuses the same model instance.
image_model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)
def generate_music(prompt):
    """Request music for *prompt* from the hosted AudioLDM2 Space.

    Args:
        prompt: Text describing the music to generate; sent as the Space's
            'Input text' field.

    Returns:
        The raw value returned by ``client.predict`` for ``fn_index=1``.
        It is printed for debugging and returned unmodified (no audio
        extraction is performed here).
    """
    client = Client("https://haoheliu-audioldm2-text2audio-text2music.hf.space/")
    result = client.predict(
        # Previously a hard-coded sample string ("playing piano.") was sent
        # and the caller's prompt was silently ignored.
        prompt,  # str in 'Input text' Textbox component
        "Low quality.",  # str in 'Negative prompt' Textbox component
        5,  # int | float (between 5 and 15) in 'Duration (seconds)' Slider component
        5.5,  # int | float (between 0 and 7) in 'Guidance scale' Slider component
        5,  # int | float in 'Seed' Number component
        3,  # int | float (between 1 and 5) in 'Number waveforms to generate' Slider component
        fn_index=1,
    )
    print(result)
    return result
def generate_voice(prompt):
    """Generate speech audio for *prompt* using the hosted Tango Space.

    Args:
        prompt: Text to synthesize; the caller passes a phrase built from
            the image-classification result.

    Returns:
        The raw value returned by the Space's ``/predict`` endpoint. No
        post-processing (e.g. extracting a file URL or audio data) is done.
    """
    steps = 100
    guidance_scale = 1
    tango = Client("https://declare-lab-tango.hf.space/")
    return tango.predict(prompt, steps, guidance_scale, api_name="/predict")
def classify_and_generate_voice(uploaded_image):
    """Classify an image, then generate matching voice and music.

    Args:
        uploaded_image: The image handed over by the Gradio input component.

    Returns:
        A 3-tuple ``(top_prediction, voice_result, music_result)``: the label
        of the first classifier prediction, followed by the raw results of
        ``generate_voice`` and ``generate_music``. The tuple length matches
        the three output components declared on the interface.
    """
    # Classify the image; the first entry is used as the top prediction.
    predictions = image_model(uploaded_image)
    top_prediction = predictions[0]['label']

    # Generate the voice clip.
    voice_result = generate_voice("this is " + top_prediction)

    # Generate the music clip.
    music_result = generate_music("The rnb beat of 85BPM drums." + top_prediction + ".")

    # The original also returned an undefined name `caption`, which raised
    # NameError on every call and produced four values for three outputs.
    return top_prediction, voice_result, music_result
# Build the Gradio interface: one image in, a label plus two audio clips out.
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio(), gr.Audio()],
    title="msVision_3",
    # The description literal was split across several lines by an encoding
    # error (unterminated string); it is restored here as a single string.
    description=(
        "이미지를 업로드하면, 사물을 인식하고 해당하는 음성 및 음악을 생성합니다."
        "(recognizes object and generate Voice&Music)"
    ),
    examples=["dog.jpg", "cat.png", "cafe.jpg"],
)

# Start the interface.
iface.launch()
|