seawolf2357 committed on
Commit
3377e03
·
verified ·
1 Parent(s): 158913f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -22
app.py CHANGED
@@ -1,44 +1,42 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- import requests
4
- import os # os ๋ชจ๋“ˆ์„ ์ž„ํฌํŠธํ•ฉ๋‹ˆ๋‹ค.
5
 
6
  # ์ด๋ฏธ์ง€ ์ธ์‹ ํŒŒ์ดํ”„๋ผ์ธ ๋กœ๋“œ
7
  image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
8
 
9
- # ํ™˜๊ฒฝ ๋ณ€์ˆ˜์—์„œ Hugging Face API ํ† ํฐ ๋กœ๋“œ
10
- hugging_face_auth_token = os.getenv("HUGGING_FACE_AUTH_TOKEN")
11
-
12
- def get_audiogen(prompt):
13
- # ์˜ค๋””์˜ค ์ƒ์„ฑ ๋ชจ๋ธ API ํ˜ธ์ถœ
14
- headers = {"Authorization": f"Bearer {hugging_face_auth_token}"}
15
- response = requests.post(
16
- "https://api-inference.huggingface.co/models/fffiloni/audiogen",
17
- headers=headers,
18
- json={"inputs": prompt, "parameters": {"length": 10}, "options": {"use_cache": False}}
19
  )
20
- result = response.json()
21
- # ์—ฌ๊ธฐ์—์„œ result ์ฒ˜๋ฆฌ ๋กœ์ง์„ ๊ตฌํ˜„ํ•ฉ๋‹ˆ๋‹ค.
22
  return result
23
 
24
- def classify_and_generate_audio(uploaded_image):
25
  # ์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜
26
  predictions = image_model(uploaded_image)
27
  top_prediction = predictions[0]['label'] # ๊ฐ€์žฅ ํ™•๋ฅ ์ด ๋†’์€ ๋ถ„๋ฅ˜ ๊ฒฐ๊ณผ
28
 
29
- # ์˜ค๋””์˜ค ์ƒ์„ฑ
30
- audio_result = get_audiogen(top_prediction)
31
 
32
- # audio_result๋ฅผ ์ฒ˜๋ฆฌํ•˜์—ฌ Gradio๊ฐ€ ์žฌ์ƒํ•  ์ˆ˜ ์žˆ๋Š” ํ˜•์‹์œผ๋กœ ๋ฐ˜ํ™˜
33
- return top_prediction, audio_result
 
34
 
35
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
36
  iface = gr.Interface(
37
- fn=classify_and_generate_audio,
38
  inputs=gr.Image(type="pil"),
39
  outputs=[gr.Label(), gr.Audio()],
40
- title="์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜ ๋ฐ ์˜ค๋””์˜ค ์ƒ์„ฑ",
41
- description="์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด, ์ด๋ฏธ์ง€๋ฅผ ๋ถ„์„ํ•˜์—ฌ ๋ฌด์—‡์ธ์ง€ ์„ค๋ช…ํ•˜๊ณ , ํ•ด๋‹นํ•˜๋Š” ์˜ค๋””์˜ค๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค."
42
  )
43
 
44
  # ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ from gradio_client import Client # ๊ฐ€์ •: gradio_client ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•˜๋‹ค.
 
4
 
5
  # ์ด๋ฏธ์ง€ ์ธ์‹ ํŒŒ์ดํ”„๋ผ์ธ ๋กœ๋“œ
6
  image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
7
 
8
def generate_voice(prompt):
    """Generate audio for *prompt* via the hosted Tango text-to-audio Space.

    Returns whatever the remote endpoint yields — presumably an audio file
    path or URL consumable by gr.Audio (TODO confirm against the Space's API).
    """
    # Remote Gradio app wrapping the declare-lab Tango model.
    tango = Client("https://declare-lab-tango.hf.space/")
    return tango.predict(
        prompt,              # text prompt (here: the image-classification label)
        100,                 # steps
        1,                   # guidance scale
        api_name="/predict",  # endpoint path on the Space
    )
20
 
21
def classify_and_generate_voice(uploaded_image):
    """Classify *uploaded_image*, then synthesize audio for the top label.

    Returns a (label, audio_result) pair matching the Gradio outputs
    [gr.Label(), gr.Audio()].
    """
    # Highest-confidence class from the ViT classifier.
    best_label = image_model(uploaded_image)[0]['label']

    # Hand the label to the remote Tango Space as the audio prompt and pass
    # its result straight through to the Gradio interface.
    return best_label, generate_voice(best_label)
32
 
33
# Build the Gradio UI: one image input; a classification label plus the
# generated audio clip as outputs.
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio()],
    title="์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜ ๋ฐ ์Œ์„ฑ ์ƒ์„ฑ",
    description="์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด, ์‚ฌ๋ฌผ์„ ์ธ์‹ํ•˜๊ณ  ํ•ด๋‹นํ•˜๋Š” ์Œ์„ฑ์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค."
)
41
 
42
  # ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰