seawolf2357 committed on
Commit
8770d52
·
verified ·
1 Parent(s): 0f88190

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -25
app.py CHANGED
@@ -1,40 +1,43 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- import io
4
 
5
  # ์ด๋ฏธ์ง€ ์ธ์‹ ํŒŒ์ดํ”„๋ผ์ธ ๋กœ๋“œ
6
- model = pipeline("image-classification", model="google/vit-base-patch16-224")
7
 
8
- # ์นดํ…Œ๊ณ ๋ฆฌ์— ๋”ฐ๋ฅธ ์‚ฌ์šด๋“œ ํŒŒ์ผ์˜ ๊ฒฝ๋กœ๋ฅผ ์ •์˜
9
- sound_files = {
10
- "dog": "/path/to/dog_bark.mp3",
11
- "cat": "/path/to/cat_meow.mp3",
12
- # ... ๊ฐ ์นดํ…Œ๊ณ ๋ฆฌ์— ๋Œ€ํ•œ ์‚ฌ์šด๋“œ ํŒŒ์ผ ๊ฒฝ๋กœ ์ถ”๊ฐ€
13
- }
 
 
 
 
 
14
 
15
- def classify_image(uploaded_image):
16
- predictions = model(uploaded_image)
17
- # ๊ฐ€์žฅ ํ™•๋ฅ ์ด ๋†’์€ ์˜ˆ์ธก ๊ฒฐ๊ณผ๋ฅผ ๊ฐ€์ ธ์˜ด
18
- top_prediction = predictions[0]['label']
19
 
20
- # ์˜ˆ์ธก ๊ฒฐ๊ณผ์— ํ•ด๋‹นํ•˜๋Š” ์‚ฌ์šด๋“œ ํŒŒ์ผ์˜ ๋ฐ”์ดํŠธ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ˜ํ™˜
21
- sound_path = sound_files.get(top_prediction)
22
- if sound_path:
23
- with open(sound_path, "rb") as audio_file:
24
- audio_data = audio_file.read()
25
- return top_prediction, audio_data
26
- else:
27
- # ํ•ด๋‹นํ•˜๋Š” ์‚ฌ์šด๋“œ ํŒŒ์ผ์ด ์—†๋Š” ๊ฒฝ์šฐ ๋นˆ ์˜ค๋””์˜ค ๋ฐ์ดํ„ฐ ๋ฐ˜ํ™˜
28
- return top_prediction, None
29
 
30
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
31
  iface = gr.Interface(
32
- fn=classify_image,
33
  inputs=gr.Image(type="pil"),
34
- outputs=[gr.Label(), gr.Audio(format="mp3")],
35
- title="์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜ ๋ฐ ์‚ฌ์šด๋“œ ์žฌ์ƒ",
36
- description="์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด, ์‚ฌ๋ฌผ์„ ์ธ์‹ํ•˜๊ณ  ํ•ด๋‹นํ•˜๋Š” ์‚ฌ์šด๋“œ๋ฅผ ์žฌ์ƒํ•ฉ๋‹ˆ๋‹ค."
37
  )
38
 
39
  # ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰
40
  iface.launch()
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ import requests
4
 
5
  # ์ด๋ฏธ์ง€ ์ธ์‹ ํŒŒ์ดํ”„๋ผ์ธ ๋กœ๋“œ
6
+ image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
7
 
8
+ def get_audiogen(prompt):
9
+ # ์˜ค๋””์˜ค ์ƒ์„ฑ ๋ชจ๋ธ API ํ˜ธ์ถœ
10
+ response = requests.post(
11
+ "https://api-inference.huggingface.co/models/fffiloni/audiogen",
12
+ headers={"Authorization": "/infer"},
13
+ json={"inputs": prompt, "parameters": {"length": 10}, "options": {"use_cache": False}}
14
+ )
15
+ result = response.json()
16
+ # ์—ฌ๊ธฐ์—์„œ result ์ฒ˜๋ฆฌ ๋กœ์ง์„ ๊ตฌํ˜„ํ•ฉ๋‹ˆ๋‹ค.
17
+ # ์˜ˆ: ์ƒ์„ฑ๋œ ์˜ค๋””์˜ค ํŒŒ์ผ์˜ URL์„ ๋ฐ˜ํ™˜ํ•˜๊ฑฐ๋‚˜, ์˜ค๋””์˜ค ๋ฐ์ดํ„ฐ ์ž์ฒด๋ฅผ ๋ฐ˜ํ™˜ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
18
+ return result
19
 
20
+ def classify_and_generate_audio(uploaded_image):
21
+ # ์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜
22
+ predictions = image_model(uploaded_image)
23
+ top_prediction = predictions[0]['label'] # ๊ฐ€์žฅ ํ™•๋ฅ ์ด ๋†’์€ ๋ถ„๋ฅ˜ ๊ฒฐ๊ณผ
24
 
25
+ # ์˜ค๋””์˜ค ์ƒ์„ฑ
26
+ audio_result = get_audiogen(top_prediction)
27
+
28
+ # audio_result๋ฅผ ์ฒ˜๋ฆฌํ•˜์—ฌ Gradio๊ฐ€ ์žฌ์ƒํ•  ์ˆ˜ ์žˆ๋Š” ํ˜•์‹์œผ๋กœ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
29
+ # ์˜ˆ: audio_result['url'] ๋˜๋Š” audio_result['audio_data'] ๋“ฑ
30
+ return top_prediction, audio_result
 
 
 
31
 
32
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
33
  iface = gr.Interface(
34
+ fn=classify_and_generate_audio,
35
  inputs=gr.Image(type="pil"),
36
+ outputs=[gr.Label(), gr.Audio()],
37
+ title="์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜ ๋ฐ ์˜ค๋””์˜ค ์ƒ์„ฑ",
38
+ description="์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด, ์ด๋ฏธ์ง€๋ฅผ ๋ถ„์„ํ•˜์—ฌ ๋ฌด์—‡์ธ์ง€ ์„ค๋ช…ํ•˜๊ณ , ํ•ด๋‹นํ•˜๋Š” ์˜ค๋””์˜ค๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค."
39
  )
40
 
41
  # ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰
42
  iface.launch()
43
+