seawolf2357 committed on
Commit
8770d52
·
verified ·
1 Parent(s): 0f88190

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -25
app.py CHANGED
@@ -1,40 +1,43 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- import io
4
 
5
  # ์ด๋ฏธ์ง€ ์ธ์‹ ํŒŒ์ดํ”„๋ผ์ธ ๋กœ๋“œ
6
- model = pipeline("image-classification", model="google/vit-base-patch16-224")
7
 
8
- # ์นดํ…Œ๊ณ ๋ฆฌ์— ๋”ฐ๋ฅธ ์‚ฌ์šด๋“œ ํŒŒ์ผ์˜ ๊ฒฝ๋กœ๋ฅผ ์ •์˜
9
- sound_files = {
10
- "dog": "/path/to/dog_bark.mp3",
11
- "cat": "/path/to/cat_meow.mp3",
12
- # ... ๊ฐ ์นดํ…Œ๊ณ ๋ฆฌ์— ๋Œ€ํ•œ ์‚ฌ์šด๋“œ ํŒŒ์ผ ๊ฒฝ๋กœ ์ถ”๊ฐ€
13
- }
 
 
 
 
 
14
 
15
- def classify_image(uploaded_image):
16
- predictions = model(uploaded_image)
17
- # ๊ฐ€์žฅ ํ™•๋ฅ ์ด ๋†’์€ ์˜ˆ์ธก ๊ฒฐ๊ณผ๋ฅผ ๊ฐ€์ ธ์˜ด
18
- top_prediction = predictions[0]['label']
19
 
20
- # ์˜ˆ์ธก ๊ฒฐ๊ณผ์— ํ•ด๋‹นํ•˜๋Š” ์‚ฌ์šด๋“œ ํŒŒ์ผ์˜ ๋ฐ”์ดํŠธ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ˜ํ™˜
21
- sound_path = sound_files.get(top_prediction)
22
- if sound_path:
23
- with open(sound_path, "rb") as audio_file:
24
- audio_data = audio_file.read()
25
- return top_prediction, audio_data
26
- else:
27
- # ํ•ด๋‹นํ•˜๋Š” ์‚ฌ์šด๋“œ ํŒŒ์ผ์ด ์—†๋Š” ๊ฒฝ์šฐ ๋นˆ ์˜ค๋””์˜ค ๋ฐ์ดํ„ฐ ๋ฐ˜ํ™˜
28
- return top_prediction, None
29
 
30
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
31
  iface = gr.Interface(
32
- fn=classify_image,
33
  inputs=gr.Image(type="pil"),
34
- outputs=[gr.Label(), gr.Audio(format="mp3")],
35
- title="์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜ ๋ฐ ์‚ฌ์šด๋“œ ์žฌ์ƒ",
36
- description="์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด, ์‚ฌ๋ฌผ์„ ์ธ์‹ํ•˜๊ณ  ํ•ด๋‹นํ•˜๋Š” ์‚ฌ์šด๋“œ๋ฅผ ์žฌ์ƒํ•ฉ๋‹ˆ๋‹ค."
37
  )
38
 
39
  # ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰
40
  iface.launch()
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ import requests
4
 
5
  # ์ด๋ฏธ์ง€ ์ธ์‹ ํŒŒ์ดํ”„๋ผ์ธ ๋กœ๋“œ
6
+ image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
7
 
8
+ def get_audiogen(prompt):
9
+ # ์˜ค๋””์˜ค ์ƒ์„ฑ ๋ชจ๋ธ API ํ˜ธ์ถœ
10
+ response = requests.post(
11
+ "https://api-inference.huggingface.co/models/fffiloni/audiogen",
12
+ headers={"Authorization": "/infer"},
13
+ json={"inputs": prompt, "parameters": {"length": 10}, "options": {"use_cache": False}}
14
+ )
15
+ result = response.json()
16
+ # ์—ฌ๊ธฐ์—์„œ result ์ฒ˜๋ฆฌ ๋กœ์ง์„ ๊ตฌํ˜„ํ•ฉ๋‹ˆ๋‹ค.
17
+ # ์˜ˆ: ์ƒ์„ฑ๋œ ์˜ค๋””์˜ค ํŒŒ์ผ์˜ URL์„ ๋ฐ˜ํ™˜ํ•˜๊ฑฐ๋‚˜, ์˜ค๋””์˜ค ๋ฐ์ดํ„ฐ ์ž์ฒด๋ฅผ ๋ฐ˜ํ™˜ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
18
+ return result
19
 
20
+ def classify_and_generate_audio(uploaded_image):
21
+ # ์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜
22
+ predictions = image_model(uploaded_image)
23
+ top_prediction = predictions[0]['label'] # ๊ฐ€์žฅ ํ™•๋ฅ ์ด ๋†’์€ ๋ถ„๋ฅ˜ ๊ฒฐ๊ณผ
24
 
25
+ # ์˜ค๋””์˜ค ์ƒ์„ฑ
26
+ audio_result = get_audiogen(top_prediction)
27
+
28
+ # audio_result๋ฅผ ์ฒ˜๋ฆฌํ•˜์—ฌ Gradio๊ฐ€ ์žฌ์ƒํ•  ์ˆ˜ ์žˆ๋Š” ํ˜•์‹์œผ๋กœ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
29
+ # ์˜ˆ: audio_result['url'] ๋˜๋Š” audio_result['audio_data'] ๋“ฑ
30
+ return top_prediction, audio_result
 
 
 
31
 
32
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
33
  iface = gr.Interface(
34
+ fn=classify_and_generate_audio,
35
  inputs=gr.Image(type="pil"),
36
+ outputs=[gr.Label(), gr.Audio()],
37
+ title="์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜ ๋ฐ ์˜ค๋””์˜ค ์ƒ์„ฑ",
38
+ description="์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด, ์ด๋ฏธ์ง€๋ฅผ ๋ถ„์„ํ•˜์—ฌ ๋ฌด์—‡์ธ์ง€ ์„ค๋ช…ํ•˜๊ณ , ํ•ด๋‹นํ•˜๋Š” ์˜ค๋””์˜ค๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค."
39
  )
40
 
41
  # ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰
42
  iface.launch()
43
+