seawolf2357 committed on
Commit a6d7b81 · verified · 1 Parent(s): e57e37e

Update app.py

Files changed (1)
  1. app.py +19 -8
app.py CHANGED
@@ -1,32 +1,43 @@
 import gradio as gr
 from transformers import pipeline
-# assumes the gradio_client library is used
+from gradio_client import Client  # assumption: the gradio_client library is available

 # Load the image-recognition pipeline
 image_model = pipeline("image-classification", model="google/vit-base-patch16-224")

 def generate_voice(prompt):
-    # Generate audio with the Tango API (placeholder)
-    return "https://example.com/generated_voice.mp3"  # return a sample audio file URL
+    # Generate audio with the Tango API
+    client = Client("https://declare-lab-tango.hf.space/")
+    result = client.predict(
+        prompt,              # use the image-classification result as the prompt
+        100,                 # Steps
+        1,                   # Guidance Scale
+        api_name="/predict"  # API endpoint path
+    )
+    # Handle the Tango API result,
+    # e.g. extract the audio file URL or data from result
+    return result

 def classify_and_generate_voice(uploaded_image):
     # Classify the image
     predictions = image_model(uploaded_image)
-    top_prediction = predictions[0]['label']
+    top_prediction = predictions[0]['label']  # highest-probability label
+
     # Generate the audio
     voice_result = generate_voice(top_prediction)
+
+    # Pass the returned audio result to the Gradio interface,
+    # e.g. voice_result['url'] or voice_result['audio_data']
     return top_prediction, voice_result

-# Create the Gradio interface and set example images
+# Create the Gradio interface
 iface = gr.Interface(
     fn=classify_and_generate_voice,
     inputs=gr.Image(type="pil"),
     outputs=[gr.Label(), gr.Audio()],
-    examples=[["dog.jpg"]],  # add the example image path as a list
     title="Image classification and audio generation",
     description="Upload an image; the object is recognized and corresponding audio is generated."
 )

 # Run the interface
-if __name__ == "__main__":
-    iface.launch(share=True)
+iface.launch()
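
Note: the new generate_voice() returns the raw value from client.predict(), and the in-diff comments only hint at how to unpack it (a file URL or audio data). As a minimal sketch, assuming the Space returns either a local filepath string, a dict with a 'url'/'audio_data'-style entry, or a tuple of outputs, a hypothetical helper (extract_audio is not part of this commit) could normalize the value before it reaches gr.Audio:

    def extract_audio(result):
        # Assumption: gradio_client commonly returns a local filepath string for
        # audio outputs, which gr.Audio can play directly.
        if isinstance(result, str):
            return result
        # Assumption: some endpoints return a dict carrying a URL or raw audio payload.
        if isinstance(result, dict):
            return result.get("url") or result.get("audio_data")
        # Assumption: multi-output endpoints return a tuple/list; take the first item.
        if isinstance(result, (list, tuple)) and result:
            return result[0]
        return result

With such a helper, classify_and_generate_voice() would return top_prediction, extract_audio(voice_result). The actual shape of the endpoint's output can be inspected with client.view_api(), which prints the Space's input and output signature.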