seawolf2357 committed on
Commit
3377e03
·
verified ·
1 Parent(s): 158913f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -22
app.py CHANGED
@@ -1,44 +1,42 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- import requests
4
- import os # os ๋ชจ๋“ˆ์„ ์ž„ํฌํŠธํ•ฉ๋‹ˆ๋‹ค.
5
 
6
  # ์ด๋ฏธ์ง€ ์ธ์‹ ํŒŒ์ดํ”„๋ผ์ธ ๋กœ๋“œ
7
  image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
8
 
9
- # ํ™˜๊ฒฝ ๋ณ€์ˆ˜์—์„œ Hugging Face API ํ† ํฐ ๋กœ๋“œ
10
- hugging_face_auth_token = os.getenv("HUGGING_FACE_AUTH_TOKEN")
11
-
12
- def get_audiogen(prompt):
13
- # ์˜ค๋””์˜ค ์ƒ์„ฑ ๋ชจ๋ธ API ํ˜ธ์ถœ
14
- headers = {"Authorization": f"Bearer {hugging_face_auth_token}"}
15
- response = requests.post(
16
- "https://api-inference.huggingface.co/models/fffiloni/audiogen",
17
- headers=headers,
18
- json={"inputs": prompt, "parameters": {"length": 10}, "options": {"use_cache": False}}
19
  )
20
- result = response.json()
21
- # ์—ฌ๊ธฐ์—์„œ result ์ฒ˜๋ฆฌ ๋กœ์ง์„ ๊ตฌํ˜„ํ•ฉ๋‹ˆ๋‹ค.
22
  return result
23
 
24
- def classify_and_generate_audio(uploaded_image):
25
  # ์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜
26
  predictions = image_model(uploaded_image)
27
  top_prediction = predictions[0]['label'] # ๊ฐ€์žฅ ํ™•๋ฅ ์ด ๋†’์€ ๋ถ„๋ฅ˜ ๊ฒฐ๊ณผ
28
 
29
- # ์˜ค๋””์˜ค ์ƒ์„ฑ
30
- audio_result = get_audiogen(top_prediction)
31
 
32
- # audio_result๋ฅผ ์ฒ˜๋ฆฌํ•˜์—ฌ Gradio๊ฐ€ ์žฌ์ƒํ•  ์ˆ˜ ์žˆ๋Š” ํ˜•์‹์œผ๋กœ ๋ฐ˜ํ™˜
33
- return top_prediction, audio_result
 
34
 
35
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
36
  iface = gr.Interface(
37
- fn=classify_and_generate_audio,
38
  inputs=gr.Image(type="pil"),
39
  outputs=[gr.Label(), gr.Audio()],
40
- title="์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜ ๋ฐ ์˜ค๋””์˜ค ์ƒ์„ฑ",
41
- description="์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด, ์ด๋ฏธ์ง€๋ฅผ ๋ถ„์„ํ•˜์—ฌ ๋ฌด์—‡์ธ์ง€ ์„ค๋ช…ํ•˜๊ณ , ํ•ด๋‹นํ•˜๋Š” ์˜ค๋””์˜ค๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค."
42
  )
43
 
44
  # ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ from gradio_client import Client # ๊ฐ€์ •: gradio_client ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•˜๋‹ค.
 
4
 
5
  # ์ด๋ฏธ์ง€ ์ธ์‹ ํŒŒ์ดํ”„๋ผ์ธ ๋กœ๋“œ
6
  image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
7
 
8
def generate_voice(prompt):
    """Generate audio for *prompt* via the hosted Tango text-to-audio Space.

    Returns whatever the remote endpoint yields — presumably an audio file
    path or URL consumable by gr.Audio (TODO confirm against the Space's API).
    """
    # Remote Gradio app wrapping the declare-lab Tango model.
    tango = Client("https://declare-lab-tango.hf.space/")
    return tango.predict(
        prompt,              # text prompt (here: the image-classification label)
        100,                 # steps
        1,                   # guidance scale
        api_name="/predict",  # endpoint path on the Space
    )
20
 
21
def classify_and_generate_voice(uploaded_image):
    """Classify *uploaded_image*, then synthesize audio for the top label.

    Returns a (label, audio_result) pair matching the Gradio outputs
    [gr.Label(), gr.Audio()].
    """
    # Highest-confidence class from the ViT classifier.
    best_label = image_model(uploaded_image)[0]['label']

    # Hand the label to the remote Tango Space as the audio prompt and pass
    # its result straight through to the Gradio interface.
    return best_label, generate_voice(best_label)
32
 
33
# Build the Gradio UI: one image input; a classification label plus the
# generated audio clip as outputs.
iface = gr.Interface(
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Label(), gr.Audio()],
    title="์ด๋ฏธ์ง€ ๋ถ„๋ฅ˜ ๋ฐ ์Œ์„ฑ ์ƒ์„ฑ",
    description="์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด, ์‚ฌ๋ฌผ์„ ์ธ์‹ํ•˜๊ณ  ํ•ด๋‹นํ•˜๋Š” ์Œ์„ฑ์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค."
)
41
 
42
  # ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰