msVision_3

Running

App Files Community

seawolf2357 committed on Mar 12, 2024

Commit

3b24c11

verified ·

1 Parent(s): 0048511

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -9

app.py CHANGED Viewed

@@ -6,17 +6,20 @@ from gradio_client import Client
 image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
 def generate_music(prompt):
     client = Client("https://haoheliu-audioldm-48k-text-to-hifiaudio-generation.hf.space/")
     result = client.predict(
-        prompt="Howdy!",  # 'Input your text here' 텍스트 박스 컴포넌트
-        duration=5,  # 'Duration (seconds)' 슬라이더 컴포넌트에서의 값 범위 (5 ~ 15)
-        guidance_scale=0,  # 'Guidance scale' 슬라이더 컴포넌트에서의 값 범위 (0 ~ 6)
-        seed=5,  # 'Seed' 숫자 컴포넌트의 값
-        num_waveforms=1,  # 'Number waveforms to generate' 슬라이더 컴포넌트에서의 값 범위 (1 ~ 3)
-        api_name="/text2audio"  # API 엔드포인트 경로
     )
     print(result)
 def generate_voice(prompt):
     # Tango API를 사용하여 음성 생성
@@ -35,12 +38,10 @@ def classify_and_generate_voice(uploaded_image):
     # 이미지 분류
     predictions = image_model(uploaded_image)
     top_prediction = predictions[0]['label']  # 가장 확률이 높은 분류 결과
     # 음성 생성
     voice_result = generate_voice("this is " + top_prediction)
     # 음악 생성
     music_result = generate_music("The rnb beat of 85BPM drums." + top_prediction + ".")
     # 반환된 음성 및 음악 결과를 Gradio 인터페이스로 전달
     # 예: voice_result['url'] 또는 voice_result['audio_data'] 등
     return top_prediction, voice_result, music_result

 image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
 def generate_music(prompt):
+    # audioldm API 사용하여 음악 생성 API 호출
     client = Client("https://haoheliu-audioldm-48k-text-to-hifiaudio-generation.hf.space/")
     result = client.predict(
+        "playing piano.",	# str in 'Input text' Textbox component
+        "Low quality.",	# str in 'Negative prompt' Textbox component
+        5,	# int | float (numeric value between 5 and 15) in 'Duration (seconds)' Slider component
+        5.5,	# int | float (numeric value between 0 and 7) in 'Guidance scale' Slider component
+        5,	# int | float in 'Seed' Number component
+        3,	# int | float (numeric value between 1 and 5) in 'Number waveforms to generate' Slider component
+        api_name="/text2audio"
     )
     print(result)
+    #audio_result = extract_audio(result)
+    return result
 def generate_voice(prompt):
     # Tango API를 사용하여 음성 생성
     # 이미지 분류
     predictions = image_model(uploaded_image)
     top_prediction = predictions[0]['label']  # 가장 확률이 높은 분류 결과
     # 음성 생성
     voice_result = generate_voice("this is " + top_prediction)
     # 음악 생성
     music_result = generate_music("The rnb beat of 85BPM drums." + top_prediction + ".")
     # 반환된 음성 및 음악 결과를 Gradio 인터페이스로 전달
     # 예: voice_result['url'] 또는 voice_result['audio_data'] 등
     return top_prediction, voice_result, music_result