seawolf2357 committed on
Commit
3b24c11
·
verified ·
1 Parent(s): 0048511

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -6,17 +6,20 @@ from gradio_client import Client
6
  image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
7
 
8
  def generate_music(prompt):
 
9
  client = Client("https://haoheliu-audioldm-48k-text-to-hifiaudio-generation.hf.space/")
10
  result = client.predict(
11
- prompt="Howdy!", # 'Input your text here' 텍스트 박스 컴포넌트
12
- duration=5, # 'Duration (seconds)' 슬라이더 컴포넌트에서의 값 범위 (5 ~ 15)
13
- guidance_scale=0, # 'Guidance scale' 슬라이더 컴포넌트에서의 값 범위 (0 ~ 6)
14
- seed=5, # 'Seed' 숫자 컴포넌트의 값
15
- num_waveforms=1, # 'Number waveforms to generate' 슬라이더 컴포넌트에서의 값 범위 (1 ~ 3)
16
- api_name="/text2audio" # API 엔드포인트 경로
 
17
  )
18
  print(result)
19
-
 
20
 
21
  def generate_voice(prompt):
22
  # Tango API를 사용하여 음성 생성
@@ -35,12 +38,10 @@ def classify_and_generate_voice(uploaded_image):
35
  # 이미지 분류
36
  predictions = image_model(uploaded_image)
37
  top_prediction = predictions[0]['label'] # 가장 확률이 높은 분류 결과
38
-
39
  # μŒμ„± 생성
40
  voice_result = generate_voice("this is " + top_prediction)
41
  # μŒμ•… 생성
42
  music_result = generate_music("The rnb beat of 85BPM drums." + top_prediction + ".")
43
-
44
  # λ°˜ν™˜λœ μŒμ„± 및 μŒμ•… κ²°κ³Όλ₯Ό Gradio μΈν„°νŽ˜μ΄μŠ€λ‘œ 전달
45
  # 예: voice_result['url'] 또는 voice_result['audio_data'] 등
46
  return top_prediction, voice_result, music_result
 
6
  image_model = pipeline("image-classification", model="google/vit-base-patch16-224")
7
 
8
  def generate_music(prompt):
9
+ # audioldm API 사용하여 음악 생성 API 호출
10
  client = Client("https://haoheliu-audioldm-48k-text-to-hifiaudio-generation.hf.space/")
11
  result = client.predict(
12
+ "playing piano.", # str in 'Input text' Textbox component
13
+ "Low quality.", # str in 'Negative prompt' Textbox component
14
+ 5, # int | float (numeric value between 5 and 15) in 'Duration (seconds)' Slider component
15
+ 5.5, # int | float (numeric value between 0 and 7) in 'Guidance scale' Slider component
16
+ 5, # int | float in 'Seed' Number component
17
+ 3, # int | float (numeric value between 1 and 5) in 'Number waveforms to generate' Slider component
18
+ api_name="/text2audio"
19
  )
20
  print(result)
21
+ #audio_result = extract_audio(result)
22
+ return result
23
 
24
  def generate_voice(prompt):
25
  # Tango API를 사용하여 음성 생성
 
38
  # 이미지 분류
39
  predictions = image_model(uploaded_image)
40
  top_prediction = predictions[0]['label'] # 가장 확률이 높은 분류 결과
 
41
  # μŒμ„± 생성
42
  voice_result = generate_voice("this is " + top_prediction)
43
  # μŒμ•… 생성
44
  music_result = generate_music("The rnb beat of 85BPM drums." + top_prediction + ".")
 
45
  # λ°˜ν™˜λœ μŒμ„± 및 μŒμ•… κ²°κ³Όλ₯Ό Gradio μΈν„°νŽ˜μ΄μŠ€λ‘œ 전달
46
  # 예: voice_result['url'] 또는 voice_result['audio_data'] 등
47
  return top_prediction, voice_result, music_result