https://huggingface.co/spaces/fffiloni/YuE

#3
Files changed (3) hide show
  1. app.py +17 -32
  2. inference/infer.py +2 -6
  3. requirements.txt +1 -1
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  import subprocess
3
  import os
4
- import re
5
  import shutil
6
  import tempfile
7
 
@@ -67,19 +66,11 @@ def empty_output_folder(output_dir):
67
  print(f"Error deleting file {file_path}: {e}")
68
 
69
  # Function to create a temporary file with string content
70
- def create_temp_file(content, prefix, suffix=".txt"):
71
- temp_file = tempfile.NamedTemporaryFile(delete=False, mode="w", prefix=prefix, suffix=suffix)
72
- content = content.strip() + "\n\n" # Add extra newline at end
73
- content = content.replace("\r\n", "\n").replace("\r", "\n")
74
- temp_file.write(content)
75
- temp_file.close()
76
-
77
- # Debug: Print file contents
78
- print(f"\nContent written to {prefix}{suffix}:")
79
- print(content)
80
- print("---")
81
-
82
- return temp_file.name
83
 
84
  def get_last_mp3_file(output_dir):
85
  # List all files in the output directory
@@ -103,8 +94,8 @@ def get_last_mp3_file(output_dir):
103
 
104
  def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
105
  # Create temporary files
106
- genre_txt_path = create_temp_file(genre_txt_content, prefix="genre_")
107
- lyrics_txt_path = create_temp_file(lyrics_txt_content, prefix="lyrics_")
108
 
109
  print(f"Genre TXT path: {genre_txt_path}")
110
  print(f"Lyrics TXT path: {lyrics_txt_path}")
@@ -124,10 +115,11 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
124
  "--genre_txt", f"{genre_txt_path}",
125
  "--lyrics_txt", f"{lyrics_txt_path}",
126
  "--run_n_segments", str(num_segments),
127
- "--stage2_batch_size", "16",
128
  "--output_dir", f"{output_dir}",
129
  "--cuda_idx", "0",
130
- "--max_new_tokens", str(max_new_tokens)
 
131
  ]
132
 
133
  # Set up environment variables for CUDA with optimized settings
@@ -155,17 +147,15 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
155
 
156
  if last_mp3:
157
  print("Last .mp3 file:", last_mp3)
158
- instrumental_mp3_path = "./output/vocoder/stems/instrumental.mp3"
159
- vocal_mp3_path = "./output/vocoder/stems/vocal.mp3"
160
- return last_mp3, instrumental_mp3_path, vocal_mp3_path
161
  else:
162
- return None, None, None
163
  else:
164
  print("Output folder is empty.")
165
- raise gr.Error(f"Error occurred: Output folder is empty.")
166
  except subprocess.CalledProcessError as e:
167
  print(f"Error occurred: {e}")
168
- raise gr.Error(f"Error occurred: {e}")
169
  finally:
170
  # Clean up temporary files
171
  os.remove(genre_txt_path)
@@ -215,22 +205,17 @@ with gr.Blocks() as demo:
215
  )
216
  lyrics_txt = gr.Textbox(
217
  label="Lyrics", lines=12,
218
- placeholder="""
219
- Type the lyrics here...
220
- At least 2 segments, Annotate your segments with brackets, [verse] [chorus] [bridge]""",
221
  info="Text containing the lyrics for the music generation. These lyrics will be processed and split into structured segments to guide the generation process."
222
  )
223
 
224
  with gr.Column():
225
 
226
- num_segments = gr.Number(label="Number of Segments", value=2, interactive=False)
227
  max_new_tokens = gr.Slider(label="Max New Tokens", minimum=500, maximum="3000", step=500, value=1500, interactive=True)
228
 
229
  submit_btn = gr.Button("Submit")
230
  music_out = gr.Audio(label="Audio Result")
231
- with gr.Accordion("Vocal & Instrumental", open=False):
232
- instrumental = gr.Audio(label="Intrumental")
233
- vocal = gr.Audio(label="Vocal")
234
 
235
  gr.Examples(
236
  examples = [
@@ -273,6 +258,6 @@ Living out my dreams with this mic and a deal"""
273
  submit_btn.click(
274
  fn = infer,
275
  inputs = [genre_txt, lyrics_txt, num_segments, max_new_tokens],
276
- outputs = [music_out, instrumental, vocal]
277
  )
278
  demo.queue().launch(show_api=False, show_error=True)
 
1
  import gradio as gr
2
  import subprocess
3
  import os
 
4
  import shutil
5
  import tempfile
6
 
 
66
  print(f"Error deleting file {file_path}: {e}")
67
 
68
  # Function to create a temporary file with string content
69
+ def create_temp_file(content, suffix=".txt"):
70
+ fd, path = tempfile.mkstemp(suffix=suffix)
71
+ with os.fdopen(fd, "w", encoding="utf-8") as f:
72
+ f.write(content)
73
+ return path
 
 
 
 
 
 
 
 
74
 
75
  def get_last_mp3_file(output_dir):
76
  # List all files in the output directory
 
94
 
95
  def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
96
  # Create temporary files
97
+ genre_txt_path = create_temp_file(genre_txt_content, ".txt")
98
+ lyrics_txt_path = create_temp_file(lyrics_txt_content, ".txt")
99
 
100
  print(f"Genre TXT path: {genre_txt_path}")
101
  print(f"Lyrics TXT path: {lyrics_txt_path}")
 
115
  "--genre_txt", f"{genre_txt_path}",
116
  "--lyrics_txt", f"{lyrics_txt_path}",
117
  "--run_n_segments", str(num_segments),
118
+ "--stage2_batch_size", "4",
119
  "--output_dir", f"{output_dir}",
120
  "--cuda_idx", "0",
121
+ "--max_new_tokens", str(max_new_tokens),
122
+ "--disable_offload_model"
123
  ]
124
 
125
  # Set up environment variables for CUDA with optimized settings
 
147
 
148
  if last_mp3:
149
  print("Last .mp3 file:", last_mp3)
150
+ return last_mp3
 
 
151
  else:
152
+ return None
153
  else:
154
  print("Output folder is empty.")
155
+ return None
156
  except subprocess.CalledProcessError as e:
157
  print(f"Error occurred: {e}")
158
+ return None
159
  finally:
160
  # Clean up temporary files
161
  os.remove(genre_txt_path)
 
205
  )
206
  lyrics_txt = gr.Textbox(
207
  label="Lyrics", lines=12,
208
+ placeholder="Type the lyrics here...",
 
 
209
  info="Text containing the lyrics for the music generation. These lyrics will be processed and split into structured segments to guide the generation process."
210
  )
211
 
212
  with gr.Column():
213
 
214
+ num_segments = gr.Number(label="Number of Segments", value=2, interactive=True)
215
  max_new_tokens = gr.Slider(label="Max New Tokens", minimum=500, maximum="3000", step=500, value=1500, interactive=True)
216
 
217
  submit_btn = gr.Button("Submit")
218
  music_out = gr.Audio(label="Audio Result")
 
 
 
219
 
220
  gr.Examples(
221
  examples = [
 
258
  submit_btn.click(
259
  fn = infer,
260
  inputs = [genre_txt, lyrics_txt, num_segments, max_new_tokens],
261
+ outputs = [music_out]
262
  )
263
  demo.queue().launch(show_api=False, show_error=True)
inference/infer.py CHANGED
@@ -76,7 +76,7 @@ print(f"Using device: {device}")
76
  mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
77
  model = AutoModelForCausalLM.from_pretrained(
78
  stage1_model,
79
- torch_dtype=torch.float16,
80
  attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
81
  )
82
  model.to(device)
@@ -120,18 +120,15 @@ stage1_output_set = []
120
  # Tips:
121
  # genre tags support instrumental, genre, mood, vocal timbre and vocal gender
122
  # all kinds of tags are needed
123
- # Ensure files exist
124
  with open(args.genre_txt) as f:
125
  genres = f.read().strip()
126
- print(genres)
127
  with open(args.lyrics_txt) as f:
128
  lyrics = split_lyrics(f.read())
129
- print(lyrics)
130
  # instruction
131
  full_lyrics = "\n".join(lyrics)
132
  prompt_texts = [f"Generate music from the given lyrics segment by segment.\n[Genre] {genres}\n{full_lyrics}"]
133
  prompt_texts += lyrics
134
- print(prompt_texts)
135
 
136
  random_id = uuid.uuid4()
137
  output_seq = None
@@ -144,7 +141,6 @@ start_of_segment = mmtokenizer.tokenize('[start_of_segment]')
144
  end_of_segment = mmtokenizer.tokenize('[end_of_segment]')
145
  # Format text prompt
146
  run_n_segments = min(args.run_n_segments+1, len(lyrics))
147
- print(f"RUN N SEGMENTS: {run_n_segments}")
148
  for i, p in enumerate(tqdm(prompt_texts[:run_n_segments])):
149
  section_text = p.replace('[start_of_segment]', '').replace('[end_of_segment]', '')
150
  guidance_scale = 1.5 if i <=1 else 1.2
 
76
  mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
77
  model = AutoModelForCausalLM.from_pretrained(
78
  stage1_model,
79
+ torch_dtype=torch.bfloat16,
80
  attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
81
  )
82
  model.to(device)
 
120
  # Tips:
121
  # genre tags support instrumental, genre, mood, vocal timbre and vocal gender
122
  # all kinds of tags are needed
 
123
  with open(args.genre_txt) as f:
124
  genres = f.read().strip()
 
125
  with open(args.lyrics_txt) as f:
126
  lyrics = split_lyrics(f.read())
 
127
  # instruction
128
  full_lyrics = "\n".join(lyrics)
129
  prompt_texts = [f"Generate music from the given lyrics segment by segment.\n[Genre] {genres}\n{full_lyrics}"]
130
  prompt_texts += lyrics
131
+
132
 
133
  random_id = uuid.uuid4()
134
  output_seq = None
 
141
  end_of_segment = mmtokenizer.tokenize('[end_of_segment]')
142
  # Format text prompt
143
  run_n_segments = min(args.run_n_segments+1, len(lyrics))
 
144
  for i, p in enumerate(tqdm(prompt_texts[:run_n_segments])):
145
  section_text = p.replace('[start_of_segment]', '').replace('[end_of_segment]', '')
146
  guidance_scale = 1.5 if i <=1 else 1.2
requirements.txt CHANGED
@@ -3,7 +3,7 @@ torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cu118
3
  omegaconf
4
  einops
5
  numpy<2
6
- git+https://github.com/KingNish24/transformers.git@yue-patch
7
  sentencepiece
8
  tqdm
9
  tensorboard
 
3
  omegaconf
4
  einops
5
  numpy<2
6
+ transformers
7
  sentencepiece
8
  tqdm
9
  tensorboard