Spaces: Running on Zero
Commit • ee4aecd
1 Parent(s): 72c65b6
for loop
app.py CHANGED
@@ -29,10 +29,6 @@ model = ParlerTTSForConditionalGeneration.from_pretrained(
     jenny_repo_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
 ).to(device)
 
-model = ParlerTTSForConditionalGeneration.from_pretrained(
-    repo_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
-).to(device)
-
 client = InferenceClient()
 
 tokenizer = AutoTokenizer.from_pretrained(repo_id)
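This hunk drops the second ParlerTTS checkpoint so that only the Jenny model stays resident on the GPU. For context, a minimal sketch of the load pattern the surviving lines rely on; the device/dtype setup and the jenny_repo_id value are assumptions, since they are defined earlier in app.py and not shown in this diff:

import torch
from parler_tts import ParlerTTSForConditionalGeneration

device = "cuda:0" if torch.cuda.is_available() else "cpu"   # assumed setup
torch_dtype = torch.float16 if device != "cpu" else torch.float32
jenny_repo_id = "parler-tts/parler-tts-mini-jenny-30H"      # assumed checkpoint name

# low_cpu_mem_usage=True streams weights into the model instead of first
# materializing a randomly initialized copy, cutting peak host memory
model = ParlerTTSForConditionalGeneration.from_pretrained(
    jenny_repo_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
).to(device)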
@@ -213,6 +209,7 @@ def generate_base(subject, setting, ):
     response = client.chat_completion(messages, max_tokens=2048, seed=random.randint(1, 5000))
     gr.Info("Story Generated", duration=3)
     story = response.choices[0].message.content
+    yield story, None
 
     model_input = story.replace("\n", " ").strip()
     model_input = nltk.sent_tokenize(model_input)
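The added yield turns generate_base into a Gradio generator: the story text is emitted as soon as the chat completion returns, with None in the audio slot, and later yields pair the same text with successive audio chunks. (The nltk.sent_tokenize call also assumes the punkt tokenizer data has been downloaded at startup.) A minimal sketch of the wiring this implies, with hypothetical component names since the real Blocks layout lives later in app.py:

import gradio as gr

def generate_base(subject, setting):
    story = f"A short story about {subject} in {setting}."  # stand-in for the LLM call
    yield story, None                   # text shows up immediately, audio slot stays empty
    for chunk in audio_chunks(story):   # stand-in for the TTS loop in the next hunk
        yield story, chunk              # same text, successive audio chunks

def audio_chunks(story):                # hypothetical helper, only for this sketch
    yield from ()

with gr.Blocks() as demo:
    subject = gr.Textbox(label="Subject")   # assumed component names
    setting = gr.Textbox(label="Setting")
    story_box = gr.Textbox(label="Story")
    audio = gr.Audio(label="Narration", streaming=True, autoplay=True)
    gr.Button("Generate").click(generate_base, [subject, setting], [story_box, audio])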
@@ -221,29 +218,31 @@ def generate_base(subject, setting, ):
     play_steps = int(frame_rate * play_steps_in_s)
     streamer = ParlerTTSStreamer(model, device=device, play_steps=play_steps)
 
-    description = "
+    description = "Jenny speaks at an average pace with a calm delivery in a very confined sounding environment with clear audio quality."
     inputs = tokenizer(description, return_tensors="pt").to(device)
-    prompt = tokenizer(story, return_tensors="pt").to(device)
-
-    generation_kwargs = dict(
-        input_ids=inputs.input_ids,
-        prompt_input_ids=prompt.input_ids,
-        streamer=streamer,
-        do_sample=True,
-        temperature=1.0,
-        min_new_tokens=10,
-    )
[old lines 237–246 were also removed; their content is not recoverable from the page rendering]
+    gr.Info("Reading story", duration=3)
+
+    for sentence in model_input:
+        prompt = tokenizer(sentence, return_tensors="pt").to(device)
+
+        generation_kwargs = dict(
+            input_ids=inputs.input_ids,
+            prompt_input_ids=prompt.input_ids,
+            streamer=streamer,
+            do_sample=True,
+            temperature=1.0,
+            min_new_tokens=10,
+        )
+
+        set_seed(SEED)
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
+        thread.start()
+
+        for new_audio in streamer:
+            print(f"Sample of length: {round(new_audio.shape[0] / sampling_rate, 2)} seconds")
+            yield story, numpy_to_mp3(new_audio, sampling_rate=sampling_rate)
 
 
 with gr.Blocks() as block:
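The rewritten body synthesizes audio sentence by sentence: model.generate blocks until decoding finishes, so it runs in a worker Thread while ParlerTTSStreamer hands finished audio frames to the main thread, which yields them to Gradio. A condensed sketch of that pattern for a single sentence; the checkpoint name, seed, and play_steps value are assumptions:

from threading import Thread
import torch
from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
from transformers import AutoTokenizer, set_seed

device = "cuda:0" if torch.cuda.is_available() else "cpu"
repo_id = "parler-tts/parler-tts-mini-jenny-30H"   # assumed checkpoint
model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

sampling_rate = model.audio_encoder.config.sampling_rate
frame_rate = model.audio_encoder.config.frame_rate
play_steps = int(frame_rate * 2.0)   # emit roughly two seconds of audio per chunk

streamer = ParlerTTSStreamer(model, device=device, play_steps=play_steps)
inputs = tokenizer("A calm, clear female voice.", return_tensors="pt").to(device)
prompt = tokenizer("Once upon a time.", return_tensors="pt").to(device)

set_seed(42)   # assumed value; app.py uses a SEED constant defined elsewhere
# generate() only returns once the sentence is fully decoded, so run it in a
# background thread and consume partial audio on the main thread as it arrives
thread = Thread(target=model.generate, kwargs=dict(
    input_ids=inputs.input_ids,
    prompt_input_ids=prompt.input_ids,
    streamer=streamer,
    do_sample=True,
    temperature=1.0,
    min_new_tokens=10,
))
thread.start()
for new_audio in streamer:   # numpy array of samples, about play_steps long
    print(f"{new_audio.shape[0] / sampling_rate:.2f} s of audio")
thread.join()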
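numpy_to_mp3 is defined elsewhere in app.py and not shown in this diff. A plausible sketch of such a helper using pydub (which requires ffmpeg on the system), purely to illustrate what the final yield expects:

import io
import numpy as np
from pydub import AudioSegment

def numpy_to_mp3(audio: np.ndarray, sampling_rate: int) -> bytes:
    # scale the float waveform to 16-bit PCM before encoding
    pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
    segment = AudioSegment(
        pcm.tobytes(), frame_rate=sampling_rate, sample_width=2, channels=1
    )
    buf = io.BytesIO()
    segment.export(buf, format="mp3")   # mp3 bytes that gr.Audio can stream
    return buf.getvalue()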