import asyncio
import json
import os
import tempfile

import edge_tts
import gradio as gr
import torch
from huggingface_hub import InferenceClient
from moviepy.editor import AudioFileClip, concatenate_audioclips

# Initialize Hugging Face Inference Client
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")

# Note: this torch generator is created but not used anywhere below
generator = torch.Generator().manual_seed(42)
async def text_to_speech(text, voice, filename):
    # Synthesize `text` with the given edge-tts voice and save it to `filename`
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(filename)
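
# A minimal standalone usage sketch (hypothetical output filename, for illustration only):
#     asyncio.run(text_to_speech("Hello there!", "en-US-JennyNeural", "hello.mp3"))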
async def generate_conversation(script):
    title = script['title']
    content = script['content']

    temp_files = []
    tasks = []
    for key, text in content.items():
        speaker = key.split('_')[0]  # Extract the speaker name
        index = key.split('_')[1]    # Extract the dialogue index
        voice = "en-US-JennyNeural" if speaker == "Alice" else "en-US-GuyNeural"

        # Create a temporary file for each speaker's dialogue
        temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
        temp_file.close()  # edge-tts writes to the path itself, so release the handle
        temp_files.append(temp_file.name)

        filename = temp_file.name
        tasks.append(text_to_speech(text, voice, filename))
        print(f"Queued audio for {speaker}_{index}: {filename}")

    # Run all text-to-speech requests concurrently
    await asyncio.gather(*tasks)

    # Combine the audio files using moviepy
    audio_clips = [AudioFileClip(temp_file) for temp_file in temp_files]
    combined = concatenate_audioclips(audio_clips)

    # Create a temporary file for the combined output
    temp_output_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    temp_output_file.close()
    output_filename = temp_output_file.name

    # Save the combined file
    combined.write_audiofile(output_filename)
    print(f"Combined audio saved as: {output_filename}")

    # Clean up the per-speaker temporary files
    for temp_file in temp_files:
        os.remove(temp_file)
        print(f"Deleted temporary file: {temp_file}")

    return output_filename
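
# generate_conversation expects the JSON structure requested in the system prompt
# below; a hypothetical example (illustrative values only):
#     script = {
#         "title": "Black Holes",
#         "content": {"Alice_0": "Hi Bob!", "Bob_0": "Hi Alice, ready when you are."}
#     }
#     asyncio.run(generate_conversation(script))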
# Function to generate a podcast based on user input
def generate_podcast(topic, seed):
    system_instructions = '''[SYSTEM] You are an educational podcast generator. You have to create a podcast between Alice and Bob that gives an overview of the topic given by the user.
Please provide the script in the following JSON format:
{
    "title": "[string]",
    "content": {
        "Alice_0": "[string]",
        "Bob_0": "[string]",
        ...
    }
}
Be concise.
'''
    text = f" Topic: {topic}"
    formatted_prompt = system_instructions + text

    # Stream the script from the model and accumulate it token by token
    stream = client.text_generation(formatted_prompt, max_new_tokens=1024, seed=seed, stream=True, details=True, return_full_text=False)
    generated_script = ""
    for response in stream:
        if response.token.text != "</s>":
            generated_script += response.token.text

    # Generate the podcast audio from the generated script
    script_json = json.loads(generated_script)
    output_filename = asyncio.run(generate_conversation(script_json))
    print("Output file: " + output_filename)

    # Return the path (kept on disk) so the gr.Audio output with type="filepath"
    # can serve the file
    return output_filename
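
# To exercise the pipeline without the Gradio UI (hypothetical topic, for illustration):
#     audio_path = generate_podcast("The history of radio", seed=0)
#     print(audio_path)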
DESCRIPTION = """ # <center><b>PODGEN 📻</b></center>
### <center>Generate a podcast on any topic</center>
### <center>Use the power of LLMs to understand any topic better</center>
"""
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=999999,
            step=1,
            value=0,
            visible=False
        )
        topic_input = gr.Textbox(label="Topic", placeholder="Enter a topic")
        audio_output = gr.Audio(label="Podgen", type="filepath",
                                interactive=False,
                                autoplay=True,
                                elem_classes="audio")
    # Wire the components to the generation function; generate_podcast handles
    # one request at a time, so batching is not used
    gr.Interface(
        fn=generate_podcast,
        inputs=[topic_input, seed],
        outputs=[audio_output],
        live=True)

if __name__ == "__main__":
    demo.queue(max_size=200).launch()