"""Gradio Space: turn an article URL into a two-speaker podcast MP3.

Pipeline: fetch readable article text (Jina Reader proxy) -> ask an LLM to
rewrite it as a two-person conversation in JSON -> synthesize each turn with
MeloTTS, alternating two English voices -> concatenate and export as MP3.
"""

import io
import json
import os
import tempfile  # noqa: F401  (kept from original; unused here)
import wave  # noqa: F401  (kept from original; unused here)

import gradio as gr
import httpx
import spaces
import torch
from pydub import AudioSegment
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

# MeloTTS requires the unidic dictionary to exist before `melo.api` is imported.
os.system("python -m unidic download")
from melo.api import TTS  # noqa: E402

# 4-bit quantization so the 8B model fits on a single GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
)
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Hermes-2-Pro-Llama-3-8B",
    quantization_config=quantization_config,
)
tok = AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B")
# Stop generation on either the model's EOS or the Llama-3 end-of-turn token.
terminators = [tok.eos_token_id, tok.convert_tokens_to_ids("<|eot_id|>")]


def fetch_text(url):
    """Return the readable text of *url*, extracted by the Jina Reader proxy."""
    print("Entered Webpage Extraction")
    response = httpx.get("https://r.jina.ai/" + url, timeout=60.0)
    # Fail fast on HTTP errors instead of feeding an error page to the LLM.
    response.raise_for_status()
    return response.text


def _extract_json(raw):
    """Parse the first JSON object embedded in *raw* LLM output.

    Models frequently wrap the JSON in prose or code fences, so slice from
    the first '{' to the last '}' before parsing.

    Raises:
        ValueError: if no JSON object is present in *raw*.
        json.JSONDecodeError: if the candidate span is not valid JSON.
    """
    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError("Model output contained no JSON object")
    return json.loads(raw[start : end + 1])


@spaces.GPU
def synthesize(article_url, progress=gr.Progress()):
    """Convert the article at *article_url* into a podcast MP3; return its path."""
    article = fetch_text(article_url)
    device = "cuda" if torch.cuda.is_available() else "cpu"

    template = """
    {
      "conversation": [
        {"speaker": "", "text": ""},
        {"speaker": "", "text": ""}
      ]
    }
    """
    chat = [
        {
            "role": "user",
            "content": f"{article} \n Convert the text as Elaborate Conversation between two people as Podcast.\nfollowing this template \n {template}",
        }
    ]
    prompt = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    model_inputs = tok([prompt], return_tensors="pt").to(device)

    # BUG FIX: the original passed the whole BatchEncoding positionally to
    # generate() and then called json.loads() on the returned token-id
    # tensor without ever decoding it (TypeError at runtime).
    generated = model.generate(
        model_inputs.input_ids,
        attention_mask=model_inputs.attention_mask,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.9,
        eos_token_id=terminators,
    )
    # Drop the prompt tokens: only the model's reply contains the script.
    reply = tok.decode(
        generated[0][model_inputs.input_ids.shape[-1]:],
        skip_special_tokens=True,
    )
    conversation = _extract_json(reply)

    speed = 1.0
    tts = TTS(language="EN", device=device)
    speakers = ["EN-Default", "EN-US"]

    combined_audio = AudioSegment.empty()
    for i, turn in enumerate(conversation["conversation"]):
        # Alternate the two voices regardless of whatever speaker labels the
        # model invented, so the result is always a two-voice dialogue.
        speaker_id = tts.hps.data.spk2id[speakers[i % 2]]
        bio = io.BytesIO()
        tts.tts_to_file(
            turn["text"], speaker_id, bio, speed=speed, pbar=progress.tqdm, format="wav"
        )
        bio.seek(0)
        combined_audio += AudioSegment.from_file(bio, format="wav")

    final_audio_path = "final.mp3"
    combined_audio.export(final_audio_path, format="mp3")
    return final_audio_path


with gr.Blocks() as demo:
    gr.Markdown("# Not Ready to USE")
    gr.Markdown("# Turn Any Article into Podcast")
    gr.Markdown("## Easily convert articles from URLs into listenable audio Podcast.")
    with gr.Group():
        url_box = gr.Textbox(label="Article Link")
        btn = gr.Button("Podcastify", variant="primary")  # typo fix: was "Podcasitfy"
    aud = gr.Audio(interactive=False)
    btn.click(synthesize, inputs=[url_box], outputs=[aud])

demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True)