import os

import gradio as gr
from deepgram import (
    ClientOptionsFromEnv,
    DeepgramClient,
    FileSource,
    PrerecordedOptions,
)
from dotenv import load_dotenv
from moviepy.editor import AudioFileClip
from pytube import YouTube


def download_video(video_url, path='./'):
    """Download the first audio-only stream of a YouTube video.

    Args:
        video_url: URL of the YouTube video.
        path: Directory the downloaded file is written to.

    Returns:
        The path of the downloaded file, as returned by pytube.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(video_url)
    stream = yt.streams.filter(only_audio=True).first()
    # .first() yields None when the filter matches nothing; fail with a
    # clear message instead of an AttributeError on None.
    if stream is None:
        raise ValueError(f"No audio-only stream available for {video_url!r}")
    return stream.download(output_path=path)


def extract_audio(video_path):
    """Convert a downloaded media file to a WAV file next to it.

    Args:
        video_path: Path of the media file to convert.

    Returns:
        The path of the WAV file. If ``video_path`` already has a ``.wav``
        extension it is returned unchanged and nothing is re-encoded.
    """
    stem, ext = os.path.splitext(video_path)
    if ext.lower() == ".wav":
        # Writing to the same path we are reading from would clobber the input.
        return video_path

    # Swap only the real extension: the download is not guaranteed to be
    # ".mp4", and a plain str.replace would also touch directory names.
    audio_path = stem + ".wav"

    clip = AudioFileClip(video_path)
    try:
        clip.write_audiofile(audio_path)
    finally:
        # Release the reader held by the clip even if encoding fails.
        clip.close()
    return audio_path


def transcribe_audio(audio_path):
    """Transcribe an audio file with Deepgram and return the transcript.

    The Deepgram client is configured from the environment (after loading
    any ``.env`` file), so ``DEEPGRAM_API_KEY`` must be set before calling.
    The full JSON response is printed to stdout for inspection.

    Args:
        audio_path: Path of the audio file to send.

    Returns:
        The transcript text of the first alternative of the first channel.
    """
    # Pull variables from a local .env file, if any, into the environment.
    load_dotenv()

    # The API key is taken from the environment by ClientOptionsFromEnv,
    # so no key is passed explicitly here.
    deepgram = DeepgramClient("", ClientOptionsFromEnv())

    with open(audio_path, 'rb') as file:
        audio_data = file.read()

    payload: FileSource = {
        "buffer": audio_data,
    }

    options = PrerecordedOptions(
        model="nova-2",
        smart_format=True,
        utterances=True,
        punctuate=True,
        diarize=True,
        detect_language=True,
        sentiment=True,
        summarize=True,
        topics=True,
        intents=True,
    )

    response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)

    # Dump the complete response for debugging.
    print(response.to_json(indent=4))
    print("")

    return response['results']['channels'][0]['alternatives'][0]['transcript']


def process_video(video_url, api_key):
    """Download a YouTube video's audio and return its transcription.

    Args:
        video_url: URL of the YouTube video.
        api_key: Deepgram API key. When non-empty it is exported as
            ``DEEPGRAM_API_KEY`` for the transcription step; when empty or
            ``None`` any key already present in the environment is left
            in place.

    Returns:
        The transcript text produced by ``transcribe_audio``.
    """
    video_path = download_video(video_url)
    audio_path = extract_audio(video_path)
    # Only override the environment with a real value: assigning None raises
    # TypeError, and an empty string would mask a key configured elsewhere.
    if api_key:
        os.environ["DEEPGRAM_API_KEY"] = api_key
    return transcribe_audio(audio_path)


# Define Gradio interface
iface = gr.Interface(
    fn=process_video,
    inputs=["text", "text"],  # Accept video URL and API key as inputs
    outputs="text",
    title="Video to Text Transcription",
    description="Enter a YouTube video URL and your Deepgram API key to extract audio and transcribe it."
)

if __name__ == "__main__":
    iface.launch(debug=True)