"""Gradio app: transcribe an uploaded video with Whisper and summarize it with a chat LLM."""

import os
import subprocess
import json
import tempfile
import time
import uuid
from threading import Thread

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces
import moviepy.editor as mp
import langdetect
from langdetect.lang_detect_exception import LangDetectException
from dotenv import load_dotenv
import whisper

load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")

# Hugging Face id of the chat model used for summaries (loaded lazily, see
# _load_chat_model).
model_path = "internlm/internlm2_5-7b-chat"

# File extensions that are routed through MoviePy to extract the audio track.
VIDEO_EXTENSIONS = (".mp4", ".avi", ".mov", ".flv")

# Upper bound on the number of transcription characters placed in the prompt.
MAX_PROMPT_CHARS = 300000

print("Starting the program...")
print("Loading Whisper model 'base'...")
# Whisper speech-to-text model, loaded once and shared by every request.
model = whisper.load_model("base")
print("Model successfully loaded.")

# Lazily populated by _load_chat_model() so that start-up does not pay for the
# chat model unless a summary is actually requested.
_chat_tokenizer = None
_chat_model = None


def generate_unique_filename(extension):
    """Return a random UUID4-based file name ending with *extension*."""
    return f"{uuid.uuid4()}{extension}"


def cleanup_files(*files):
    """Delete every given path that is truthy and exists on disk."""
    for file in files:
        if file and os.path.exists(file):
            os.remove(file)
            print(f"Removed file: {file}")


def _extract_audio(video_path, audio_path):
    """Write the audio track of *video_path* to *audio_path*.

    Raises:
        ValueError: if the video has no audio track.
    """
    video = mp.VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError("The video does not contain an audio track.")
        video.audio.write_audiofile(audio_path)
    finally:
        # Release the reader held by the clip.
        video.close()


def _transcription_text(transcription):
    """Return the text of a transcription result dict.

    Uses the "text" key when present, otherwise joins the "text" of every
    entry under "chunks".
    """
    if "text" in transcription:
        return transcription["text"]
    return " ".join(chunk["text"] for chunk in transcription.get("chunks", []))


def transcribe_audio(file_path):
    """Transcribe an audio or video file and return the recognized text.

    Files whose extension is in VIDEO_EXTENSIONS (case-insensitive) first have
    their audio extracted to a temporary mp3; any other file is handed to
    Whisper directly.

    Args:
        file_path: path of the media file to transcribe.

    Returns:
        The transcription text.

    Raises:
        Exception: whatever audio extraction or Whisper raises is propagated.
    """
    print(f"Starting transcription of file: {file_path}")
    temp_audio = None
    audio_path = file_path
    try:
        if file_path.lower().endswith(VIDEO_EXTENSIONS):
            print("Video file detected. Extracting audio...")
            temp_audio = os.path.join(
                tempfile.gettempdir(), generate_unique_filename(".mp3")
            )
            try:
                _extract_audio(file_path, temp_audio)
            except Exception as e:
                print(f"Error extracting audio from video: {e}")
                raise
            print(f"temp_audio : {temp_audio}")
            audio_path = temp_audio

        file_exists = os.path.exists(audio_path)
        print(f"Does the file exist? {file_exists}")
        print(f"File size: {os.path.getsize(audio_path) if file_exists else 'N/A'} bytes")

        # Reuse the module-level Whisper model instead of reloading it per call.
        transcription = model.transcribe(audio_path)
        result = _transcription_text(transcription)
        print("Transcription completed.")
        return result
    finally:
        # Remove the temporary audio even when extraction/transcription failed.
        if temp_audio:
            cleanup_files(temp_audio)


def _load_chat_model():
    """Load the chat tokenizer and model on first use and return them.

    The model runs in float16 on CUDA when available, otherwise in float32 on
    the CPU.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    global _chat_tokenizer, _chat_model
    if _chat_model is None:
        print(f"Loading model {model_path}...")
        use_cuda = torch.cuda.is_available()
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        chat_model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            trust_remote_code=True,
        )
        if use_cuda:
            chat_model = chat_model.cuda()
        _chat_tokenizer = tokenizer
        _chat_model = chat_model.eval()
        print("Model successfully loaded.")
    return _chat_tokenizer, _chat_model


def generate_summary_stream(transcription):
    """Return a 200-300 word summary of *transcription* produced by the chat model.

    Args:
        transcription: text to summarize; only the first MAX_PROMPT_CHARS
            characters are sent to the model.

    Returns:
        The summary text, or a short notice when there is nothing to summarize.
    """
    print("Starting summary generation...")
    if not transcription or not transcription.strip():
        return "No transcription available to summarize."
    print(f"Transcription length: {len(transcription)} characters")

    try:
        detected_language = langdetect.detect(transcription)
    except LangDetectException as e:
        # Raised when the text has no usable features (e.g. only digits).
        print(f"Language detection failed: {e}")
        detected_language = None
    language_hint = (
        f", which is detected as {detected_language}" if detected_language else ""
    )

    prompt = (
        "Summarize the following video transcription in 200-300 words. "
        "The summary should be in the same language as the transcription"
        f"{language_hint}. "
        "Please ensure that the summary captures the main points and key ideas "
        "of the transcription:\n\n"
        f"{transcription[:MAX_PROMPT_CHARS]}..."
    )

    tokenizer, chat_model = _load_chat_model()
    response, _history = chat_model.chat(tokenizer, prompt, history=[])
    print(f"Final summary generated: {response[:100]}...")
    print("Summary generation completed.")
    return response


def process_uploaded_video(video_path):
    """Transcribe *video_path* and return ``(text, None)``.

    Errors are not propagated: they are reported as a
    ``"Processing error: ..."`` string in the first tuple element.
    """
    print(f"Processing uploaded video: {video_path}")
    try:
        print("Starting transcription...")
        transcription = transcribe_audio(video_path)
        print(f"Transcription completed. Length: {len(transcription)} characters")
        return transcription, None
    except Exception as e:
        print(f"Error processing video: {e}")
        return f"Processing error: {str(e)}", None


def process_video_and_update(video):
    """Gradio callback: return ``(transcription, summary)`` textbox values.

    The summary box is cleared after a successful run so that a stale summary
    is not shown next to a new transcription.
    """
    if video is None:
        return "No video uploaded.", "Please upload a video."
    print(f"Video received: {video}")
    transcription, _ = process_uploaded_video(video)
    print(f"Returned transcription: {transcription[:100] if transcription else 'No transcription generated'}...")
    return transcription or "Transcription error", ""


print("Setting up Gradio interface...")
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🎥 Video Transcription and Smart Summary
        Upload a video to get a transcription and AI-generated summary.
        """
    )

    with gr.Tabs():
        with gr.TabItem("📤 Video Upload"):
            video_input = gr.Video(label="Drag and drop or click to upload")
            video_button = gr.Button("🚀 Process Video", variant="primary")

    with gr.Row():
        with gr.Column():
            transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
        with gr.Column():
            summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)

    summary_button = gr.Button("📝 Generate Summary", variant="secondary")

    gr.Markdown(
        """
        ### How to use:
        1. Upload a video.
        2. Click 'Process' to get the transcription.
        3. Click 'Generate Summary' to get a summary of the content.

        *Note: Processing may take a few minutes depending on the video length.*
        """
    )

    video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])
    summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])

print("Launching Gradio interface...")
demo.launch()