import tempfile
import gradio as gr  
import googleapiclient.discovery
import re
import yt_dlp
import whisper
from pydub import AudioSegment
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
import openai
import json
import os
from pytube import YouTube
from pytrends.request import TrendReq
import torch
from urllib.parse import urlparse, parse_qs


def extract_video_id(url):
    """Extract the video ID from a YouTube URL.

    Recognised forms:
      * ``https://www.youtube.com/watch?v=<id>`` (any ``youtube.com`` subdomain)
      * ``https://youtu.be/<id>``
      * ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>`` and ``/v/<id>`` paths
        on ``youtube.com`` / ``youtube-nocookie.com``

    URLs pasted without a scheme (``youtube.com/watch?v=<id>``) are accepted.

    Args:
        url: The URL string to inspect.

    Returns:
        The video ID string, or ``None`` when the input is not a recognised
        YouTube URL or carries no ID.
    """
    if not isinstance(url, str) or not url.strip():
        print("Invalid YouTube URL.")
        return None

    try:
        candidate = url.strip()
        # Without a scheme, urlparse() leaves the host in .path instead of
        # .netloc, so normalise scheme-less and scheme-relative input first.
        if candidate.startswith("//"):
            candidate = "https:" + candidate
        elif "://" not in candidate:
            candidate = "https://" + candidate

        parsed_url = urlparse(candidate)
        host = (parsed_url.hostname or "").lower()
        segments = [part for part in parsed_url.path.split("/") if part]

        # Match on exact domain or a real subdomain; a plain substring test
        # would also accept look-alike hosts such as "notyoutube.com".
        if host == "youtu.be" or host.endswith(".youtu.be"):
            return segments[0] if segments else None

        youtube_domains = ("youtube.com", "youtube-nocookie.com")
        if any(host == d or host.endswith("." + d) for d in youtube_domains):
            video_id = parse_qs(parsed_url.query).get("v", [None])[0]
            if video_id:
                return video_id
            # Path-style URLs carry the ID as the second path segment.
            if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
                return segments[1]
            return None

        print("Invalid YouTube URL.")
        return None
    except ValueError as e:
        # urlparse()/hostname raise ValueError on malformed netlocs.
        print(f"Error parsing URL: {e}")
        return None

def get_video_duration(video_id, api_key):
    """Fetch a video's duration in minutes via the YouTube Data API v3.

    The API reports ``contentDetails.duration`` as an ISO 8601 duration.
    Videos under a day use ``PT#H#M#S``; longer ones add a day component
    (``P#DT#H#M#S``), which is handled here as well.

    Args:
        video_id: The YouTube video ID to look up.
        api_key: A YouTube Data API developer key.

    Returns:
        The duration in minutes as a float, or ``None`` when the request
        fails, the video is not found, or the duration cannot be parsed.
    """
    try:
        youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)
        request = youtube.videos().list(part="contentDetails", id=video_id)
        response = request.execute()

        items = response.get("items")
        if not items:
            print("No video details found.")
            return None

        duration = items[0]["contentDetails"]["duration"]
        match = re.fullmatch(
            r'P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?',
            duration,
        )
        # A bare "P" matches the pattern but carries no component at all.
        if match is None or not any(match.groups()):
            print(f"Error fetching video duration: unrecognised duration format {duration!r}")
            return None

        days, hours, minutes, seconds = (int(part) if part else 0 for part in match.groups())
        return days * 24 * 60 + hours * 60 + minutes + seconds / 60
    except Exception as e:
        # Best-effort boundary: API/network/shape errors all mean "unknown".
        print(f"Error fetching video duration: {e}")
        return None

def download_and_transcribe_with_whisper(youtube_url):
    """Download a video's audio track and transcribe it with Whisper.

    The audio is fetched with pytube into a temporary directory, converted
    to WAV with pydub, and passed to the Whisper "turbo" model.

    Args:
        youtube_url: URL of the YouTube video to transcribe.

    Returns:
        The transcript text, or ``None`` if any step raises (the error is
        printed).
    """
    try:
        # Everything lives in a scratch directory that is removed on exit.
        with tempfile.TemporaryDirectory() as workdir:
            mp4_name = "audio.mp4"
            mp4_path = os.path.join(workdir, mp4_name)

            # Take the first audio-only stream pytube lists for the video.
            stream = YouTube(youtube_url).streams.filter(only_audio=True).first()
            stream.download(output_path=workdir, filename=mp4_name)

            # Re-encode the download to WAV before transcription.
            wav_path = os.path.join(workdir, "audio.wav")
            AudioSegment.from_file(mp4_path).export(wav_path, format="wav")

            transcription = whisper.load_model("turbo").transcribe(wav_path)
            return transcription['text']

    except Exception as e:
        print(f"Error during transcription: {e}")
        return None

def get_transcript_from_youtube_api(video_id, video_length):
    """Fetch a transcript for the video through ``YouTubeTranscriptApi``.

    Selection order:
      1. The first manually created transcript, if one exists.
      2. Otherwise, for videos longer than 15 minutes, the auto-generated
         English transcript.
      3. Otherwise ``None``, so the caller can fall back to Whisper.

    Args:
        video_id: The YouTube video ID.
        video_length: Video duration in minutes.

    Returns:
        The transcript text joined with single spaces, or ``None`` when no
        suitable transcript is available or an error occurs (printed).
    """
    try:
        available = YouTubeTranscriptApi.list_transcripts(video_id)

        # Manually created transcripts take priority over generated ones.
        for candidate in available:
            if candidate.is_generated:
                continue
            segments = candidate.fetch()
            if not isinstance(segments, list):
                print("Manual transcript is not iterable.")
                return None
            return " ".join(piece['text'] for piece in segments)

        # No manual transcript: short videos are left for Whisper.
        if not video_length > 15:
            print("Video is shorter than 15 minutes, using Whisper for transcription.")
            return None

        print("Video is longer than 15 minutes, using auto-generated transcript.")
        generated = available.find_generated_transcript(['en'])
        if not generated:
            print("No auto-generated transcript available.")
            return None
        return " ".join(piece['text'] for piece in generated.fetch())

    except Exception as e:
        print(f"Error fetching transcript: {e}")
        return None


def get_transcript(youtube_url, api_key):
    """Get a transcript from YouTube, falling back to Whisper.

    Args:
        youtube_url: URL of the YouTube video.
        api_key: YouTube Data API key used to look up the video duration.

    Returns:
        The transcript text, or ``None`` when the URL yields no video ID,
        the duration lookup fails, or transcription fails.
    """
    # Use the shared URL parser: splitting on "v=" kept trailing parameters
    # (e.g. "&t=30s") and could not handle youtu.be or /shorts/ links.
    video_id = extract_video_id(youtube_url)
    if not video_id:
        print("Could not extract a video ID from the URL.")
        return None

    video_length = get_video_duration(video_id, api_key)
    if video_length is None:
        print("Error fetching video duration.")
        return None

    print(f"Video length: {video_length} minutes.")

    # Prefer a transcript published on YouTube.
    transcript = get_transcript_from_youtube_api(video_id, video_length)
    if transcript:
        print("Transcript found.")
        return transcript

    # Nothing usable on YouTube: transcribe the audio locally instead.
    print("No transcript found on YouTube, using Whisper for transcription.")
    return download_and_transcribe_with_whisper(youtube_url)

def summarize_text_huggingface(text):
    """Summarize text with the ``facebook/bart-large-cnn`` model.

    The text is cut into overlapping character windows, each window is
    summarized independently, and the partial summaries are joined with
    spaces.

    Args:
        text: The text to summarize.

    Returns:
        The concatenated summary, or ``""`` when ``text`` is empty or
        whitespace-only (the model is not loaded in that case).
    """
    # Nothing to summarize: skip the expensive model load entirely.
    if not text or not text.strip():
        return ""

    max_input_length = 1024
    chunk_overlap = 100
    step = max_input_length - chunk_overlap

    text_chunks = []
    for start in range(0, len(text), step):
        text_chunks.append(text[start:start + max_input_length])
        # Stop once a window reaches the end of the text; the next window
        # would lie entirely inside this one's overlap and only produce a
        # duplicate summary of the tail.
        if start + max_input_length >= len(text):
            break

    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if torch.cuda.is_available() else -1)
    summaries = [
        # truncation=True guards windows whose token count exceeds the model
        # limit (windows are sized in characters, not tokens).
        summarizer(chunk, max_length=100, min_length=50, do_sample=False, truncation=True)[0]['summary_text']
        for chunk in text_chunks
    ]
    return " ".join(summaries)

def generate_optimized_content(api_key, summarized_transcript):
    """Ask the OpenAI chat model for SEO metadata based on a summary.

    Args:
        api_key: OpenAI API key.
        summarized_transcript: Summary of the video transcript to analyze.

    Returns:
        The parsed JSON reply (the prompt requests the keys ``keywords``,
        ``title``, ``description`` and ``tags``), or ``None`` when the
        request fails or the reply contains no parseable JSON.
    """
    openai.api_key = api_key

    prompt = f"""
    Analyze the following summarized YouTube video transcript and:
    1. Extract the top 10 keywords.
    2. Generate an optimized title (less than 65 characters).
    3. Create an engaging description.
    4. Generate related tags for the video.

    Summarized Transcript:
    {summarized_transcript}

    Provide the results in the following JSON format:
    {{
        "keywords": ["keyword1", "keyword2", ..., "keyword10"],
        "title": "Generated Title",
        "description": "Generated Description",
        "tags": ["tag1", "tag2", ..., "tag10"]
    }}
    """

    try:
        # NOTE(review): openai.ChatCompletion is the pre-1.0 client interface;
        # confirm the pinned openai version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an SEO expert."},
                {"role": "user", "content": prompt}
            ]
        )
        response_content = response['choices'][0]['message']['content']

        try:
            return json.loads(response_content)
        except json.JSONDecodeError:
            # Chat models often wrap the JSON in Markdown fences or add
            # prose around it; retry on the outermost {...} span only.
            start = response_content.find("{")
            end = response_content.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(response_content[start:end + 1])

    except Exception as e:
        print(f"Error generating content: {e}")
        return None
# API credentials come from the environment; each is None when its variable is unset.
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# The helper functions used by the pipeline (`extract_video_id()`, `get_transcript()`, etc.) are defined above.

# Gradio Function for YouTube SEO
def youtube_seo_pipeline(youtube_url):
    """Run the full SEO pipeline for one YouTube URL.

    Steps: check that both API keys are set, extract the video ID, fetch
    the transcript, summarize it, then generate the SEO content. Progress
    is printed at each step.

    Args:
        youtube_url: The YouTube video URL entered by the user.

    Returns:
        The generated content as a JSON string indented by 4 spaces, or a
        plain-text error message naming the step that failed.
    """
    print("Starting the SEO pipeline...")

    # Both credentials are required before any work is attempted.
    if not (YOUTUBE_API_KEY and OPENAI_API_KEY):
        return "API keys missing! Please check environment variables."

    print("Extracting video ID...")
    vid = extract_video_id(youtube_url)
    if not vid:
        return "Invalid YouTube URL."
    print(f"Video ID: {vid}")

    print("Fetching transcript...")
    raw_transcript = get_transcript(youtube_url, YOUTUBE_API_KEY)
    print(raw_transcript)
    if not raw_transcript:
        return "Failed to fetch transcript. Try another video."

    print("Summarizing transcript...")
    summary = summarize_text_huggingface(raw_transcript)
    # Only a 200-character preview of the summary is logged.
    print(f"Summarized Text: {summary[:200]}...")

    print("Generating optimized content...")
    seo_payload = generate_optimized_content(OPENAI_API_KEY, summary)
    if not seo_payload:
        return "Failed to generate SEO content."
    return json.dumps(seo_payload, indent=4)

# Define Gradio Interface
iface = gr.Interface(
    youtube_seo_pipeline,
    title="YouTube SEO Optimizer",
    description="Enter a YouTube video URL to fetch and optimize SEO content (title, description, tags, and keywords).",
    inputs="text",
    outputs="text",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()