import streamlit as st
from transformers import pipeline, AutoTokenizer
from PyPDF2 import PdfReader
import docx
import plotly.graph_objects as go

# Page configuration
st.set_page_config(layout="wide")
st.title("📄 AI Content Analyzer")
st.markdown("Upload PDF/Word files to detect AI-generated content")

# Constants
MAX_WORDS = 1000  # Maximum number of words passed to the detector
MIN_WORDS = 50    # Minimum words required for a meaningful analysis


# Load AI detection model
@st.cache_resource
def load_model():
    """Load the AI-text detection pipeline once and cache it across reruns."""
    model_name = "roberta-base-openai-detector"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return pipeline("text-classification", model=model_name, tokenizer=tokenizer)


detector = load_model()


def count_words(text):
    """Return the number of whitespace-separated words in *text*."""
    return len(text.split())


def create_gauge(score):
    """Render a 0-100 gauge chart of the AI-content probability *score*."""
    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "AI Content Probability", 'font': {'size': 20}},
        gauge={
            'axis': {'range': [None, 100], 'tickwidth': 1},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 50], 'color': 'green'},
                {'range': [50, 75], 'color': 'yellow'},
                {'range': [75, 100], 'color': 'red'}],
        }))
    st.plotly_chart(fig, use_container_width=True)


def _extract_text(uploaded):
    """Extract plain text from an uploaded PDF or Word (.docx) file object."""
    if uploaded.name.endswith(".pdf"):
        reader = PdfReader(uploaded)
        # extract_text() can return None for image-only pages; coerce to "".
        return " ".join(page.extract_text() or "" for page in reader.pages)
    document = docx.Document(uploaded)
    return " ".join(para.text for para in document.paragraphs)


# File uploader
uploaded_file = st.file_uploader("Upload file (PDF or Word)", type=["pdf", "docx"])

if uploaded_file:
    text = _extract_text(uploaded_file)
    word_count = count_words(text)

    # FIX: the original warned only above 1200 words while truncation kicks in
    # at MAX_WORDS (1000), so 1001-1200-word files were truncated silently.
    # Warn whenever truncation will actually happen.
    if word_count > MAX_WORDS:
        st.warning(f"⚠️ File contains {word_count} words (Analyzing first {MAX_WORDS} words only)")

    if st.button("Analyze Content"):
        if word_count < MIN_WORDS:
            st.error("❌ Insufficient text for analysis (minimum 50 words required)")
        else:
            # Analyze only the first MAX_WORDS words.
            processed_text = " ".join(text.split()[:MAX_WORDS])

            # FIX: truncation=True keeps the input within the model's 512-token
            # window; 1000 words would otherwise exceed it and crash inference.
            result = detector(processed_text, truncation=True)

            # FIX: the model card reports labels 'Real'/'Fake' (mixed case);
            # the original case-sensitive == 'FAKE' check could never match,
            # inverting the probability. Normalize case before comparing.
            label = result[0]['label'].upper()
            score = result[0]['score'] * 100
            ai_prob = score if label == 'FAKE' else 100 - score

            # Display results
            st.subheader("Analysis Results")
            create_gauge(ai_prob)

            col1, col2 = st.columns(2)
            with col1:
                st.metric("Words Analyzed", f"{min(word_count, MAX_WORDS)}/{word_count}")
            with col2:
                st.metric("AI Probability", f"{ai_prob:.1f}%")

            with st.expander("View Text Sample"):
                # FIX: the slice is 1000 *characters*, so key the ellipsis to
                # the character length, not the word count.
                st.text(processed_text[:1000] + ("..." if len(processed_text) > 1000 else ""))