import streamlit as st
from transformers import pipeline, AutoTokenizer
from PyPDF2 import PdfReader
import docx
import plotly.graph_objects as go

# Page configuration
st.set_page_config(layout="wide")
st.title("📄 AI Content Analyzer")
st.markdown("Upload PDF/Word files to detect AI-generated content")

# Constants
MAX_WORDS = 1000  # Maximum number of words passed to the detector
MIN_WORDS = 50    # Minimum words required for a meaningful analysis


# Load AI detection model
@st.cache_resource
def load_model():
    """Load the AI-text detection pipeline once and cache it across reruns."""
    model_name = "roberta-base-openai-detector"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return pipeline("text-classification", model=model_name, tokenizer=tokenizer)


detector = load_model()


def count_words(text):
    """Return the number of whitespace-separated words in *text*."""
    return len(text.split())


def create_gauge(score):
    """Render a 0-100 gauge chart of the AI-content probability *score*."""
    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "AI Content Probability", 'font': {'size': 20}},
        gauge={
            'axis': {'range': [None, 100], 'tickwidth': 1},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 50], 'color': 'green'},
                {'range': [50, 75], 'color': 'yellow'},
                {'range': [75, 100], 'color': 'red'}],
        }))
    st.plotly_chart(fig, use_container_width=True)


def _extract_text(uploaded):
    """Extract plain text from an uploaded PDF or Word (.docx) file object."""
    if uploaded.name.endswith(".pdf"):
        reader = PdfReader(uploaded)
        # extract_text() can return None for image-only pages; coerce to "".
        return " ".join(page.extract_text() or "" for page in reader.pages)
    document = docx.Document(uploaded)
    return " ".join(para.text for para in document.paragraphs)


# File uploader
uploaded_file = st.file_uploader("Upload file (PDF or Word)", type=["pdf", "docx"])

if uploaded_file:
    text = _extract_text(uploaded_file)
    word_count = count_words(text)

    # FIX: the original warned only above 1200 words while truncation kicks in
    # at MAX_WORDS (1000), so 1001-1200-word files were truncated silently.
    # Warn whenever truncation will actually happen.
    if word_count > MAX_WORDS:
        st.warning(f"⚠️ File contains {word_count} words (Analyzing first {MAX_WORDS} words only)")

    if st.button("Analyze Content"):
        if word_count < MIN_WORDS:
            st.error("❌ Insufficient text for analysis (minimum 50 words required)")
        else:
            # Analyze only the first MAX_WORDS words.
            processed_text = " ".join(text.split()[:MAX_WORDS])

            # FIX: truncation=True keeps the input within the model's 512-token
            # window; 1000 words would otherwise exceed it and crash inference.
            result = detector(processed_text, truncation=True)

            # FIX: the model card reports labels 'Real'/'Fake' (mixed case);
            # the original case-sensitive == 'FAKE' check could never match,
            # inverting the probability. Normalize case before comparing.
            label = result[0]['label'].upper()
            score = result[0]['score'] * 100
            ai_prob = score if label == 'FAKE' else 100 - score

            # Display results
            st.subheader("Analysis Results")
            create_gauge(ai_prob)

            col1, col2 = st.columns(2)
            with col1:
                st.metric("Words Analyzed", f"{min(word_count, MAX_WORDS)}/{word_count}")
            with col2:
                st.metric("AI Probability", f"{ai_prob:.1f}%")

            with st.expander("View Text Sample"):
                # FIX: the slice is 1000 *characters*, so key the ellipsis to
                # the character length, not the word count.
                st.text(processed_text[:1000] + ("..." if len(processed_text) > 1000 else ""))