# AI Content Analyzer — Streamlit app (Hugging Face Space)
# Detects likely AI-generated content in uploaded PDF/Word documents.
import docx
import plotly.graph_objects as go
import streamlit as st
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, pipeline
# Page configuration | |
st.set_page_config(layout="wide") | |
st.title("π AI Content Analyzer") | |
st.markdown("Upload PDF/Word files to detect AI-generated content") | |
# Constants | |
MAX_WORDS = 1000 # Maximum words to analyze | |
WARNING_THRESHOLD = 1200 # Warning threshold for large files | |
# Load AI detection model | |
def load_model(): | |
model_name = "roberta-base-openai-detector" | |
tokenizer = AutoTokenizer.from_pretrained(model_name) | |
return pipeline("text-classification", model=model_name, tokenizer=tokenizer) | |
detector = load_model() | |
def count_words(text): | |
return len(text.split()) | |
def create_gauge(score): | |
fig = go.Figure(go.Indicator( | |
mode = "gauge+number", | |
value = score, | |
domain = {'x': [0, 1], 'y': [0, 1]}, | |
title = {'text': "AI Content Probability", 'font': {'size': 20}}, | |
gauge = { | |
'axis': {'range': [None, 100], 'tickwidth': 1}, | |
'bar': {'color': "darkblue"}, | |
'steps': [ | |
{'range': [0, 50], 'color': 'green'}, | |
{'range': [50, 75], 'color': 'yellow'}, | |
{'range': [75, 100], 'color': 'red'}] | |
})) | |
st.plotly_chart(fig, use_container_width=True) | |
# File uploader | |
uploaded_file = st.file_uploader("Upload file (PDF or Word)", type=["pdf", "docx"]) | |
if uploaded_file: | |
# Extract text | |
text = "" | |
if uploaded_file.name.endswith(".pdf"): | |
reader = PdfReader(uploaded_file) | |
text = " ".join([page.extract_text() or "" for page in reader.pages]) | |
else: | |
doc = docx.Document(uploaded_file) | |
text = " ".join([para.text for para in doc.paragraphs]) | |
word_count = count_words(text) | |
# Word limit warning | |
if word_count > WARNING_THRESHOLD: | |
st.warning(f"β οΈ File contains {word_count} words (Analyzing first {MAX_WORDS} words only)") | |
if st.button("Analyze Content"): | |
if word_count < 50: | |
st.error("β Insufficient text for analysis (minimum 50 words required)") | |
else: | |
# Process first 1000 words | |
processed_text = " ".join(text.split()[:MAX_WORDS]) | |
# Perform analysis | |
result = detector(processed_text) | |
ai_prob = result[0]['score']*100 if result[0]['label']=='FAKE' else 100-result[0]['score']*100 | |
# Display results | |
st.subheader("Analysis Results") | |
create_gauge(ai_prob) | |
col1, col2 = st.columns(2) | |
with col1: | |
st.metric("Words Analyzed", f"{min(word_count, MAX_WORDS)}/{word_count}") | |
with col2: | |
st.metric("AI Probability", f"{ai_prob:.1f}%") | |
with st.expander("View Text Sample"): | |
st.text(processed_text[:1000] + ("..." if word_count>1000 else "")) |