# AI Content Analyzer — Streamlit app (Hugging Face Space)
# Detects likely AI-generated content in uploaded PDF/Word documents.
import docx
import plotly.graph_objects as go
import streamlit as st
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, pipeline
# Page configuration | |
st.set_page_config(layout="wide") | |
st.title("π AI Content Analyzer") | |
st.markdown("Upload PDF/Word files to detect AI-generated content") | |
# Constants | |
MAX_WORDS = 1000 # Maximum words to analyze | |
WARNING_THRESHOLD = 1200 # Warning threshold for large files | |
# Load AI detection model | |
def load_model(): | |
model_name = "roberta-base-openai-detector" | |
tokenizer = AutoTokenizer.from_pretrained(model_name) | |
return pipeline("text-classification", model=model_name, tokenizer=tokenizer) | |
detector = load_model() | |
def count_words(text): | |
return len(text.split()) | |
def create_gauge(score): | |
fig = go.Figure(go.Indicator( | |
mode = "gauge+number", | |
value = score, | |
domain = {'x': [0, 1], 'y': [0, 1]}, | |
title = {'text': "AI Content Probability", 'font': {'size': 20}}, | |
gauge = { | |
'axis': {'range': [None, 100], 'tickwidth': 1}, | |
'bar': {'color': "darkblue"}, | |
'steps': [ | |
{'range': [0, 50], 'color': 'green'}, | |
{'range': [50, 75], 'color': 'yellow'}, | |
{'range': [75, 100], 'color': 'red'}] | |
})) | |
st.plotly_chart(fig, use_container_width=True) | |
# File uploader | |
uploaded_file = st.file_uploader("Upload file (PDF or Word)", type=["pdf", "docx"]) | |
if uploaded_file: | |
# Extract text | |
text = "" | |
if uploaded_file.name.endswith(".pdf"): | |
reader = PdfReader(uploaded_file) | |
text = " ".join([page.extract_text() or "" for page in reader.pages]) | |
else: | |
doc = docx.Document(uploaded_file) | |
text = " ".join([para.text for para in doc.paragraphs]) | |
word_count = count_words(text) | |
# Word limit warning | |
if word_count > WARNING_THRESHOLD: | |
st.warning(f"β οΈ File contains {word_count} words (Analyzing first {MAX_WORDS} words only)") | |
if st.button("Analyze Content"): | |
if word_count < 50: | |
st.error("β Insufficient text for analysis (minimum 50 words required)") | |
else: | |
# Process first 1000 words | |
processed_text = " ".join(text.split()[:MAX_WORDS]) | |
# Perform analysis | |
result = detector(processed_text) | |
ai_prob = result[0]['score']*100 if result[0]['label']=='FAKE' else 100-result[0]['score']*100 | |
# Display results | |
st.subheader("Analysis Results") | |
create_gauge(ai_prob) | |
col1, col2 = st.columns(2) | |
with col1: | |
st.metric("Words Analyzed", f"{min(word_count, MAX_WORDS)}/{word_count}") | |
with col2: | |
st.metric("AI Probability", f"{ai_prob:.1f}%") | |
with st.expander("View Text Sample"): | |
st.text(processed_text[:1000] + ("..." if word_count>1000 else "")) |