# Hugging Face Spaces status: Sleeping
import importlib.util
import subprocess
import sys

import pandas as pd
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Ensure the spaCy model is downloaded.
# Streamlit re-executes this script from the top on every user interaction,
# so only shell out when the model package is actually missing.
if importlib.util.find_spec("en_core_web_sm") is None:
    # Use the interpreter that is running this app so the model is installed
    # into the same environment (a bare "python" may resolve elsewhere).
    # The return code is intentionally not checked: the download stays
    # best-effort, exactly as before.
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
    # Make the freshly installed package visible to the import system.
    importlib.invalidate_caches()

# These project imports are kept after the download step on purpose.
# NOTE(review): presumably utils.text_processing loads en_core_web_sm at
# import time — confirm before moving these lines.
from utils.text_processing import extract_text_from_pdf, split_into_clauses
from utils.model_utils import predict_unfairness
# Configure the browser tab (title and favicon) and switch to the wide layout.
# NOTE(review): the icon string looks like a mis-decoded emoji — confirm the
# intended character against the original file.
_PAGE_SETTINGS = {
    "page_title": "Terms of Service Analyzer",
    "page_icon": "π",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
# Load model and tokenizer from Hugging Face
@st.cache_resource
def load_model():
    """Load the ToS classifier and its tokenizer from the Hugging Face Hub.

    The result is cached with ``st.cache_resource`` so the weights are loaded
    once per server process instead of on every Streamlit rerun. The cached
    objects are shared between reruns and sessions, so callers should treat
    them as read-only.

    Returns:
        A ``(model, tokenizer)`` tuple for the "CodeHima/Tos-Roberta"
        checkpoint.
    """
    checkpoint = "CodeHima/Tos-Roberta"
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer
model, tokenizer = load_model()

# NOTE(review): the leading character of the title looks like a mis-decoded
# emoji; it is left unchanged because the original cannot be recovered here.
st.title("π Terms of Service Analyzer")

# File upload
uploaded_file = st.file_uploader("Choose a PDF or text file", type=["pdf", "txt"])

# Text input
text_input = st.text_area("Or paste your Terms of Service here")

if uploaded_file is not None or text_input:
    # Progress bar and status line, both cleared once the analysis is done.
    progress_bar = st.progress(0)
    status_text = st.empty()

    # An uploaded file takes precedence over pasted text.
    if uploaded_file is not None:
        status_text.text("Reading file...")
        progress_bar.progress(10)
        if uploaded_file.type == "application/pdf":
            text = extract_text_from_pdf(uploaded_file)
        else:
            # Replace undecodable bytes instead of crashing on text files
            # that are not valid UTF-8.
            text = uploaded_file.getvalue().decode("utf-8", errors="replace")
    else:
        text = text_input

    status_text.text("Splitting into clauses...")
    progress_bar.progress(30)
    clauses = split_into_clauses(text)

    results = []
    total_clauses = len(clauses)
    for position, clause in enumerate(clauses, start=1):
        status_text.text(f"Analyzing clause {position} of {total_clauses}...")
        # Map the loop onto the 30-90% range of the bar; the cap keeps the
        # value within bounds.
        progress = min(30 + int(position / total_clauses * 60), 90)
        progress_bar.progress(progress)

        label, probabilities = predict_unfairness(clause, model, tokenizer)
        results.append({
            "clause": clause,
            "label": label,
            "probabilities": probabilities,
        })

    if not results:
        # Nothing to analyse (e.g. whitespace-only input or a PDF without
        # extractable text). Bail out before the summary, which would
        # otherwise fail on the missing 'label' column and divide by zero.
        progress_bar.empty()
        status_text.empty()
        st.warning(
            "No clauses could be extracted from the provided text. "
            "Please check your input and try again."
        )
    else:
        status_text.text("Preparing results...")
        progress_bar.progress(100)

        df = pd.DataFrame(results)

        # Calculate summary (plain ints so they format and display cleanly).
        total_clauses = len(df)
        clearly_fair = int((df["label"] == "clearly_fair").sum())
        potentially_unfair = int((df["label"] == "potentially_unfair").sum())
        clearly_unfair = int((df["label"] == "clearly_unfair").sum())

        # Clear the progress bar and status text
        progress_bar.empty()
        status_text.empty()

        # Display summary: count as the value, share of all clauses as delta.
        st.header("Summary")
        col1, col2, col3 = st.columns(3)
        col1.metric("Clearly Fair", clearly_fair, f"{clearly_fair/total_clauses:.1%}")
        col2.metric("Potentially Unfair", potentially_unfair, f"{potentially_unfair/total_clauses:.1%}")
        col3.metric("Clearly Unfair", clearly_unfair, f"{clearly_unfair/total_clauses:.1%}")

        # Recommendation: any clearly unfair clause, or more than 30%
        # potentially unfair clauses, triggers the strongest message.
        if clearly_unfair > 0 or potentially_unfair / total_clauses > 0.3:
            st.warning("⚠️ Exercise caution! This ToS contains unfair or potentially unfair clauses.")
        elif potentially_unfair > 0:
            st.info("ℹ️ Proceed with awareness. This ToS contains some potentially unfair clauses.")
        else:
            # NOTE(review): the original emoji here was mis-decoded; a check
            # mark is assumed — confirm the exact character.
            st.success("✅ This ToS appears to be fair. Always read carefully nonetheless.")

        # Display results, one coloured box per clause. Any label other than
        # the two listed below is rendered as an error box.
        st.header("Detailed Analysis")
        box_for_label = {
            "clearly_fair": st.success,
            "potentially_unfair": st.warning,
        }
        for entry in results:
            label = entry["label"]
            show_box = box_for_label.get(label, st.error)
            show_box(f"**{label.replace('_', ' ').title()}:** {entry['clause']}")

            probs = entry["probabilities"]
            st.write(
                f"Probabilities: Clearly Fair: {probs[0]:.2f}, "
                f"Potentially Unfair: {probs[1]:.2f}, "
                f"Clearly Unfair: {probs[2]:.2f}"
            )
            st.divider()
else:
    st.info("Please upload a file or paste your Terms of Service to begin analysis.")