File size: 4,280 Bytes
34e855f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from utils.text_processing import extract_text_from_pdf, split_into_clauses
from utils.model_utils import predict_unfairness

# Set the browser-tab title and icon, and use the wide page layout.
st.set_page_config(
    page_title="Terms of Service Analyzer",
    page_icon="📜",  # scroll emoji (the literal was previously mis-encoded)
    layout="wide",
)

# Load model and tokenizer from Hugging Face
@st.cache_resource
def load_model(model_name="CodeHima/Tos-Roberta"):
    """Load the sequence-classification model and its matching tokenizer.

    The function is decorated with ``st.cache_resource`` so the loaded
    objects can be reused instead of being reloaded on every script run.

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls, so
            the model and tokenizer always come from the same checkpoint.
            Defaults to the checkpoint this app was written for.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer

# Obtain the model and tokenizer used for every clause prediction below.
model, tokenizer = load_model()

st.title("📜 Terms of Service Analyzer")

# File upload (PDF or plain text)
uploaded_file = st.file_uploader("Choose a PDF or text file", type=["pdf", "txt"])

# Text input, as an alternative to uploading a file
text_input = st.text_area("Or paste your Terms of Service here")

# Main flow: once the user has supplied a file or some non-blank text, split
# it into clauses, classify each clause, and render a summary followed by the
# per-clause details. An uploaded file takes precedence over pasted text.
if uploaded_file is not None or text_input.strip():
    # Progress bar and status line, both removed once the analysis is done.
    progress_bar = st.progress(0)
    status_text = st.empty()

    if uploaded_file is not None:
        status_text.text("Reading file...")
        progress_bar.progress(10)
        if uploaded_file.type == "application/pdf":
            text = extract_text_from_pdf(uploaded_file)
        else:
            # Replace undecodable bytes rather than crashing on text files
            # that are not valid UTF-8.
            text = uploaded_file.getvalue().decode("utf-8", errors="replace")
    else:
        text = text_input

    status_text.text("Splitting into clauses...")
    progress_bar.progress(30)
    clauses = split_into_clauses(text)
    total_clauses = len(clauses)

    if total_clauses == 0:
        # Nothing to classify: without this guard the summary below would
        # fail on the missing 'label' column and on division by zero.
        progress_bar.empty()
        status_text.empty()
        st.warning("No clauses could be extracted from the provided document.")
        st.stop()

    results = []
    for position, clause in enumerate(clauses, start=1):
        status_text.text(f"Analyzing clause {position} of {total_clauses}...")
        # Map the loop onto the 30-90 range of the bar; min() keeps the value
        # from overshooting 90 before the final step sets it to 100.
        progress = min(30 + int(position / total_clauses * 60), 90)
        progress_bar.progress(progress)
        label, probabilities = predict_unfairness(clause, model, tokenizer)
        results.append({
            "clause": clause,
            "label": label,
            "probabilities": probabilities,
        })

    status_text.text("Preparing results...")
    progress_bar.progress(100)

    df = pd.DataFrame(results)

    # Calculate summary: count each label once, then read the three of interest.
    label_counts = df["label"].value_counts()
    clearly_fair = int(label_counts.get("clearly_fair", 0))
    potentially_unfair = int(label_counts.get("potentially_unfair", 0))
    clearly_unfair = int(label_counts.get("clearly_unfair", 0))

    # Clear the progress bar and status text
    progress_bar.empty()
    status_text.empty()

    # Display summary: absolute count plus share of all clauses per category.
    st.header("Summary")
    col1, col2, col3 = st.columns(3)
    col1.metric("Clearly Fair", clearly_fair, f"{clearly_fair/total_clauses:.1%}")
    col2.metric("Potentially Unfair", potentially_unfair, f"{potentially_unfair/total_clauses:.1%}")
    col3.metric("Clearly Unfair", clearly_unfair, f"{clearly_unfair/total_clauses:.1%}")

    # Recommendation: warn on any clearly unfair clause, or when more than 30%
    # of the clauses are potentially unfair.
    if clearly_unfair > 0 or potentially_unfair / total_clauses > 0.3:
        st.warning("⚠️ Exercise caution! This ToS contains unfair or potentially unfair clauses.")
    elif potentially_unfair > 0:
        st.info("ℹ️ Proceed with awareness. This ToS contains some potentially unfair clauses.")
    else:
        st.success("✅ This ToS appears to be fair. Always read carefully nonetheless.")

    # Display results: pick the message box by label; any label other than
    # the two listed here is rendered as an error box.
    st.header("Detailed Analysis")
    box_for_label = {
        "clearly_fair": st.success,
        "potentially_unfair": st.warning,
    }
    for _, row in df.iterrows():
        show_box = box_for_label.get(row["label"], st.error)
        show_box(f"**{row['label'].replace('_', ' ').title()}:** {row['clause']}")

        st.write(f"Probabilities: Clearly Fair: {row['probabilities'][0]:.2f}, "
                 f"Potentially Unfair: {row['probabilities'][1]:.2f}, "
                 f"Clearly Unfair: {row['probabilities'][2]:.2f}")
        st.divider()
else:
    st.info("Please upload a file or paste your Terms of Service to begin analysis.")