CodeHima committed on
Commit
34e855f
·
1 Parent(s): a20611a

chore: Add streamlit app for analyzing Terms of Service

Browse files
Files changed (2) hide show
  1. app.py +113 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
+ from utils.text_processing import extract_text_from_pdf, split_into_clauses
6
+ from utils.model_utils import predict_unfairness
7
+
8
# Configure the browser tab (title and icon) and use the wide page layout.
# The icon is the scroll emoji; it had been corrupted into mojibake by a
# wrong-codepage decode of its UTF-8 bytes.
st.set_page_config(
    page_title="Terms of Service Analyzer",
    page_icon="📜",
    layout="wide",
)
14
+
15
+ # Load model and tokenizer from Hugging Face
16
@st.cache_resource
def load_model():
    """Load the Tos-Roberta sequence classifier and its tokenizer.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned objects instead of loading them again on every call.

    Returns:
        A ``(model, tokenizer)`` tuple, both loaded from the
        ``CodeHima/Tos-Roberta`` checkpoint.
    """
    checkpoint = "CodeHima/Tos-Roberta"
    classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    tok = AutoTokenizer.from_pretrained(checkpoint)
    return classifier, tok
21
+
22
# Obtain the classifier and tokenizer used for every clause prediction below.
model, tokenizer = load_model()

st.title("📜 Terms of Service Analyzer")

# Two input routes: an uploaded PDF/text file, or text pasted into the box.
# When both are provided, the uploaded file takes precedence.
uploaded_file = st.file_uploader("Choose a PDF or text file", type=["pdf", "txt"])
text_input = st.text_area("Or paste your Terms of Service here")

# Nothing to analyze yet (whitespace-only pasted text counts as empty):
# show the prompt and end this script run.
if uploaded_file is None and not text_input.strip():
    st.info("Please upload a file or paste your Terms of Service to begin analysis.")
    st.stop()

# Placeholders for the progress bar and the status line shown while working.
progress_bar = st.progress(0)
status_text = st.empty()

if uploaded_file is not None:
    status_text.text("Reading file...")
    progress_bar.progress(10)
    if uploaded_file.type == "application/pdf":
        text = extract_text_from_pdf(uploaded_file)
    else:
        try:
            text = uploaded_file.getvalue().decode("utf-8")
        except UnicodeDecodeError:
            # Report an undecodable upload to the user instead of crashing
            # the app with a traceback.
            progress_bar.empty()
            status_text.empty()
            st.error(
                "The uploaded text file is not valid UTF-8. "
                "Please re-save it as UTF-8 and try again."
            )
            st.stop()
else:
    text = text_input

status_text.text("Splitting into clauses...")
progress_bar.progress(30)
clauses = split_into_clauses(text)

total_clauses = len(clauses)
if total_clauses == 0:
    # Without this guard the summary below would fail: an empty DataFrame has
    # no "label" column and the percentages would divide by zero.
    progress_bar.empty()
    status_text.empty()
    st.warning("No clauses could be extracted from the provided input.")
    st.stop()

results = []
for i, clause in enumerate(clauses, start=1):
    status_text.text(f"Analyzing clause {i} of {total_clauses}...")
    # Clause analysis occupies the 30-90% band of the progress bar; the
    # min() keeps the value from exceeding 90 before the final step.
    progress_bar.progress(min(30 + int(i / total_clauses * 60), 90))
    label, probabilities = predict_unfairness(clause, model, tokenizer)
    results.append({
        "clause": clause,
        "label": label,
        "probabilities": probabilities,
    })

status_text.text("Preparing results...")
progress_bar.progress(100)

df = pd.DataFrame(results)

# Count clauses per label; a label that never occurs counts as zero.
total_clauses = len(df)
label_counts = df["label"].value_counts()
clearly_fair = int(label_counts.get("clearly_fair", 0))
potentially_unfair = int(label_counts.get("potentially_unfair", 0))
clearly_unfair = int(label_counts.get("clearly_unfair", 0))

# Remove the progress widgets now that the analysis is complete.
progress_bar.empty()
status_text.empty()

# Summary: one metric per label, with its share of all clauses as the delta.
st.header("Summary")
col1, col2, col3 = st.columns(3)
col1.metric("Clearly Fair", clearly_fair, f"{clearly_fair / total_clauses:.1%}")
col2.metric("Potentially Unfair", potentially_unfair, f"{potentially_unfair / total_clauses:.1%}")
col3.metric("Clearly Unfair", clearly_unfair, f"{clearly_unfair / total_clauses:.1%}")

# Recommendation: any clearly unfair clause, or more than 30% potentially
# unfair clauses, triggers the strongest message.
if clearly_unfair > 0 or potentially_unfair / total_clauses > 0.3:
    st.warning("⚠️ Exercise caution! This ToS contains unfair or potentially unfair clauses.")
elif potentially_unfair > 0:
    st.info("ℹ️ Proceed with awareness. This ToS contains some potentially unfair clauses.")
else:
    st.success("✅ This ToS appears to be fair. Always read carefully nonetheless.")

# Detailed per-clause results. Any label other than the two listed here is
# rendered with the error style, as in the original if/elif/else chain.
st.header("Detailed Analysis")
render_by_label = {
    "clearly_fair": st.success,
    "potentially_unfair": st.warning,
}
for result in results:
    label = result["label"]
    render = render_by_label.get(label, st.error)
    render(f"**{label.replace('_', ' ').title()}:** {result['clause']}")

    # NOTE(review): assumes predict_unfairness returns probabilities ordered
    # as (clearly fair, potentially unfair, clearly unfair) - confirm in utils.
    probs = result["probabilities"]
    st.write(f"Probabilities: Clearly Fair: {probs[0]:.2f}, "
             f"Potentially Unfair: {probs[1]:.2f}, "
             f"Clearly Unfair: {probs[2]:.2f}")
    st.divider()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ torch
4
+ transformers
5
+ PyPDF2
6
+ spacy