CodeHima committed on
Commit
ba8276b
·
verified ·
1 Parent(s): d9d861d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -112
app.py CHANGED
@@ -1,113 +1,118 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import torch
4
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
- from utils.text_processing import extract_text_from_pdf, split_into_clauses
6
- from utils.model_utils import predict_unfairness
7
-
8
- # Set page title and favicon
9
- st.set_page_config(
10
- page_title="Terms of Service Analyzer",
11
- page_icon="📜",
12
- layout="wide"
13
- )
14
-
15
- # Load model and tokenizer from Hugging Face
16
- @st.cache_resource
17
- def load_model():
18
- model = AutoModelForSequenceClassification.from_pretrained("CodeHima/Tos-Roberta")
19
- tokenizer = AutoTokenizer.from_pretrained("CodeHima/Tos-Roberta")
20
- return model, tokenizer
21
-
22
- model, tokenizer = load_model()
23
-
24
- st.title("📜 Terms of Service Analyzer")
25
-
26
- # File upload
27
- uploaded_file = st.file_uploader("Choose a PDF or text file", type=["pdf", "txt"])
28
-
29
- # Text input
30
- text_input = st.text_area("Or paste your Terms of Service here")
31
-
32
- if uploaded_file is not None or text_input:
33
- # Create a progress bar
34
- progress_bar = st.progress(0)
35
-
36
- # Create a status text
37
- status_text = st.empty()
38
-
39
- if uploaded_file is not None:
40
- status_text.text("Reading file...")
41
- progress_bar.progress(10)
42
- if uploaded_file.type == "application/pdf":
43
- text = extract_text_from_pdf(uploaded_file)
44
- else:
45
- text = uploaded_file.getvalue().decode("utf-8")
46
- else:
47
- text = text_input
48
-
49
- status_text.text("Splitting into clauses...")
50
- progress_bar.progress(30)
51
- clauses = split_into_clauses(text)
52
-
53
- results = []
54
- total_clauses = len(clauses)
55
-
56
- for i, clause in enumerate(clauses):
57
- status_text.text(f"Analyzing clause {i+1} of {total_clauses}...")
58
- # Update progress calculation to ensure it's always between 0 and 100
59
- progress = min(30 + int((i+1) / total_clauses * 60), 90)
60
- progress_bar.progress(progress)
61
- label, probabilities = predict_unfairness(clause, model, tokenizer)
62
- results.append({
63
- "clause": clause,
64
- "label": label,
65
- "probabilities": probabilities
66
- })
67
-
68
- status_text.text("Preparing results...")
69
- progress_bar.progress(100)
70
-
71
- df = pd.DataFrame(results)
72
-
73
- # Calculate summary
74
- total_clauses = len(df)
75
- clearly_fair = sum(df['label'] == 'clearly_fair')
76
- potentially_unfair = sum(df['label'] == 'potentially_unfair')
77
- clearly_unfair = sum(df['label'] == 'clearly_unfair')
78
-
79
- # Clear the progress bar and status text
80
- progress_bar.empty()
81
- status_text.empty()
82
-
83
- # Display summary
84
- st.header("Summary")
85
- col1, col2, col3 = st.columns(3)
86
- col1.metric("Clearly Fair", clearly_fair, f"{clearly_fair/total_clauses:.1%}")
87
- col2.metric("Potentially Unfair", potentially_unfair, f"{potentially_unfair/total_clauses:.1%}")
88
- col3.metric("Clearly Unfair", clearly_unfair, f"{clearly_unfair/total_clauses:.1%}")
89
-
90
- # Recommendation
91
- if clearly_unfair > 0 or potentially_unfair / total_clauses > 0.3:
92
- st.warning("⚠️ Exercise caution! This ToS contains unfair or potentially unfair clauses.")
93
- elif potentially_unfair > 0:
94
- st.info("ℹ️ Proceed with awareness. This ToS contains some potentially unfair clauses.")
95
- else:
96
- st.success("✅ This ToS appears to be fair. Always read carefully nonetheless.")
97
-
98
- # Display results
99
- st.header("Detailed Analysis")
100
- for _, row in df.iterrows():
101
- if row['label'] == 'clearly_fair':
102
- st.success(f"**{row['label'].replace('_', ' ').title()}:** {row['clause']}")
103
- elif row['label'] == 'potentially_unfair':
104
- st.warning(f"**{row['label'].replace('_', ' ').title()}:** {row['clause']}")
105
- else:
106
- st.error(f"**{row['label'].replace('_', ' ').title()}:** {row['clause']}")
107
-
108
- st.write(f"Probabilities: Clearly Fair: {row['probabilities'][0]:.2f}, "
109
- f"Potentially Unfair: {row['probabilities'][1]:.2f}, "
110
- f"Clearly Unfair: {row['probabilities'][2]:.2f}")
111
- st.divider()
112
- else:
 
 
 
 
 
113
  st.info("Please upload a file or paste your Terms of Service to begin analysis.")
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
+ import subprocess
6
+
7
+ # Ensure the spaCy model is downloaded
8
+ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
9
+
10
+ from utils.text_processing import extract_text_from_pdf, split_into_clauses
11
+ from utils.model_utils import predict_unfairness
12
+
13
+ # Set page title and favicon
14
+ st.set_page_config(
15
+ page_title="Terms of Service Analyzer",
16
+ page_icon="📜",
17
+ layout="wide"
18
+ )
19
+
20
+ # Load model and tokenizer from Hugging Face
21
+ @st.cache_resource
22
+ def load_model():
23
+ model = AutoModelForSequenceClassification.from_pretrained("CodeHima/Tos-Roberta")
24
+ tokenizer = AutoTokenizer.from_pretrained("CodeHima/Tos-Roberta")
25
+ return model, tokenizer
26
+
27
+ model, tokenizer = load_model()
28
+
29
+ st.title("📜 Terms of Service Analyzer")
30
+
31
+ # File upload
32
+ uploaded_file = st.file_uploader("Choose a PDF or text file", type=["pdf", "txt"])
33
+
34
+ # Text input
35
+ text_input = st.text_area("Or paste your Terms of Service here")
36
+
37
+ if uploaded_file is not None or text_input:
38
+ # Create a progress bar
39
+ progress_bar = st.progress(0)
40
+
41
+ # Create a status text
42
+ status_text = st.empty()
43
+
44
+ if uploaded_file is not None:
45
+ status_text.text("Reading file...")
46
+ progress_bar.progress(10)
47
+ if uploaded_file.type == "application/pdf":
48
+ text = extract_text_from_pdf(uploaded_file)
49
+ else:
50
+ text = uploaded_file.getvalue().decode("utf-8")
51
+ else:
52
+ text = text_input
53
+
54
+ status_text.text("Splitting into clauses...")
55
+ progress_bar.progress(30)
56
+ clauses = split_into_clauses(text)
57
+
58
+ results = []
59
+ total_clauses = len(clauses)
60
+
61
+ for i, clause in enumerate(clauses):
62
+ status_text.text(f"Analyzing clause {i+1} of {total_clauses}...")
63
+ # Update progress calculation to ensure it's always between 0 and 100
64
+ progress = min(30 + int((i+1) / total_clauses * 60), 90)
65
+ progress_bar.progress(progress)
66
+ label, probabilities = predict_unfairness(clause, model, tokenizer)
67
+ results.append({
68
+ "clause": clause,
69
+ "label": label,
70
+ "probabilities": probabilities
71
+ })
72
+
73
+ status_text.text("Preparing results...")
74
+ progress_bar.progress(100)
75
+
76
+ df = pd.DataFrame(results)
77
+
78
+ # Calculate summary
79
+ total_clauses = len(df)
80
+ clearly_fair = sum(df['label'] == 'clearly_fair')
81
+ potentially_unfair = sum(df['label'] == 'potentially_unfair')
82
+ clearly_unfair = sum(df['label'] == 'clearly_unfair')
83
+
84
+ # Clear the progress bar and status text
85
+ progress_bar.empty()
86
+ status_text.empty()
87
+
88
+ # Display summary
89
+ st.header("Summary")
90
+ col1, col2, col3 = st.columns(3)
91
+ col1.metric("Clearly Fair", clearly_fair, f"{clearly_fair/total_clauses:.1%}")
92
+ col2.metric("Potentially Unfair", potentially_unfair, f"{potentially_unfair/total_clauses:.1%}")
93
+ col3.metric("Clearly Unfair", clearly_unfair, f"{clearly_unfair/total_clauses:.1%}")
94
+
95
+ # Recommendation
96
+ if clearly_unfair > 0 or potentially_unfair / total_clauses > 0.3:
97
+ st.warning("⚠️ Exercise caution! This ToS contains unfair or potentially unfair clauses.")
98
+ elif potentially_unfair > 0:
99
+ st.info("ℹ️ Proceed with awareness. This ToS contains some potentially unfair clauses.")
100
+ else:
101
+ st.success("✅ This ToS appears to be fair. Always read carefully nonetheless.")
102
+
103
+ # Display results
104
+ st.header("Detailed Analysis")
105
+ for _, row in df.iterrows():
106
+ if row['label'] == 'clearly_fair':
107
+ st.success(f"**{row['label'].replace('_', ' ').title()}:** {row['clause']}")
108
+ elif row['label'] == 'potentially_unfair':
109
+ st.warning(f"**{row['label'].replace('_', ' ').title()}:** {row['clause']}")
110
+ else:
111
+ st.error(f"**{row['label'].replace('_', ' ').title()}:** {row['clause']}")
112
+
113
+ st.write(f"Probabilities: Clearly Fair: {row['probabilities'][0]:.2f}, "
114
+ f"Potentially Unfair: {row['probabilities'][1]:.2f}, "
115
+ f"Clearly Unfair: {row['probabilities'][2]:.2f}")
116
+ st.divider()
117
+ else:
118
  st.info("Please upload a file or paste your Terms of Service to begin analysis.")