Spaces:
Running
on
Zero
Running
on
Zero
Vadim Borisov
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -31,12 +31,12 @@ def split_text(text, max_length=512):
|
|
31 |
sentences = text.split('. ')
|
32 |
chunks = []
|
33 |
current_chunk = ""
|
34 |
-
|
35 |
for sentence in sentences:
|
36 |
# Add the period back if it was removed during splitting
|
37 |
if not sentence.endswith('.'):
|
38 |
sentence += '.'
|
39 |
-
|
40 |
# Check if adding the sentence exceeds the max_length
|
41 |
encoded = tokenizer.encode(current_chunk + " " + sentence, truncation=False)
|
42 |
if len(encoded) > max_length:
|
@@ -52,38 +52,57 @@ def split_text(text, max_length=512):
|
|
52 |
current_chunk = ""
|
53 |
else:
|
54 |
current_chunk += " " + sentence
|
55 |
-
|
56 |
if current_chunk:
|
57 |
chunks.append(current_chunk.strip())
|
58 |
-
|
59 |
return chunks
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
@spaces.GPU
|
62 |
def analyze_sentiment(text, show_probabilities=False):
|
63 |
"""
|
64 |
Analyzes the sentiment of the input text. If the text exceeds the token limit,
|
65 |
-
it splits the text into chunks and aggregates the results.
|
66 |
"""
|
67 |
try:
|
68 |
chunks = split_text(text)
|
69 |
all_probabilities = []
|
70 |
-
all_predictions = []
|
71 |
detailed_results = ""
|
72 |
|
73 |
for idx, chunk in enumerate(chunks, 1):
|
74 |
inputs = tokenizer(chunk, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
|
75 |
-
|
76 |
with torch.no_grad():
|
77 |
outputs = model(**inputs)
|
78 |
-
|
79 |
probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
|
80 |
predicted_class = probabilities.argmax()
|
81 |
predicted_sentiment = SENTIMENT_MAP[predicted_class]
|
82 |
confidence = probabilities[predicted_class]
|
83 |
-
|
84 |
all_probabilities.append(probabilities)
|
85 |
-
|
86 |
-
|
87 |
if show_probabilities:
|
88 |
detailed_results += f"**Chunk {idx}: {predicted_sentiment} ({confidence:.2%})**\n"
|
89 |
for cls, prob in zip(SENTIMENT_MAP.values(), probabilities):
|
@@ -91,20 +110,20 @@ def analyze_sentiment(text, show_probabilities=False):
|
|
91 |
detailed_results += "\n"
|
92 |
else:
|
93 |
detailed_results += f"**Chunk {idx}: {predicted_sentiment} ({confidence:.2%})**\n"
|
94 |
-
|
95 |
-
# Aggregate results
|
96 |
-
|
97 |
-
|
98 |
-
final_sentiment = SENTIMENT_MAP[final_class]
|
99 |
-
final_confidence = avg_probabilities[final_class]
|
100 |
-
|
101 |
result = f"**Overall Sentiment: {final_sentiment}**\nConfidence: {final_confidence:.2%}\n\n"
|
102 |
-
|
103 |
if show_probabilities:
|
104 |
result += "### Detailed Analysis:\n" + detailed_results
|
|
|
|
|
|
|
105 |
else:
|
106 |
result += "### Detailed Analysis:\n" + detailed_results
|
107 |
-
|
108 |
return result
|
109 |
except Exception as e:
|
110 |
return f"An error occurred during sentiment analysis: {str(e)}"
|
@@ -117,7 +136,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
117 |
Discover the emotional tone behind any text with our advanced AI model! This app uses a state-of-the-art language model to analyze the sentiment of your text, classifying it into one of five categories: **Very Negative**, **Negative**, **Neutral**, **Positive**, or **Very Positive**.
|
118 |
"""
|
119 |
)
|
120 |
-
|
121 |
with gr.Row():
|
122 |
with gr.Column():
|
123 |
input_text = gr.Textbox(
|
@@ -132,27 +151,35 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
132 |
analyze_button = gr.Button("Analyze Sentiment")
|
133 |
with gr.Column():
|
134 |
output = gr.Markdown(label="Result")
|
135 |
-
|
136 |
with gr.Accordion("Examples", open=False):
|
137 |
examples = [
|
138 |
["I absolutely loved this movie! The acting was superb and the plot was engaging.", True],
|
139 |
["The service at this restaurant was terrible. I'll never go back.", False],
|
140 |
["The product works as expected. Nothing special, but it gets the job done.", True],
|
141 |
["I'm somewhat disappointed with my purchase. It's not as good as I hoped.", False],
|
142 |
-
["This book changed my life! I couldn't put it down and learned so much.", True]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
]
|
144 |
gr.Examples(
|
145 |
examples=examples,
|
146 |
inputs=[input_text, show_probs],
|
147 |
label="Predefined Examples"
|
148 |
)
|
149 |
-
|
150 |
analyze_button.click(
|
151 |
fn=analyze_sentiment,
|
152 |
inputs=[input_text, show_probs],
|
153 |
outputs=output
|
154 |
)
|
155 |
-
|
156 |
gr.Markdown(
|
157 |
"""
|
158 |
---
|
|
|
31 |
sentences = text.split('. ')
|
32 |
chunks = []
|
33 |
current_chunk = ""
|
34 |
+
|
35 |
for sentence in sentences:
|
36 |
# Add the period back if it was removed during splitting
|
37 |
if not sentence.endswith('.'):
|
38 |
sentence += '.'
|
39 |
+
|
40 |
# Check if adding the sentence exceeds the max_length
|
41 |
encoded = tokenizer.encode(current_chunk + " " + sentence, truncation=False)
|
42 |
if len(encoded) > max_length:
|
|
|
52 |
current_chunk = ""
|
53 |
else:
|
54 |
current_chunk += " " + sentence
|
55 |
+
|
56 |
if current_chunk:
|
57 |
chunks.append(current_chunk.strip())
|
58 |
+
|
59 |
return chunks
|
60 |
|
61 |
+
def aggregate_sentiments(all_probabilities, threshold=0.7):
    """
    Combine per-chunk class probabilities into one overall sentiment.

    The baseline result is the class with the highest unweighted mean
    probability across all chunks. If any single chunk assigns more than
    ``threshold`` probability to an extreme class (index 0 or index 4), that
    extreme class overrides the baseline; when both extremes qualify, the
    one with the higher per-chunk peak wins (index 0 on a tie).

    Args:
        all_probabilities: Non-empty sequence with one entry per chunk, each
            entry being that chunk's sequence of class probabilities.
        threshold: Per-chunk probability an extreme class must exceed to
            override the averaged result.

    Returns:
        A tuple ``(final_sentiment, final_confidence, aggregated_probs)``:
        the label looked up in ``SENTIMENT_MAP``, the probability backing
        that label (the mean probability for the baseline, or the per-chunk
        peak when an extreme class overrides), and the array of mean
        probabilities per class.

    Raises:
        ValueError: If ``all_probabilities`` is empty.
    """
    if len(all_probabilities) == 0:
        raise ValueError("all_probabilities must contain at least one chunk")

    # Build the (chunks x classes) tensor once and reuse it for both the
    # mean and the per-chunk peak computations.
    chunk_probs = torch.stack([torch.as_tensor(p) for p in all_probabilities])

    aggregated_probs = chunk_probs.mean(dim=0).numpy()
    predicted_class = aggregated_probs.argmax()
    final_sentiment = SENTIMENT_MAP[predicted_class]
    final_confidence = aggregated_probs[predicted_class]

    # The override must look at individual chunks: testing the averaged
    # probabilities against a threshold above 0.5 can only ever select the
    # class that is already the argmax, so it would never change the result.
    peak_per_class = chunk_probs.max(dim=0).values.numpy()
    override_class = None
    for cls in (0, 4):
        if cls >= len(peak_per_class) or not peak_per_class[cls] > threshold:
            continue
        if override_class is None or peak_per_class[cls] > peak_per_class[override_class]:
            override_class = cls

    if override_class is not None:
        final_sentiment = SENTIMENT_MAP[override_class]
        final_confidence = peak_per_class[override_class]

    return final_sentiment, final_confidence, aggregated_probs
|
81 |
+
|
82 |
@spaces.GPU
|
83 |
def analyze_sentiment(text, show_probabilities=False):
|
84 |
"""
|
85 |
Analyzes the sentiment of the input text. If the text exceeds the token limit,
|
86 |
+
it splits the text into chunks and aggregates the results intelligently.
|
87 |
"""
|
88 |
try:
|
89 |
chunks = split_text(text)
|
90 |
all_probabilities = []
|
|
|
91 |
detailed_results = ""
|
92 |
|
93 |
for idx, chunk in enumerate(chunks, 1):
|
94 |
inputs = tokenizer(chunk, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
|
95 |
+
|
96 |
with torch.no_grad():
|
97 |
outputs = model(**inputs)
|
98 |
+
|
99 |
probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
|
100 |
predicted_class = probabilities.argmax()
|
101 |
predicted_sentiment = SENTIMENT_MAP[predicted_class]
|
102 |
confidence = probabilities[predicted_class]
|
103 |
+
|
104 |
all_probabilities.append(probabilities)
|
105 |
+
|
|
|
106 |
if show_probabilities:
|
107 |
detailed_results += f"**Chunk {idx}: {predicted_sentiment} ({confidence:.2%})**\n"
|
108 |
for cls, prob in zip(SENTIMENT_MAP.values(), probabilities):
|
|
|
110 |
detailed_results += "\n"
|
111 |
else:
|
112 |
detailed_results += f"**Chunk {idx}: {predicted_sentiment} ({confidence:.2%})**\n"
|
113 |
+
|
114 |
+
# Aggregate results
|
115 |
+
final_sentiment, final_confidence, aggregated_probs = aggregate_sentiments(all_probabilities)
|
116 |
+
|
|
|
|
|
|
|
117 |
result = f"**Overall Sentiment: {final_sentiment}**\nConfidence: {final_confidence:.2%}\n\n"
|
118 |
+
|
119 |
if show_probabilities:
|
120 |
result += "### Detailed Analysis:\n" + detailed_results
|
121 |
+
result += "### Aggregated Probabilities:\n"
|
122 |
+
for cls, prob in zip(SENTIMENT_MAP.values(), aggregated_probs):
|
123 |
+
result += f"{cls}: {prob:.2%}\n"
|
124 |
else:
|
125 |
result += "### Detailed Analysis:\n" + detailed_results
|
126 |
+
|
127 |
return result
|
128 |
except Exception as e:
|
129 |
return f"An error occurred during sentiment analysis: {str(e)}"
|
|
|
136 |
Discover the emotional tone behind any text with our advanced AI model! This app uses a state-of-the-art language model to analyze the sentiment of your text, classifying it into one of five categories: **Very Negative**, **Negative**, **Neutral**, **Positive**, or **Very Positive**.
|
137 |
"""
|
138 |
)
|
139 |
+
|
140 |
with gr.Row():
|
141 |
with gr.Column():
|
142 |
input_text = gr.Textbox(
|
|
|
151 |
analyze_button = gr.Button("Analyze Sentiment")
|
152 |
with gr.Column():
|
153 |
output = gr.Markdown(label="Result")
|
154 |
+
|
155 |
with gr.Accordion("Examples", open=False):
|
156 |
examples = [
|
157 |
["I absolutely loved this movie! The acting was superb and the plot was engaging.", True],
|
158 |
["The service at this restaurant was terrible. I'll never go back.", False],
|
159 |
["The product works as expected. Nothing special, but it gets the job done.", True],
|
160 |
["I'm somewhat disappointed with my purchase. It's not as good as I hoped.", False],
|
161 |
+
["This book changed my life! I couldn't put it down and learned so much.", True],
|
162 |
+
[
|
163 |
+
"""Discover the emotional tone behind any text with our advanced AI model! This app uses a state-of-the-art language model to analyze the sentiment of your text, classifying it into one of five categories: Very Negative, Negative, Neutral, Positive, or Very Positive.
|
164 |
+
|
165 |
+
Discover the emotional tone behind any text with our advanced AI model! This app uses a state-of-the-art language model to analyze the sentiment of your text, classifying it into one of five categories: Very Negative, Negative, Neutral, Positive, or Very Positive.
|
166 |
+
|
167 |
+
FUCK YOU BITCH""",
|
168 |
+
True
|
169 |
+
]
|
170 |
]
|
171 |
gr.Examples(
|
172 |
examples=examples,
|
173 |
inputs=[input_text, show_probs],
|
174 |
label="Predefined Examples"
|
175 |
)
|
176 |
+
|
177 |
analyze_button.click(
|
178 |
fn=analyze_sentiment,
|
179 |
inputs=[input_text, show_probs],
|
180 |
outputs=output
|
181 |
)
|
182 |
+
|
183 |
gr.Markdown(
|
184 |
"""
|
185 |
---
|