Spaces:

BulatF
/

StreamlitSentiment

Runtime error

App Files Community

BulatF committed on Jul 5, 2023

Commit

768bcdc

•

1 Parent(s): 2fd93e8

Upload app.py

Browse files

Files changed (1) hide show

app.py +30 -20

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import streamlit as st
 import pandas as pd
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch.nn.functional as F
 import torch
 import io
@@ -18,9 +20,8 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 st.set_page_config(layout="wide")
 # Import the new model and tokenizer
-class_model_name = 'facebook/bart-large-mnli'
-class_model = AutoModelForSequenceClassification.from_pretrained(class_model_name)
-class_tokenizer = AutoTokenizer.from_pretrained(class_model_name)
 #defs
@@ -43,13 +44,20 @@ def get_table_download_link(df):
 # Function for classifying with the new model
-def classify_with_new_classes(reviews, class_name):
-    inputs = class_tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
-    outputs = class_model(**inputs)
-    probabilities = F.softmax(outputs.logits, dim=1).tolist()
-    class_scores = [prob[1] for prob in probabilities]  # Assuming binary classification
     return class_scores
 def main():
     st.title('Sentiment Analysis')
     st.markdown('Upload an Excel file to get sentiment analytics')
@@ -77,19 +85,21 @@ def main():
     start_button = st.button('Start Analysis')
     if start_button and df is not None:
         # Drop rows with NaN or blank values in the review_column
         df = df[df[review_column].notna()]
         df = df[df[review_column].str.strip() != '']
         class_names = [name.strip() for name in class_names.split(',')]  # Split class names into a list
         for name in class_names:  # Add a new column for each class name
-            df[name] = 0.0
         if review_column in df.columns:
             with st.spinner('Performing sentiment analysis...'):
                 df, df_display = process_reviews(df, review_column, class_names)
             display_ratings(df, review_column)  # updated this line
             display_dataframe(df, df_display)
         else:
@@ -114,13 +124,14 @@ def process_reviews(df, review_column, class_names):
             raw_scores.extend(batch_scores)
             review_counter += len(batch_reviews)
             progress_bar.progress(review_counter / total_reviews)
     class_scores_dict = {}  # New dictionary to store class scores
-    for name in class_names:
-        with st.spinner(f'Generating classes for {name}...'):
-            class_scores = classify_with_new_classes(df[review_column].tolist(), name)
-            df[name] = class_scores
-            class_scores_dict[name] = class_scores  # Store class scores in the dictionary
     # Add a new column with the class that has the highest score
     df['Highest Class'] = df[class_names].idxmax(axis=1)
@@ -131,7 +142,7 @@ def process_reviews(df, review_column, class_names):
     df_display = scores_to_percent(df_new.copy())
     # Get all columns excluding the created ones and the review_column
-    remaining_columns = [col for col in df.columns if col not in [review_column, 'raw_scores', 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star', 'Highest Class']]
     # Reorder the dataframe with selected columns first, created columns next, then the remaining columns
     df_new = df_new[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star'] + class_names + ['Highest Class'] + remaining_columns]
@@ -144,7 +155,6 @@ def process_reviews(df, review_column, class_names):
 def scores_to_df(df):
     for i in range(1, 6):
         df[f'{i} Star'] = df['raw_scores'].apply(lambda scores: scores[i-1]).round(2)

 import streamlit as st
 import pandas as pd
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import pipeline
 import torch.nn.functional as F
 import torch
 import io
 st.set_page_config(layout="wide")
 # Load the zero-shot classification pipeline (facebook/bart-large-mnli)
+classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
 #defs
 # Function for classifying with the new model
+def classify_with_new_classes(reviews, class_names):
     # Score every review against every candidate class name using the
     # module-level zero-shot `classifier` pipeline.
     #
     # reviews: iterable of review texts; each one is classified separately.
     # class_names: list of candidate labels handed to the classifier.
     # Returns a list with one entry per review; each entry is a list of
     # scores ordered the same way as class_names.
+    class_scores = []
     # NOTE(review): this makes one classifier call per review; presumably the
     # pipeline also accepts a list of texts, which would allow batching —
     # confirm against the transformers documentation.
+    for review in reviews:
+        result = classifier(review, class_names)
         # Pair each returned label with its score so scores can be looked up
         # by label (assumes result['labels'] and result['scores'] are
         # parallel sequences — TODO confirm).
         # NOTE(review): duplicate entries in class_names collapse to a single
         # key in this dict.
+        scores_dict = dict(zip(result['labels'], result['scores']))
+        # Reorder scores to match the original class_names order
+        scores = [scores_dict[name] for name in class_names]
+        class_scores.append(scores)
     return class_scores
 def main():
     st.title('Sentiment Analysis')
     st.markdown('Upload an Excel file to get sentiment analytics')
     start_button = st.button('Start Analysis')
     if start_button and df is not None:
         # Drop rows with NaN or blank values in the review_column
         df = df[df[review_column].notna()]
         df = df[df[review_column].str.strip() != '']
         class_names = [name.strip() for name in class_names.split(',')]  # Split class names into a list
         for name in class_names:  # Add a new column for each class name
+            if name not in df.columns:
+                df[name] = 0.0
         if review_column in df.columns:
             with st.spinner('Performing sentiment analysis...'):
                 df, df_display = process_reviews(df, review_column, class_names)
             display_ratings(df, review_column)  # updated this line
             display_dataframe(df, df_display)
         else:
             raw_scores.extend(batch_scores)
             review_counter += len(batch_reviews)
             progress_bar.progress(review_counter / total_reviews)
+    with st.spinner('Generating classes...'):
+        class_scores = classify_with_new_classes(df[review_column].tolist(), class_names)
     class_scores_dict = {}  # New dictionary to store class scores
+    for i, name in enumerate(class_names):
+        df[name] = [score[i] for score in class_scores]
+        class_scores_dict[name] = [score[i] for score in class_scores]
     # Add a new column with the class that has the highest score
     df['Highest Class'] = df[class_names].idxmax(axis=1)
     df_display = scores_to_percent(df_new.copy())
     # Get all columns excluding the created ones and the review_column
+    remaining_columns = [col for col in df.columns if col not in [review_column, 'raw_scores', 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star', 'Highest Class'] + class_names]
     # Reorder the dataframe with selected columns first, created columns next, then the remaining columns
     df_new = df_new[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star'] + class_names + ['Highest Class'] + remaining_columns]
 def scores_to_df(df):
     for i in range(1, 6):
         df[f'{i} Star'] = df['raw_scores'].apply(lambda scores: scores[i-1]).round(2)