import gradio as gr
import pandas as pd
from transformers import pipeline
from transformers.utils import logging
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

# Silence non-error logging from transformers
logging.set_verbosity_error()

# Load the provided dataset (expects 'Sentence' and 'Sentiment' columns)
file_path = 'data.csv'
df = pd.read_csv(file_path)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    df['Sentence'], df['Sentiment'], test_size=0.2, random_state=42
)

# Define classical models: TF-IDF features feeding three different classifiers
nb_model = make_pipeline(TfidfVectorizer(), MultinomialNB())
svm_model = make_pipeline(TfidfVectorizer(), SVC(probability=True))
rf_model = make_pipeline(TfidfVectorizer(), RandomForestClassifier())

# Train models
nb_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
rf_model.fit(X_train, y_train)

# Sentences offered in the dropdown
sentences = [
    "The announced restructuring will erase the company's indebtedness.",
    "UPM-Kymmene upgraded to `in-line' from `underperform' by Goldman Sachs.",
    "Profitability (in EBIT %) was not impressive due to expenses rising by 14.3%.",
    "The Finnish bank has issued a profit warning.",
    "TeliaSonera's underlying results however included 457 mln SKr in positive one-offs, hence the adjusted underlying EBITDA actually amounts to 7.309 bln SKr, clearly below expectations, analysts said."
]

# Map the BERT model's star-rating labels to negative/neutral/positive
def map_bert_label(label):
    if label in ["1 star", "2 stars"]:
        return "negative"
    elif label == "3 stars":
        return "neutral"
    elif label in ["4 stars", "5 stars"]:
        return "positive"

# Run the selected sentence through every model and collect label/score pairs
def analyze_sentiment(sentence):
    # Hugging Face model paths
    model_paths = {
        "BERT": "nlptown/bert-base-multilingual-uncased-sentiment",
    }

    # Analyze sentiment using transformers models
    # (the pipeline is rebuilt on each call, which is acceptable for a single-model demo)
    results = {}
    for model_name, model_path in model_paths.items():
        sentiment_analyzer = pipeline("sentiment-analysis", model=model_path)
        result = sentiment_analyzer(sentence[:512])[0]  # Truncate to 512 characters as a rough guard against the model's input limit
        if model_name == "BERT":
            result['label'] = map_bert_label(result['label'])
        results[model_name] = result

    # Analyze sentiment using the scikit-learn models
    results["Naive Bayes"] = {"label": nb_model.predict([sentence])[0], "score": nb_model.predict_proba([sentence]).max()}
    results["SVM"] = {"label": svm_model.predict([sentence])[0], "score": svm_model.predict_proba([sentence]).max()}
    results["Random Forest"] = {"label": rf_model.predict([sentence])[0], "score": rf_model.predict_proba([sentence]).max()}

    return sentence, results

# Custom CSS that shrinks Gradio's default font sizes for a compact layout
custom_css = """
.gradio-container, .gradio-container * { font-size: 0.65rem !important; }
.gradio-container h1 { font-size: 1.1rem !important; }
.gradio-container h2, .gradio-container h3 { font-size: 0.9rem !important; }
.gradio-container .label { font-size: 0.75rem !important; }
.gradio-container .output-markdown pre { font-size: 0.6rem !important; }
"""

# Create Gradio interface with custom CSS
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# Compare Sentiment Analysis Across Models")
    gr.Markdown("Select a sentence to see sentiment analysis results from multiple models.")
    dropdown = gr.Dropdown(choices=sentences, label="Select Sentence")
    text_output = gr.Textbox(label="Selected Sentence", lines=2)
    sentiment_output = gr.JSON(label="Sentiment Scores")
    dropdown.change(analyze_sentiment, inputs=[dropdown], outputs=[text_output, sentiment_output])

demo.launch()