# SentimentAnalysis / helper_functions.py
# Author: kkesarwani
# Commit 1a44e19: GRU model for sentiment analysis
from typing import Iterable

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import spacy
from sklearn.preprocessing import LabelEncoder
# Read the emotion label names, one per line, with surrounding whitespace removed.
with open('label_names.txt', 'r') as f:
    labels = list(map(str.strip, f))

# Shared label encoder, fitted once on the label names read above.
encoder = LabelEncoder().fit(labels)

# Shared spaCy pipeline used by the text preprocessing helpers below.
nlp = spacy.load("en_core_web_sm")
def plot_pie_chart(data_frame: pd.DataFrame, title: str) -> None:
    """
    Show a pie chart of how often each value occurs in the 'label' column.

    Args:
        data_frame (pd.DataFrame): Data with a 'label' column to summarize.
        title (str): Prefix of the chart title; " Label Distribution" is appended.

    Returns:
        None
    """
    counts = data_frame['label'].value_counts()
    slice_names = counts.index

    plt.figure(figsize=(8, 8))
    sns.set_style("whitegrid")
    # One distinct hue per label value.
    palette = sns.color_palette("hls", len(slice_names))
    plt.pie(
        counts,
        labels=slice_names,
        colors=palette,
        autopct='%1.1f%%',
        startangle=90,
    )
    plt.title(f"{title} Label Distribution")
    # Display the chart, then close the figure.
    plt.show()
    plt.close()
def preprocess_text(df: pd.DataFrame, emotions: Iterable[str] = ('love', 'surprise')) -> pd.DataFrame:
    """
    Preprocess the text data in a DataFrame.

    Rows whose label is one of ``emotions`` are removed, the 'text' column is
    reduced to lemmas via ``preprocess_single_sentence`` and the 'label'
    column is encoded with the module-level ``encoder``. The input DataFrame
    is never modified; a new DataFrame is returned.

    Args:
        df (pd.DataFrame): DataFrame containing 'text' and 'label' columns.
        emotions (Iterable[str]): Labels whose rows are dropped before
            processing. Defaults to ('love', 'surprise').

    Returns:
        pd.DataFrame: New DataFrame with 'processed_text' and 'label_num'
        columns in place of the original 'text' and 'label' columns.
    """
    # Filter once and take an explicit copy: this avoids pandas'
    # SettingWithCopyWarning on the assignments below and guarantees the
    # caller's DataFrame is left untouched even when nothing is filtered out.
    kept = df.loc[~df['label'].isin(list(emotions))].copy()

    # Reuse the single-sentence helper so both code paths clean text identically.
    kept['processed_text'] = kept['text'].apply(preprocess_single_sentence)
    kept['label_num'] = encoder.transform(kept['label'])

    return kept.drop(columns=['text', 'label'])
def preprocess_single_sentence(sentence):
    """
    Reduce one sentence to the lemmas of its kept tokens.

    Args:
        sentence (str): Raw input sentence.

    Returns:
        str: Lemmas of the tokens that are not stop words, punctuation or
        whitespace, joined by single spaces.
    """
    lemmas = []
    for tok in nlp(sentence):
        # Skip stop words, punctuation and whitespace tokens.
        if tok.is_stop or tok.is_punct or tok.is_space:
            continue
        lemmas.append(tok.lemma_)
    return ' '.join(lemmas)