Spaces:

kkesarwani
/

SentimentAnalysis

Sleeping

App Files Files Community

SentimentAnalysis / helper_functions.py

kkesarwani

gru model for sentiment analysis

1a44e19 about 1 year ago

raw

history blame

2.16 kB

	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.preprocessing import LabelEncoder
	import spacy

	# Read the label names, one per line, stripping surrounding whitespace.
	with open('label_names.txt', 'r') as f:
	    labels = [line.strip() for line in f]

	# Fit the label encoder on the names read above.
	encoder = LabelEncoder().fit(labels)

	# spaCy pipeline shared by the text preprocessing functions in this module.
	nlp = spacy.load("en_core_web_sm")


	def plot_pie_chart(data_frame: pd.DataFrame, title: str) -> None:
	    """Show a pie chart of how often each value of the 'label' column occurs.

	    Args:
	        data_frame (pd.DataFrame): Data to visualize; must have a 'label' column.
	        title (str): Prefix of the chart title; " Label Distribution" is appended.

	    Returns:
	        None
	    """
	    counts = data_frame['label'].value_counts()

	    plt.figure(figsize=(8, 8))
	    sns.set_style("whitegrid")

	    # One palette colour per label value.
	    palette = sns.color_palette("hls", len(counts.index))
	    plt.pie(
	        counts,
	        labels=counts.index,
	        colors=palette,
	        autopct='%1.1f%%',
	        startangle=90,
	    )
	    plt.title(f"{title} Label Distribution")

	    # Display the figure, then close it.
	    plt.show()
	    plt.close()

	def preprocess_text(df: pd.DataFrame, emotions: list = ['love', 'surprise']) -> pd.DataFrame:
	    """Drop unwanted emotions, lemmatize the text and encode the labels.

	    The input DataFrame is never modified; all work happens on a copy.

	    Args:
	        df (pd.DataFrame): DataFrame containing 'text' and 'label' columns.
	        emotions (list): Label values whose rows are removed before
	            processing. The default list is only read, never mutated.

	    Returns:
	        pd.DataFrame: The remaining rows, with the 'text' and 'label' columns
	        replaced by 'processed_text' (the output of
	        ``preprocess_single_sentence`` for each text) and 'label_num' (the
	        label transformed by the module-level ``encoder``).
	    """
	    # Take an explicit copy of the filtered rows. Without it the column
	    # assignments below would write into a slice of the caller's frame
	    # (SettingWithCopyWarning), or, when `emotions` is empty, straight into
	    # the caller's DataFrame itself.
	    kept = df[~df['label'].isin(list(emotions))].copy()

	    # Reuse the single-sentence helper so both code paths lemmatize identically.
	    kept['processed_text'] = kept['text'].apply(preprocess_single_sentence)
	    kept['label_num'] = encoder.transform(kept['label'])

	    return kept.drop(columns=['text', 'label'])

	def preprocess_single_sentence(sentence):
	    """Reduce a sentence to its space-separated lemmas.

	    Args:
	        sentence (str): Input sentence.

	    Returns:
	        str: Lemmas of the tokens that are not stop words, punctuation or
	        whitespace, joined by single spaces.
	    """
	    lemmas = []
	    for tok in nlp(sentence):
	        # Skip stop words, punctuation and whitespace tokens.
	        if tok.is_stop or tok.is_punct or tok.is_space:
	            continue
	        lemmas.append(tok.lemma_)
	    return ' '.join(lemmas)