import streamlit as st
import pandas as pd
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

fine_tuned_model = "andyqin18/test-finetuned"
sample_text_num = 10
# Define analyze function
def analyze(model_name: str, text: str, top_k=1) -> list:
    '''
    Run sentiment analysis on a text with the given model and return the classifier output.
    '''
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, top_k=top_k)
    return classifier(text)
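# Note on output shape (as indexed throughout this app): with top_k=1 the top
# prediction is read as result[0]['label'], and with top_k=2 the two best
# labels are read as result[0][0] and result[0][1].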
# App title
st.title("Sentiment Analysis App - Milestone2")
st.write("This app analyzes the sentiment behind a text.")
st.write("It offers several pre-trained sentiment models as well as a fine-tuned toxicity classifier.")
# Model hub
model_descrip = {
    fine_tuned_model: "This is a customized BERT-base fine-tuned model that detects multiple types of toxicity in a text. \
        Labels: toxic, severe_toxic, obscene, threat, insult, identity_hate",
    "distilbert-base-uncased-finetuned-sst-2-english": "This model is a fine-tuned checkpoint of DistilBERT-base-uncased, fine-tuned on SST-2. \
        Labels: POSITIVE; NEGATIVE",
    "cardiffnlp/twitter-roberta-base-sentiment": "This is a RoBERTa-base model trained on ~58M tweets and fine-tuned for sentiment analysis with the TweetEval benchmark. \
        Labels: 0 -> Negative; 1 -> Neutral; 2 -> Positive",
    "finiteautomata/bertweet-base-sentiment-analysis": "Model trained on the SemEval 2017 corpus (around ~40k tweets). The base model is BERTweet, a RoBERTa model trained on English tweets. \
        Labels: POS; NEU; NEG"
}
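# Load the test comments used to demo the fine-tuned model and draw a random sample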
df = pd.read_csv("/milestone3/comp/test_comment.csv")
test_texts = df["comment_text"].values
sample_texts = np.random.choice(test_texts, size=sample_text_num, replace=False)
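# Summary table of the top-2 toxicity predictions for each sampled comment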
init_table_dict = {
    "Text": [],
    "Highest Toxicity Class": [],
    "Highest Score": [],
    "Second Highest Toxicity Class": [],
    "Second Highest Score": []
}
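# Run the fine-tuned classifier on each sampled comment and record its top-2 labels and scores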
for text in sample_texts:
    result = analyze(fine_tuned_model, text, top_k=2)
    init_table_dict["Text"].append(text[:50])
    init_table_dict["Highest Toxicity Class"].append(result[0][0]['label'])
    init_table_dict["Highest Score"].append(result[0][0]['score'])
    init_table_dict["Second Highest Toxicity Class"].append(result[0][1]['label'])
    init_table_dict["Second Highest Score"].append(result[0][1]['score'])
user_input = st.text_input("Enter your text:", value="NYU is better than Columbia.")
user_model = st.selectbox("Please select a model:", model_descrip)
# Display model information
st.write("### Model Description:")
st.write(model_descrip[user_model])
# Perform analysis and print result
if st.button("Analyze"):
    if not user_input:
        st.write("Please enter a text.")
    else:
        with st.spinner("Hang on.... Analyzing..."):
            if user_model == fine_tuned_model:
                # Top-2 toxicity labels for the user text
                result = analyze(user_model, user_input, top_k=2)[0]
                # Also show the pre-computed predictions for the sampled test comments
                df = pd.DataFrame(init_table_dict)
                st.dataframe(df)
            else:
                result = analyze(user_model, user_input)
            st.write("Result:")
            st.write(f"Label: **{result[0]['label']}**")
            st.write(f"Confidence Score: **{result[0]['score']}**")
else:
    st.write("Go on! Try the app!")