Spaces:

AliHaider0343
/

implicit-and-explicit-aspects-Extraction-in-Restaurant-Reviews-Domain

Running

App Files Files Community

implicit-and-explicit-aspects-Extraction-in-Restaurant-Reviews-Domain / app.py

AliHaider0343

Update app.py

175a254 about 1 year ago

raw

history blame

3.79 kB

	import torch
	import streamlit as st
	from transformers import RobertaTokenizer, RobertaForSequenceClassification
	import nltk
	from nltk.corpus import stopwords
	import re
	import string

	# Fetch the NLTK resources this script relies on, but only when they are not
	# already installed, so the downloader is not invoked again on every run of
	# the script. 'punkt_tab' is requested as well because newer NLTK releases
	# load it (instead of 'punkt') inside nltk.word_tokenize.
	for _resource_path, _package_id in (
		('corpora/stopwords', 'stopwords'),
		('tokenizers/punkt', 'punkt'),
		('tokenizers/punkt_tab', 'punkt_tab'),
	):
		try:
			nltk.data.find(_resource_path)
		except LookupError:
			nltk.download(_package_id)

	# English stop words; 'and' is taken out of the set so it is never filtered.
	stop_words = set(stopwords.words('english'))
	stop_words.discard('and')


	def tokenize_sentences(sentence):
		"""Encode ``sentence`` with the module-level ``tokenizer``.

		The encoding adds special tokens and is padded or truncated to a fixed
		length of 128.

		Returns:
			A ``(input_ids, attention_mask)`` pair of PyTorch tensors.
		"""
		encode_options = {
			'add_special_tokens': True,
			'max_length': 128,
			'padding': 'max_length',
			'truncation': True,
			'return_attention_mask': True,
			'return_tensors': 'pt',
		}
		encoding = tokenizer.encode_plus(sentence, **encode_options)
		input_ids = torch.cat([encoding['input_ids']], dim=0)
		attention_mask = torch.cat([encoding['attention_mask']], dim=0)
		return input_ids, attention_mask

	def remove_stop_words(sentence):
		"""Return ``sentence`` with stop words removed.

		The sentence is split with ``nltk.word_tokenize``. A token is dropped when
		its lower-cased form appears in the module-level ``stop_words`` set or in
		the extra word list below. Remaining tokens are joined with single spaces.

		Args:
			sentence: Text to filter.

		Returns:
			The filtered text as a single string.
		"""
		custom_words = ['recommend', 'having', 'Hello', 'best', 'restaurant', 'top', 'want', 'need', 'well', 'most', 'should', 'be', 'good', 'also']
		# Build the lookup set locally rather than calling stop_words.update() on
		# each call, which kept mutating the shared module-level set. The extra
		# words are lower-cased because tokens are compared in lower case; an
		# entry such as 'Hello' could otherwise never match.
		blocked = stop_words.union(word.lower() for word in custom_words)
		kept_tokens = [
			token
			for token in nltk.word_tokenize(sentence)
			if token.lower() not in blocked
		]
		return ' '.join(kept_tokens)

	def preprocess_query(query):
		"""Normalise a raw query string.

		Steps, in order: convert to ``str`` and lower-case it, strip surrounding
		whitespace, remove stop words via ``remove_stop_words``, then delete every
		character contained in ``string.punctuation``.

		Returns:
			The cleaned query text.
		"""
		lowered = str(query).lower().strip()
		without_stop_words = remove_stop_words(lowered)
		punctuation_table = str.maketrans("", "", string.punctuation)
		return without_stop_words.translate(punctuation_table)

	def predict_aspects(sentence, threshold):
		"""Score ``sentence`` against every aspect label and keep the confident ones.

		Args:
			sentence: Text to classify; it is encoded with ``tokenize_sentences``.
			threshold: Labels whose sigmoid score is below this value are skipped.

		Returns:
			dict mapping each retained label from ``LABEL_COLUMNS_ASPECTS`` to its
			score as a percentage rounded to two decimals.
		"""
		input_ids, attention_mask = tokenize_sentences(sentence)
		# Inference only, so gradient tracking is switched off.
		with torch.no_grad():
			logits = aspects_model(input_ids, attention_mask=attention_mask).logits
			scores = torch.sigmoid(logits).squeeze().tolist()
		return {
			label: round(float(score) * 100, 2)
			for label, score in zip(LABEL_COLUMNS_ASPECTS, scores)
			if not score < threshold
		}

	# Load tokenizer and model
	BERT_MODEL_NAME_FOR_ASPECTS_CLASSIFICATION = 'roberta-large'

	# Output labels, in the order of the classifier's logits as consumed by
	# predict_aspects (zip of labels with scores).
	LABEL_COLUMNS_ASPECTS = [
		'FOOD-CUISINE', 'FOOD-DEALS', 'FOOD-DIET_OPTION', 'FOOD-EXPERIENCE',
		'FOOD-FLAVOR', 'FOOD-GENERAL', 'FOOD-INGREDIENT', 'FOOD-KITCHEN',
		'FOOD-MEAL', 'FOOD-MENU', 'FOOD-PORTION', 'FOOD-PRESENTATION',
		'FOOD-PRICE', 'FOOD-QUALITY', 'FOOD-RECOMMENDATION', 'FOOD-TASTE',
		'GENERAL-GENERAL',
		'RESTAURANT-ATMOSPHERE', 'RESTAURANT-BUILDING', 'RESTAURANT-DECORATION',
		'RESTAURANT-EXPERIENCE', 'RESTAURANT-FEATURES', 'RESTAURANT-GENERAL',
		'RESTAURANT-HYGIENE', 'RESTAURANT-KITCHEN', 'RESTAURANT-LOCATION',
		'RESTAURANT-OPTIONS', 'RESTAURANT-RECOMMENDATION',
		'RESTAURANT-SEATING_PLAN', 'RESTAURANT-VIEW',
		'SERVICE-BEHAVIOUR', 'SERVICE-EXPERIENCE', 'SERVICE-GENERAL',
		'SERVICE-WAIT_TIME',
	]


	@st.cache_resource
	def _load_tokenizer_and_model():
		"""Create the tokenizer and the classifier with the checkpoint applied.

		Streamlit re-executes this script on every widget interaction. Caching
		the loaded objects with ``st.cache_resource`` means the pretrained weights
		and the local checkpoint are read once per server process instead of on
		every rerun.

		Returns:
			A ``(tokenizer, model)`` tuple; the model is in evaluation mode.
		"""
		loaded_tokenizer = RobertaTokenizer.from_pretrained(
			BERT_MODEL_NAME_FOR_ASPECTS_CLASSIFICATION, do_lower_case=True
		)
		model = RobertaForSequenceClassification.from_pretrained(
			BERT_MODEL_NAME_FOR_ASPECTS_CLASSIFICATION,
			num_labels=len(LABEL_COLUMNS_ASPECTS),
		)
		# The .pth file is passed straight to load_state_dict, mapped onto the CPU.
		state_dict = torch.load(
			'./Aspects_Extraction_Model_updated.pth',
			map_location=torch.device('cpu'),
		)
		model.load_state_dict(state_dict)
		model.eval()
		return loaded_tokenizer, model


	tokenizer, aspects_model = _load_tokenizer_and_model()

	# Streamlit App
	st.title("Implicit and Explicit Aspect Extraction")

	sentence = st.text_input("Enter a sentence:")
	threshold = st.slider("Threshold", min_value=0.0, max_value=1.0, step=0.01, value=0.5)

	# Nothing is predicted until the user has typed some text.
	if sentence:
		processed_sentence = preprocess_query(sentence)
		results = predict_aspects(processed_sentence, threshold)
		if results:
			st.write("Predicted Aspects:")
			# Labels have the form 'CATEGORY-ASPECT', so every row has three cells.
			# Rows are passed as dicts so the column titles become real table
			# headers; a two-cell header list mixed in with three-cell data rows
			# would be rendered as a ragged first data row instead.
			table_rows = []
			for aspect, percentage in results.items():
				category, _, aspect_name = aspect.partition("-")
				table_rows.append({
					"Category": category,
					"Aspect": aspect_name,
					"Probability": f"{percentage}%",
				})
			st.table(table_rows)
		else:
			st.write("No aspects above the threshold.")