AliHaider0343's picture
Update app.py
175a254
raw
history blame
3.79 kB
import torch
import streamlit as st
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import nltk
from nltk.corpus import stopwords
import re
import string
# Fetch the NLTK data packages this script asks for: the stop-word corpus
# read just below, and 'punkt' (presumably for nltk.word_tokenize, which is
# called in remove_stop_words -- confirm against the installed NLTK version).
for resource_name in ('stopwords', 'punkt'):
    nltk.download(resource_name)

# English stop words with 'and' excluded, so 'and' is never filtered out of
# a query by the stop-word removal step.
stop_words = set(stopwords.words('english')) - {'and'}
def tokenize_sentences(sentence):
    """Encode one sentence into fixed-length model inputs.

    The sentence is encoded with the module-level ``tokenizer``: special
    tokens are added, and the sequence is padded or truncated to exactly
    128 tokens.

    Args:
        sentence: Text to encode.

    Returns:
        A ``(input_ids, attention_mask)`` tuple of PyTorch tensors, exactly
        as produced by the tokenizer with ``return_tensors='pt'``
        (expected shape ``(1, 128)`` for a single sentence -- confirm
        against the tokenizer documentation).
    """
    encoded = tokenizer.encode_plus(
        sentence,
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )
    # The tensors are returned directly: concatenating a single-element
    # list along dim 0 (as was done before) only produced a copy of the
    # same tensor.
    return encoded['input_ids'], encoded['attention_mask']
# Extra, domain-specific words dropped in addition to the NLTK stop words.
# Every entry is lowercase because tokens are lowercased before the
# membership test; a capitalised entry (such as the former 'Hello') could
# never match.
_CUSTOM_STOP_WORDS = frozenset({
    'recommend', 'having', 'hello', 'best', 'restaurant', 'top', 'want',
    'need', 'well', 'most', 'should', 'be', 'good', 'also',
})


def remove_stop_words(sentence):
    """Return ``sentence`` with stop words removed.

    The sentence is split with ``nltk.word_tokenize``. A token is dropped
    when its lowercase form is in the module-level ``stop_words`` set or in
    ``_CUSTOM_STOP_WORDS``. The shared ``stop_words`` set is only read,
    never modified, so repeated calls have no side effects.

    Args:
        sentence: Text to filter.

    Returns:
        The surviving tokens, in their original order and casing, joined
        by single spaces.
    """
    kept_tokens = []
    for token in nltk.word_tokenize(sentence):
        lowered = token.lower()
        if lowered in stop_words or lowered in _CUSTOM_STOP_WORDS:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)
def preprocess_query(query):
    """Normalise a raw query before it is passed to the classifier.

    Steps, in order: convert to ``str`` and lowercase, strip surrounding
    whitespace, remove stop words via ``remove_stop_words``, then delete
    every character listed in ``string.punctuation``.

    Args:
        query: The user input; any object is accepted and converted with
            ``str()``.

    Returns:
        The cleaned query string.
    """
    normalized = str(query).lower().strip()
    without_stop_words = remove_stop_words(normalized)
    # Punctuation is removed only after stop-word filtering, so the
    # tokenizer inside remove_stop_words still sees the original marks.
    punctuation_remover = str.maketrans("", "", string.punctuation)
    return without_stop_words.translate(punctuation_remover)
def predict_aspects(sentence, threshold):
    """Score a sentence against every aspect label.

    The sentence is encoded with ``tokenize_sentences`` and run through the
    module-level ``aspects_model``. The logits are passed through a sigmoid
    and paired, in order, with ``LABEL_COLUMNS_ASPECTS``.

    Args:
        sentence: Text to classify.
        threshold: Labels whose sigmoid score is strictly below this value
            are left out of the result.

    Returns:
        A dict mapping each retained label to its score expressed as a
        percentage rounded to two decimal places.
    """
    input_ids, attention_mask = tokenize_sentences(sentence)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = aspects_model(input_ids, attention_mask=attention_mask).logits

    scores = torch.sigmoid(logits).squeeze().tolist()

    # A label is skipped only when its score is strictly below the
    # threshold; the condition is written as a negated "<" to keep exactly
    # that comparison.
    return {
        label: round(float(score) * 100, 2)
        for label, score in zip(LABEL_COLUMNS_ASPECTS, scores)
        if not (score < threshold)
    }
# Load tokenizer and model
BERT_MODEL_NAME_FOR_ASPECTS_CLASSIFICATION = 'roberta-large'

# Aspect labels; the number of entries sets the size of the classification
# head, and predict_aspects pairs them in this order with the model scores.
LABEL_COLUMNS_ASPECTS = [
    'FOOD-CUISINE', 'FOOD-DEALS', 'FOOD-DIET_OPTION', 'FOOD-EXPERIENCE',
    'FOOD-FLAVOR', 'FOOD-GENERAL', 'FOOD-INGREDIENT', 'FOOD-KITCHEN',
    'FOOD-MEAL', 'FOOD-MENU', 'FOOD-PORTION', 'FOOD-PRESENTATION',
    'FOOD-PRICE', 'FOOD-QUALITY', 'FOOD-RECOMMENDATION', 'FOOD-TASTE',
    'GENERAL-GENERAL', 'RESTAURANT-ATMOSPHERE', 'RESTAURANT-BUILDING',
    'RESTAURANT-DECORATION', 'RESTAURANT-EXPERIENCE', 'RESTAURANT-FEATURES',
    'RESTAURANT-GENERAL', 'RESTAURANT-HYGIENE', 'RESTAURANT-KITCHEN',
    'RESTAURANT-LOCATION', 'RESTAURANT-OPTIONS', 'RESTAURANT-RECOMMENDATION',
    'RESTAURANT-SEATING_PLAN', 'RESTAURANT-VIEW', 'SERVICE-BEHAVIOUR',
    'SERVICE-EXPERIENCE', 'SERVICE-GENERAL', 'SERVICE-WAIT_TIME',
]

# Streamlit re-executes this script on every widget interaction. Without
# caching, the tokenizer, the pretrained model and the checkpoint would be
# loaded again each time the text box or the slider changes. Use
# st.cache_resource when the installed Streamlit provides it; otherwise fall
# back to an identity decorator, which keeps the previous uncached behaviour.
_cache_resource = getattr(st, 'cache_resource', None) or (lambda loader: loader)


@_cache_resource
def _load_tokenizer_and_model():
    """Load the tokenizer and the fine-tuned aspect classifier.

    Returns:
        A ``(tokenizer, model)`` tuple. The model has one output per entry
        of ``LABEL_COLUMNS_ASPECTS``, carries the weights stored in
        ``./Aspects_Extraction_Model_updated.pth`` (mapped to the CPU), and
        is switched to evaluation mode.
    """
    loaded_tokenizer = RobertaTokenizer.from_pretrained(
        BERT_MODEL_NAME_FOR_ASPECTS_CLASSIFICATION, do_lower_case=True
    )
    model = RobertaForSequenceClassification.from_pretrained(
        BERT_MODEL_NAME_FOR_ASPECTS_CLASSIFICATION,
        num_labels=len(LABEL_COLUMNS_ASPECTS),
    )
    # NOTE(review): torch.load unpickles the file; only load checkpoints
    # from a trusted source.
    state_dict = torch.load(
        './Aspects_Extraction_Model_updated.pth',
        map_location=torch.device('cpu'),
    )
    model.load_state_dict(state_dict)
    model.eval()
    return loaded_tokenizer, model


tokenizer, aspects_model = _load_tokenizer_and_model()
# Streamlit App
st.title("Implicit and Explicit Aspect Extraction")
sentence = st.text_input("Enter a sentence:")
threshold = st.slider("Threshold", min_value=0.0, max_value=1.0, step=0.01, value=0.5)

# Nothing is predicted until the user has typed something.
if sentence:
    processed_sentence = preprocess_query(sentence)
    results = predict_aspects(processed_sentence, threshold)
    if results:
        st.write("Predicted Aspects:")
        # Each label has the form "<CATEGORY>-<ASPECT>", so every row holds
        # three cells; the header therefore needs three columns as well.
        table_data = [["Category", "Aspect", "Probability"]]
        for aspect, percentage in results.items():
            # partition always yields exactly two name parts, keeping every
            # row the same width as the header.
            category, _, aspect_name = aspect.partition("-")
            table_data.append([category, aspect_name, f"{percentage}%"])
        st.table(table_data)
    else:
        st.write("No aspects above the threshold.")