# AzizTh's picture
# Update app.py
# 5aa6ce6 verified
import pandas as pd
from sentence_transformers import SentenceTransformer
import gradio as gr
import spacy
import subprocess
# --- One-time model and data setup (executed when the module is loaded) ---
import sys  # used to run the spaCy downloader with the current interpreter

_SPACY_MODEL = "en_core_web_trf"

# Load the spaCy pipeline used for place-name extraction. Only download the
# model when it is actually missing (spacy.load raises OSError in that case)
# instead of re-running the download on every start-up.
try:
    nlp = spacy.load(_SPACY_MODEL)
except OSError:
    # sys.executable targets the interpreter/virtualenv running this script;
    # a bare "python" may resolve to a different installation. check=True
    # makes a failed download raise here rather than being silently ignored.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", _SPACY_MODEL],
        check=True,
    )
    nlp = spacy.load(_SPACY_MODEL)

# Sentence-embedding model used to encode user queries.
# NOTE(review): trust_remote_code=True runs code shipped with the model
# repository -- confirm the source is trusted (consider pinning a revision).
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)

# Hotel table searched by the functions below; the country spelling
# 'Türkiye' is normalised to 'Turkey'.
df_new = pd.read_csv('last_df.csv')
df_new['country'] = df_new['country'].replace('Türkiye', 'Turkey')
#
#
def get_city_name(query):
    """Return the first geopolitical entity found in *query*, lower-cased.

    The query is run through the module-level spaCy pipeline ``nlp``; its
    entities are scanned in document order and the first one labelled
    ``GPE`` is returned as lower-case text.

    Returns ``None`` when the query contains no ``GPE`` entity.
    """
    gpe_mentions = (
        entity.text.lower()
        for entity in nlp(query).ents
        if entity.label_ == "GPE"
    )
    # next() with a default yields the first match, or None if there is none.
    return next(gpe_mentions, None)
def filter_by_loc(query):
    """Return the hotels located in the place mentioned in *query*.

    The place name comes from ``get_city_name(query)`` and is compared
    case-insensitively with the ``locality`` column of ``df_new``.

    Returns:
        A new DataFrame holding the matching rows, or a copy of the whole
        table when the query names no place or the place matches no
        locality. A copy is always returned so callers can add columns
        without modifying the shared ``df_new`` (and without pandas'
        SettingWithCopyWarning).
    """
    city_name = get_city_name(query)
    if city_name is None:
        return df_new.copy()

    # get_city_name already lower-cases its result, so lower-case only the
    # column, and do it once. Missing localities compare as False.
    in_city = df_new['locality'].str.lower() == city_name
    if not in_city.any():
        return df_new.copy()
    return df_new[in_city].copy()
import torch.nn as nn
import torch
import ast
def get_similarity_score(row, query_embedding):
    """Score one hotel row against the query embedding.

    Each of the row's three embedding fields holds the string form of a
    list of numbers. Every field is parsed with ``ast.literal_eval``,
    turned into a tensor, and compared with *query_embedding* by cosine
    similarity.

    Args:
        row: Mapping-like row providing ``rating_value_embedding``,
            ``hotel_combined_embedding`` and ``review_embedding``.
        query_embedding: Tensor to compare each stored embedding with.

    Returns:
        The sum of the three cosine similarities as a Python float.
    """
    cosine = nn.CosineSimilarity(dim=0)  # dim=0: embeddings are compared as 1-D vectors

    per_column = []
    for column in (
        'rating_value_embedding',
        'hotel_combined_embedding',
        'review_embedding',
    ):
        # literal_eval only accepts Python literals, so the stored text is
        # parsed without executing arbitrary code.
        stored_vector = torch.tensor(ast.literal_eval(row[column]))
        per_column.append(cosine(stored_vector, query_embedding).item())

    rating_sim, hotel_sim, review_sim = per_column
    return rating_sim + hotel_sim + review_sim
def process_query(query):
    """Return a formatted description of the hotel that best matches *query*.

    Steps:
      1. Encode the query with the module-level sentence-embedding ``model``.
      2. Narrow the hotel table with ``filter_by_loc(query)``.
      3. Score every remaining row with ``get_similarity_score`` and keep
         the highest-scoring one.

    Args:
        query: Free-text search string entered by the user.

    Returns:
        A multi-line string with the best hotel's name, city, country, star
        rating and price range, or a short message when the query is blank
        or there are no hotels to rank.
    """
    # A blank (or missing) query carries nothing to match on.
    if query is None or not query.strip():
        return "Please enter a query."

    query_embedding_tensor = torch.tensor(model.encode(query))

    candidates = filter_by_loc(query)
    if candidates.empty:
        # Nothing to rank; indexing the first row below would raise.
        return "No hotels are available to search."

    scores = candidates.apply(
        lambda row: get_similarity_score(row, query_embedding_tensor),
        axis=1,
    )
    # assign() builds a new frame, so neither the shared hotel table nor a
    # slice of it is written to.
    ranked = candidates.assign(similarity_score=scores)
    top_similar = ranked.sort_values('similarity_score', ascending=False).head(1)

    hotel_name = top_similar['hotel_name'].values[0]
    hotel_rate = top_similar['rate'].values[0]
    hotel_price_range = top_similar['price_range'].values[0]
    hotel_city = top_similar['locality'].values[0]
    hotel_country = top_similar['country'].values[0]

    # Format the output: one field per line, ending with a newline.
    lines = [
        "Here's the most similar hotel we found:",
        "-" * 30,
        f"Hotel Name: {hotel_name}",
        f"City: {hotel_city}",
        f"Country: {hotel_country}",
        f"Star Rating: {hotel_rate}",
        f"Price Range: {hotel_price_range}",
    ]
    return "\n".join(lines) + "\n"
# Gradio front end: one text box in, plain text out, backed by process_query.
_query_box = gr.Textbox(label="Query", placeholder="Enter your query")

ui = gr.Interface(
    fn=process_query,
    inputs=_query_box,
    outputs="text",
    title="Hotel Similarity Finder",
    description="Enter a query to find similar hotels.",
)

# Start the web server as soon as the script is executed.
ui.launch()