Spaces:

DINGOLANI
/

testautosearch

Sleeping

App Files Files Community

testautosearch / app.py

DINGOLANI

Update app.py

2ae3444 verified 3 months ago

raw

history blame contribute delete

4.01 kB

	import gradio as gr
	import kagglehub
	from sentence_transformers import SentenceTransformer, util
	import pandas as pd
	from rapidfuzz import fuzz, process
	import os

	# Download the dataset from Kaggle. The value returned by kagglehub is used
	# below as the directory that contains "vestiaire.csv".
	dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")
	csv_file = os.path.join(dataset_path, "vestiaire.csv")

	# Read only the first 5 rows: this pass exists just to inspect the column
	# names, which are printed for reference.
	df = pd.read_csv(csv_file, nrows=5)
	print("Column Names in Dataset:", df.columns)

	# Resolve which of several candidate column names the dataset actually uses
	def get_column_name(possible_names, df):
	    """Return the first entry of ``possible_names`` that is a column of ``df``.

	    Candidates are tried in the order given and the search stops at the
	    first hit.

	    Raises:
	        KeyError: If none of the candidates is a column of ``df``.
	    """
	    # A private sentinel distinguishes "no match" from any real candidate value.
	    not_found = object()
	    match = next(
	        (candidate for candidate in possible_names if candidate in df.columns),
	        not_found,
	    )
	    if match is not_found:
	        raise KeyError(f"None of the expected column names {possible_names} found in dataset. Available columns: {df.columns}")
	    return match

	# Resolve the column names used for designers and categories. Each call
	# raises KeyError if its expected column is absent from the 5-row sample.
	designer_column = get_column_name(["brand_name"], df)
	category_column = get_column_name(["product_category"], df)

	# Reload the CSV, this time keeping the first 10,000 rows (not the whole
	# file), and rebind `df` to the larger frame.
	df = pd.read_csv(csv_file, nrows=10000)

	# Unique, non-null designer and category values as plain lists; these are
	# the candidate strings searched by the functions below.
	designer_data = df[designer_column].dropna().unique().tolist()
	category_data = df[category_column].dropna().unique().tolist()

	# Load the sentence-embedding model used by the semantic search below.
	model_name = "sentence-transformers/all-MiniLM-L6-v2"
	model = SentenceTransformer(model_name)

	# Lazily-built cache of the search corpus and its embeddings. The corpus is
	# derived from module-level lists that this file does not modify after
	# loading, so it is encoded once instead of on every query.
	_corpus_cache = {}


	def _get_corpus():
	    """Return ``(texts, embeddings)`` for designers + categories, encoding once."""
	    if "corpus" not in _corpus_cache:
	        texts = designer_data + category_data
	        embeddings = model.encode(texts, convert_to_tensor=True) if texts else None
	        # Single assignment so a concurrent caller never sees a half-built entry.
	        _corpus_cache["corpus"] = (texts, embeddings)
	    return _corpus_cache["corpus"]


	# Function to find synonyms dynamically with fallback
	def find_synonym(word, top_n=1, min_score=0.6):
	    """Return known designer/category names semantically close to ``word``.

	    Args:
	        word: Query text to embed and search for.
	        top_n: Maximum number of nearest corpus entries to consider.
	        min_score: Similarity a hit must strictly exceed to be returned.
	            Defaults to 0.6, the threshold this function has always used.

	    Returns:
	        A list (possibly empty) of corpus strings, in the order reported by
	        ``util.semantic_search``, whose score is above ``min_score``.
	    """
	    texts, corpus_embeddings = _get_corpus()
	    if not texts:
	        # Nothing to search against.
	        return []

	    query_embedding = model.encode(word, convert_to_tensor=True)
	    results = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_n)
	    if not results:
	        return []

	    # results[0] holds the hits for the single query that was submitted.
	    return [texts[hit['corpus_id']] for hit in results[0] if hit['score'] > min_score]

	# Spelling correction via fuzzy matching against known designers and categories
	def correct_spelling(word):
	    """Return the closest known designer or category name for ``word``.

	    The single best ``fuzz.partial_ratio`` match across designers and
	    categories replaces ``word`` only when its score is strictly above 70;
	    otherwise ``word`` is returned unchanged.
	    """
	    vocabulary = designer_data + category_data
	    top_hits = process.extract(word, vocabulary, scorer=fuzz.partial_ratio, limit=1)
	    if not top_hits:
	        return word

	    # Each hit is a (choice, score, index) triple; the index is not needed.
	    candidate, similarity, _index = top_hits[0]
	    return candidate if similarity > 70 else word

	# Autocomplete function with safe handling of synonyms
	def autocomplete(query):
	    """Produce autocomplete suggestions for a partially typed query.

	    Pipeline: trim the text, apply spelling correction, look up a semantic
	    synonym (falling back to the corrected text when none is found), then
	    fuzzy-match the result against designers and categories separately.

	    Args:
	        query: Text typed by the user. ``None`` and whitespace-only text are
	            both treated as empty input.

	    Returns:
	        A 4-tuple ``(correction_status, synonym_status, designer_suggestions,
	        category_suggestions)``. Each status is ``"before → after"`` when
	        that step changed the text and the string ``"None"`` otherwise. The
	        suggestion lists hold up to 5 names each. Empty input yields
	        ``("None", "None", [], [])``.
	    """
	    # Guard against None as well as blank text so a missing value cannot
	    # raise AttributeError on .strip().
	    if query is None or not query.strip():
	        return "None", "None", [], []

	    original_query = query.strip()
	    corrected_query = correct_spelling(original_query)
	    synonym_results = find_synonym(corrected_query, top_n=1)
	    synonym_query = synonym_results[0] if synonym_results else corrected_query

	    # Perform fuzzy matching for designers and categories separately
	    designer_matches = process.extract(synonym_query, designer_data, scorer=fuzz.partial_ratio, limit=5)
	    category_matches = process.extract(synonym_query, category_data, scorer=fuzz.partial_ratio, limit=5)

	    # Keep only the matched strings; scores and indices are discarded
	    designer_suggestions = [match[0] for match in designer_matches]
	    category_suggestions = [match[0] for match in category_matches]

	    # Detect if spelling correction or synonym replacement occurred
	    correction_status = f"{original_query} → {corrected_query}" if original_query != corrected_query else "None"
	    synonym_status = f"{corrected_query} → {synonym_query}" if corrected_query != synonym_query else "None"

	    return correction_status, synonym_status, designer_suggestions, category_suggestions

	# Gradio UI: one input box wired to four read-only result boxes
	with gr.Blocks() as demo:
	    gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Designers & Categories)")

	    # Input box followed by the four result boxes, created in display order.
	    query = gr.Textbox(label="Start typing for autocomplete")
	    correction_output = gr.Textbox(interactive=False, label="Spelling Correction Applied")
	    synonym_output = gr.Textbox(interactive=False, label="Synonym Applied")
	    designer_output = gr.Textbox(interactive=False, lines=5, label="Designer Suggestions")
	    category_output = gr.Textbox(interactive=False, lines=5, label="Category Suggestions")

	    # Re-run autocomplete whenever the query text changes; its four return
	    # values map onto the four output boxes in this order.
	    query.change(
	        autocomplete,
	        outputs=[correction_output, synonym_output, designer_output, category_output],
	        inputs=query,
	    )

	demo.launch(share=True)