import gradio as gr
import kagglehub
from sentence_transformers import SentenceTransformer, util
import pandas as pd
from rapidfuzz import fuzz, process
import os
# Download the dataset from Kaggle
dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")
csv_file = os.path.join(dataset_path, "vestiaire.csv")
# Load a small sample of the dataset to inspect the column names
df = pd.read_csv(csv_file, nrows=5)
print("Column Names in Dataset:", df.columns)
# Function to get the correct column name
def get_column_name(possible_names, df):
    for name in possible_names:
        if name in df.columns:
            return name
    raise KeyError(f"None of the expected column names {possible_names} found in dataset. Available columns: {df.columns}")
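# Example (hypothetical candidate list): get_column_name(["brand_name", "brand"], df)
# returns whichever of the two names exists in df.columns, or raises KeyError if neither does.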
# Map column names dynamically
designer_column = get_column_name(["brand_name"], df)
category_column = get_column_name(["product_category"], df)
# Load a 10,000-row subset of the dataset (for speed)
df = pd.read_csv(csv_file, nrows=10000)
# Extract relevant data
designer_data = df[designer_column].dropna().unique().tolist()
category_data = df[category_column].dropna().unique().tolist()
# Load the model
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(model_name)
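# Note: find_synonym below re-encodes the full corpus on every call. As an optional
# optimization (not wired in here), the corpus embeddings could be precomputed once, e.g.:
# corpus_embeddings = model.encode(designer_data + category_data, convert_to_tensor=True)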
# Function to find synonyms dynamically with fallback
def find_synonym(word, top_n=1):
    query_embedding = model.encode(word, convert_to_tensor=True)
    combined_data = designer_data + category_data
    results = util.semantic_search(query_embedding, model.encode(combined_data, convert_to_tensor=True), top_k=top_n)
    # Check if results exist
    if results and len(results[0]) > 0:
        return [combined_data[result['corpus_id']] for result in results[0] if result['score'] > 0.6]
    return []  # Return an empty list if no results
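# Example (illustrative, depends on the loaded data): find_synonym("purse") might
# return ["Handbags"] if a category embedding scores above the 0.6 cosine-similarity threshold.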
# Function to correct spellings
def correct_spelling(word):
    matches = process.extract(word, designer_data + category_data, scorer=fuzz.partial_ratio, limit=1)
    if matches:
        best_match, score, _ = matches[0]
        if score > 70:
            return best_match
    return word
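# Example (illustrative, depends on the loaded data): correct_spelling("guci")
# would return "Gucci" if the partial-ratio score against the brand list exceeds 70;
# otherwise the input is returned unchanged.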
# Autocomplete function with safe handling of synonyms
def autocomplete(query):
    if not query.strip():
        return "None", "None", "", ""
    original_query = query.strip()
    corrected_query = correct_spelling(original_query)
    synonym_results = find_synonym(corrected_query, top_n=1)
    synonym_query = synonym_results[0] if synonym_results else corrected_query
    # Perform fuzzy matching for designers and categories separately
    designer_matches = process.extract(synonym_query, designer_data, scorer=fuzz.partial_ratio, limit=5)
    category_matches = process.extract(synonym_query, category_data, scorer=fuzz.partial_ratio, limit=5)
    # Extract top matches for designers and categories
    designer_suggestions = [match[0] for match in designer_matches]
    category_suggestions = [match[0] for match in category_matches]
    # Detect whether spelling correction or synonym replacement occurred
    correction_status = f"{original_query} → {corrected_query}" if original_query != corrected_query else "None"
    synonym_status = f"{corrected_query} → {synonym_query}" if corrected_query != synonym_query else "None"
    # Join suggestions so each one appears on its own line in the output textboxes
    return correction_status, synonym_status, "\n".join(designer_suggestions), "\n".join(category_suggestions)
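# Example (illustrative): autocomplete("guci") could return a correction status such as
# "guci → Gucci", a synonym status of "None", and newline-joined designer and category
# suggestion strings for the output textboxes.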
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Designers & Categories)")
    query = gr.Textbox(label="Start typing for autocomplete")
    correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
    synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
    designer_output = gr.Textbox(label="Designer Suggestions", lines=5, interactive=False)
    category_output = gr.Textbox(label="Category Suggestions", lines=5, interactive=False)
    query.change(
        fn=autocomplete,
        inputs=query,
        outputs=[correction_output, synonym_output, designer_output, category_output]
    )
demo.launch(share=True)