Spaces:

DINGOLANI
/

testautosearch

Sleeping

File size: 4,007 Bytes

debeeab
50bd810
bfb7b53
f053894
90a841f
50bd810
 
2ae3444
50bd810
7930eed
50bd810
9373e7c
 
 
50bd810
7930eed
9373e7c
 
 
 
3f671f4
debeeab
9373e7c
3f671f4
 
9373e7c
 
 
b718343
7930eed
 
 
90a841f
7930eed
9373e7c
 
debeeab
335d436
bfb7b53
 
28c99b4
 
335d436
 
28c99b4
335d436
bfb7b53
7167762
 
7930eed
7efa999
50bd810
 
7efa999
50bd810
7167762
335d436
debeeab
d52c941
7930eed
7167762
 
 
335d436
 
90a841f
7930eed
 
 
 
 
 
 
7167762
7930eed
7167762
 
0ede6b5
7930eed
debeeab
7167762
debeeab
7930eed
fd80bbd
411666d
7167762
 
7930eed
 
fd80bbd
7930eed
 
 
 
 
debeeab
335d436

import gradio as gr
import kagglehub
from sentence_transformers import SentenceTransformer, util
import pandas as pd
from rapidfuzz import fuzz, process
import os

# Download the dataset from Kaggle. The returned path is joined with the CSV
# file name below, i.e. it is used as the directory that holds the files.
dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")
csv_file = os.path.join(dataset_path, "vestiaire.csv")

# Read only the first 5 rows so the column names can be printed and checked
# before a larger portion of the file is loaded further down.
df = pd.read_csv(csv_file, nrows=5)
print("Column Names in Dataset:", df.columns)

# Resolve which of several candidate column names the dataset actually uses
def get_column_name(possible_names, df):
    """Return the first entry of ``possible_names`` that is a column of ``df``.

    Args:
        possible_names: Candidate column names, checked in order.
        df: Object exposing a ``columns`` attribute that supports ``in``.

    Returns:
        The first candidate found in ``df.columns``.

    Raises:
        KeyError: If none of the candidates is present; the message lists the
            candidates and the available columns.
    """
    available = df.columns
    # A private sentinel keeps "nothing matched" distinct from any real value.
    not_found = object()
    chosen = next(
        (candidate for candidate in possible_names if candidate in available),
        not_found,
    )
    if chosen is not_found:
        raise KeyError(f"None of the expected column names {possible_names} found in dataset. Available columns: {available}")
    return chosen

# Helper shared by the designer and category extraction below
def _distinct_values(frame, column):
    """Return the distinct non-null values of ``frame[column]`` as a list."""
    non_null = frame[column].dropna()
    return non_null.unique().tolist()


# Resolve the designer and category column names against the preview frame
designer_column = get_column_name(["brand_name"], df)
category_column = get_column_name(["product_category"], df)

# Reload the CSV, this time reading up to the first 10,000 rows
df = pd.read_csv(csv_file, nrows=10000)

# Search vocabularies: distinct designers and distinct categories
designer_data = _distinct_values(df, designer_column)
category_data = _distinct_values(df, category_column)

# Sentence-embedding model used for the semantic (synonym) lookup
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(model_name)

# Function to find synonyms dynamically with fallback
# Holds at most one ``(corpus, embeddings)`` pair so the corpus is not
# re-encoded on every query; it is replaced whenever the corpus changes.
_corpus_embedding_cache = None


def _encode_corpus(corpus):
    """Return embeddings for ``corpus``, reusing the cached ones when possible."""
    global _corpus_embedding_cache
    cached = _corpus_embedding_cache
    # Comparing the lists is cheap next to encoding and keeps the cache
    # correct even if the designer/category data is changed later.
    if cached is None or cached[0] != corpus:
        cached = (corpus, model.encode(corpus, convert_to_tensor=True))
        # Single assignment of the pair: readers never see a corpus paired
        # with embeddings computed for a different corpus.
        _corpus_embedding_cache = cached
    return cached[1]


def find_synonym(word, top_n=1):
    """Return the designer/category entries semantically closest to ``word``.

    The query is embedded with the sentence-transformer model and compared
    against the combined designer + category vocabulary via
    ``util.semantic_search``.

    Args:
        word: Query text to look up.
        top_n: Maximum number of nearest entries to consider.

    Returns:
        A list of vocabulary entries among the ``top_n`` hits whose similarity
        score is greater than 0.6. The list is empty when nothing qualifies
        or when the vocabulary is empty.
    """
    combined_data = designer_data + category_data
    if not combined_data:
        # Nothing to search against; skip the model calls entirely.
        return []

    corpus_embeddings = _encode_corpus(combined_data)
    query_embedding = model.encode(word, convert_to_tensor=True)
    results = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_n)

    # ``results`` holds one hit list per query; only one query is sent.
    if not results:
        return []
    return [combined_data[hit['corpus_id']] for hit in results[0] if hit['score'] > 0.6]

# Spelling correction against the known designers and categories
def correct_spelling(word):
    """Return the closest known designer/category for ``word``, if close enough.

    The single best fuzzy match (partial ratio) over the combined designer and
    category lists replaces ``word`` only when its score is above 70;
    otherwise ``word`` is returned unchanged.
    """
    vocabulary = designer_data + category_data
    top_hits = process.extract(word, vocabulary, scorer=fuzz.partial_ratio, limit=1)
    if not top_hits:
        return word

    candidate, similarity, _ = top_hits[0]
    return candidate if similarity > 70 else word

# Autocomplete function with safe handling of synonyms
def autocomplete(query):
    """Suggest designers and categories for a partially typed query.

    The query is spell-corrected with ``correct_spelling``, optionally replaced
    by the top result of ``find_synonym``, and then fuzzy-matched (partial
    ratio) against the designer and category lists separately.

    Args:
        query: Text typed by the user. ``None``, empty and whitespace-only
            values are all treated as "no query".

    Returns:
        A 4-tuple ``(correction_status, synonym_status, designer_suggestions,
        category_suggestions)``. Each status is a ``"before → after"`` string,
        or the string ``"None"`` when nothing changed. Each suggestions value
        is a list of up to 5 matches. For an empty query the result is
        ``("None", "None", [], [])``.
    """
    # Normalise once; ``or ""`` makes a None query take the empty-query path
    # instead of raising AttributeError on ``.strip()``.
    original_query = (query or "").strip()
    if not original_query:
        return "None", "None", [], []

    corrected_query = correct_spelling(original_query)
    synonym_results = find_synonym(corrected_query, top_n=1)
    # Fall back to the corrected query when no synonym passed the threshold.
    synonym_query = synonym_results[0] if synonym_results else corrected_query

    # Perform fuzzy matching for designers and categories separately
    designer_matches = process.extract(synonym_query, designer_data, scorer=fuzz.partial_ratio, limit=5)
    category_matches = process.extract(synonym_query, category_data, scorer=fuzz.partial_ratio, limit=5)

    # Each match is a tuple whose first element is the matched string.
    designer_suggestions = [match[0] for match in designer_matches]
    category_suggestions = [match[0] for match in category_matches]

    # Detect if spelling correction or synonym replacement occurred
    correction_status = f"{original_query} → {corrected_query}" if original_query != corrected_query else "None"
    synonym_status = f"{corrected_query} → {synonym_query}" if corrected_query != synonym_query else "None"

    return correction_status, synonym_status, designer_suggestions, category_suggestions

# Gradio UI
def _autocomplete_for_display(query):
    """Run ``autocomplete`` and format its suggestion lists for text boxes.

    The suggestion outputs are multi-line text boxes, so each list is joined
    with newlines (one suggestion per line) rather than being shown as a
    Python list repr on a single line.
    """
    correction, synonym, designers, categories = autocomplete(query)
    return (
        correction,
        synonym,
        "\n".join(map(str, designers)),
        "\n".join(map(str, categories)),
    )


with gr.Blocks() as demo:
    gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Designers & Categories)")

    query = gr.Textbox(label="Start typing for autocomplete")
    correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
    synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
    designer_output = gr.Textbox(label="Designer Suggestions", lines=5, interactive=False)
    category_output = gr.Textbox(label="Category Suggestions", lines=5, interactive=False)

    # Re-run the lookup on every change to the query box.
    query.change(
        fn=_autocomplete_for_display,
        inputs=query,
        outputs=[correction_output, synonym_output, designer_output, category_output]
    )

demo.launch(share=True)