import os

import gradio as gr
import kagglehub
import pandas as pd
from rapidfuzz import fuzz, process
from sentence_transformers import SentenceTransformer, util

# Download dataset from Kaggle
dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")
csv_file = os.path.join(dataset_path, "vestiaire.csv")

# Load a few rows only, to inspect the column names cheaply
df = pd.read_csv(csv_file, nrows=5)
print("Column Names in Dataset:", df.columns)


# Function to get the correct column name
def get_column_name(possible_names, df):
    """Return the first entry of *possible_names* that is a column of *df*.

    Args:
        possible_names: Candidate column names, checked in order.
        df: DataFrame whose ``columns`` are searched.

    Returns:
        The first candidate that is present in ``df.columns``.

    Raises:
        KeyError: If none of the candidates is a column of *df*.
    """
    for name in possible_names:
        if name in df.columns:
            return name
    raise KeyError(
        f"None of the expected column names {possible_names} found in dataset. Available columns: {df.columns}"
    )


# Map column names dynamically
designer_column = get_column_name(["brand_name"], df)
category_column = get_column_name(["product_category"], df)

# Load the working dataset (capped at the first 10,000 rows)
df = pd.read_csv(csv_file, nrows=10000)

# Extract relevant data
designer_data = df[designer_column].dropna().unique().tolist()
category_data = df[category_column].dropna().unique().tolist()

# Designers followed by categories: the shared vocabulary for synonym lookup
# and spelling correction. Built once instead of on every call.
_combined_data = designer_data + category_data

# Load the model
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(model_name)

# Embeddings of ``_combined_data``; filled in on first use by
# ``_get_corpus_embeddings`` so the corpus is encoded once, not per query.
_corpus_embeddings = None


def _get_corpus_embeddings():
    """Return the cached embeddings of ``_combined_data``, encoding them on first use."""
    global _corpus_embeddings
    if _corpus_embeddings is None:
        _corpus_embeddings = model.encode(_combined_data, convert_to_tensor=True)
    return _corpus_embeddings


# Function to find synonyms dynamically with fallback
def find_synonym(word, top_n=1, min_score=0.6):
    """Return vocabulary entries that are semantically close to *word*.

    The query is embedded with ``model`` and compared against the cached
    embeddings of the combined designer/category vocabulary.

    Args:
        word: Query text to embed.
        top_n: Maximum number of nearest entries to consider.
        min_score: Only hits with a similarity score strictly greater than
            this value are returned.

    Returns:
        A list of at most *top_n* vocabulary entries, best match first;
        empty when the vocabulary is empty or no hit clears *min_score*.
    """
    if not _combined_data:
        # Nothing to search against; avoid encoding an empty corpus.
        return []

    query_embedding = model.encode(word, convert_to_tensor=True)
    results = util.semantic_search(query_embedding, _get_corpus_embeddings(), top_k=top_n)

    # ``results`` holds one hit list per query; only a single query is sent.
    if not results:
        return []
    return [_combined_data[hit["corpus_id"]] for hit in results[0] if hit["score"] > min_score]


# Function to correct spellings
def correct_spelling(word, min_score=70):
    """Return the vocabulary entry that best fuzzy-matches *word*.

    Args:
        word: The text to correct.
        min_score: The best match is used only when its
            ``fuzz.partial_ratio`` score is strictly greater than this value.

    Returns:
        The best-matching designer/category name, or *word* unchanged when it
        is already an exact vocabulary entry or no match clears *min_score*.
    """
    # An exact vocabulary entry needs no correction. Without this check,
    # partial_ratio can score a longer entry that merely contains the word
    # at 100 and replace a correctly spelled query.
    if word in _combined_data:
        return word

    matches = process.extract(word, _combined_data, scorer=fuzz.partial_ratio, limit=1)
    if matches:
        best_match, score, _ = matches[0]
        if score > min_score:
            return best_match
    return word


# Autocomplete function with safe handling of synonyms
def autocomplete(query):
    """Suggest designers and categories for a partially typed *query*.

    The query is stripped, passed through ``correct_spelling``, then through
    ``find_synonym``; the resulting term is fuzzy-matched separately against
    ``designer_data`` and ``category_data``.

    Args:
        query: Raw text from the search box. ``None`` or blank input yields
            the "no suggestions" result.

    Returns:
        A 4-tuple ``(correction_status, synonym_status, designer_suggestions,
        category_suggestions)``. The two status values are either
        ``"<before> → <after>"`` or the string ``"None"`` when nothing changed;
        the two suggestion values are lists of up to five names.
    """
    # Guard against None as well as blank text so .strip() cannot fail.
    if not query or not query.strip():
        return "None", "None", [], []

    original_query = query.strip()
    corrected_query = correct_spelling(original_query)
    synonym_results = find_synonym(corrected_query, top_n=1)
    synonym_query = synonym_results[0] if synonym_results else corrected_query

    # Perform fuzzy matching for designers and categories separately
    designer_matches = process.extract(synonym_query, designer_data, scorer=fuzz.partial_ratio, limit=5)
    category_matches = process.extract(synonym_query, category_data, scorer=fuzz.partial_ratio, limit=5)

    # Each match is a (choice, score, index) tuple; keep only the choice.
    designer_suggestions = [match[0] for match in designer_matches]
    category_suggestions = [match[0] for match in category_matches]

    # Detect if spelling correction or synonym replacement occurred
    correction_status = f"{original_query} → {corrected_query}" if original_query != corrected_query else "None"
    synonym_status = f"{corrected_query} → {synonym_query}" if corrected_query != synonym_query else "None"

    return correction_status, synonym_status, designer_suggestions, category_suggestions


def _autocomplete_for_ui(query):
    """Adapt ``autocomplete`` for the text boxes: one suggestion per line.

    The suggestion outputs are multi-line text boxes, so the lists are joined
    with newlines instead of being displayed as a Python list repr.
    """
    correction, synonym, designers, categories = autocomplete(query)
    return (
        correction,
        synonym,
        "\n".join(map(str, designers)),
        "\n".join(map(str, categories)),
    )


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Designers & Categories)")

    query = gr.Textbox(label="Start typing for autocomplete")
    correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
    synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
    designer_output = gr.Textbox(label="Designer Suggestions", lines=5, interactive=False)
    category_output = gr.Textbox(label="Category Suggestions", lines=5, interactive=False)

    # Re-run the pipeline whenever the text in the query box changes.
    query.change(
        fn=_autocomplete_for_ui,
        inputs=query,
        outputs=[correction_output, synonym_output, designer_output, category_output],
    )

# NOTE: share=True asks Gradio for a publicly reachable share link.
demo.launch(share=True)