import functools
import os

import gradio as gr
import kagglehub
import pandas as pd
from rapidfuzz import fuzz, process
from sentence_transformers import SentenceTransformer, util

# Download the dataset via kagglehub and build the path to its CSV file.
# NOTE(review): assumes dataset_download returns a local directory that
# contains "vestiaire.csv" — confirm against the dataset's file listing.
dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")
csv_file = os.path.join(dataset_path, "vestiaire.csv")

# Read only the first 5 rows: enough to inspect the column names before the
# larger load further down.
df = pd.read_csv(csv_file, nrows=5)
print("Column Names in Dataset:", df.columns)

def get_column_name(possible_names, df):
    """Return the first entry of *possible_names* that is a column of *df*.

    Args:
        possible_names: Candidate column names, checked in order.
        df: Object exposing a ``columns`` collection (a DataFrame here).

    Returns:
        The first candidate found in ``df.columns``.

    Raises:
        KeyError: If none of the candidates is present.
    """
    # A private sentinel keeps "nothing matched" distinct from any real name.
    not_found = object()
    match = next(
        (candidate for candidate in possible_names if candidate in df.columns),
        not_found,
    )
    if match is not_found:
        raise KeyError(f"None of the expected column names {possible_names} found in dataset. Available columns: {df.columns}")
    return match

# Resolve the column names used for designers and categories. Each candidate
# list currently holds a single name, so a missing column raises KeyError here.
designer_column = get_column_name(["brand_name"], df)
category_column = get_column_name(["product_category"], df)

# Reload the CSV for real use. nrows=10000 caps this at the first 10,000 rows,
# not the entire file.
df = pd.read_csv(csv_file, nrows=10000)

# Distinct, non-null values of each column; these lists are the vocabulary
# the search functions below match against.
designer_data = df[designer_column].dropna().unique().tolist()
category_data = df[category_column].dropna().unique().tolist()

# Load the sentence-embedding model used for semantic matching.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(model_name)

@functools.lru_cache(maxsize=1)
def _encode_corpus(corpus):
    """Return the embedding tensor for *corpus*, a tuple of candidate strings.

    Encoding the whole vocabulary is the expensive part of a lookup and the
    vocabulary does not change between keystrokes, so the most recent result
    is memoised. The tuple itself is the cache key, so a different vocabulary
    is re-encoded rather than served stale embeddings.
    """
    return model.encode(list(corpus), convert_to_tensor=True)


def find_synonym(word, top_n=1):
    """Return vocabulary entries that are semantically close to *word*.

    Args:
        word: Query text to embed and search for.
        top_n: Maximum number of hits requested from the semantic search.

    Returns:
        A list of entries from ``designer_data + category_data`` whose score
        reported by ``util.semantic_search`` is strictly greater than 0.6.
        The list is empty when nothing clears the threshold or when the
        vocabulary itself is empty.
    """
    combined_data = designer_data + category_data
    if not combined_data:
        # Nothing to search against; skip the model calls entirely.
        return []

    query_embedding = model.encode(word, convert_to_tensor=True)
    corpus_embeddings = _encode_corpus(tuple(combined_data))
    results = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_n)
    if not results:
        return []

    # results[0] holds the hits for the single query that was submitted.
    return [combined_data[hit["corpus_id"]] for hit in results[0] if hit["score"] > 0.6]

def correct_spelling(word):
    """Return the closest vocabulary entry to *word*, or *word* unchanged.

    The single best ``fuzz.partial_ratio`` match across designers and
    categories is accepted only when its score is strictly greater than 70.
    """
    vocabulary = designer_data + category_data
    top_hits = process.extract(word, vocabulary, scorer=fuzz.partial_ratio, limit=1)
    if not top_hits:
        return word

    candidate, similarity, _ = top_hits[0]
    return candidate if similarity > 70 else word

def autocomplete(query):
    """Build spelling, synonym and suggestion results for a partial query.

    Args:
        query: Text typed so far. ``None`` is treated like an empty string.

    Returns:
        A 4-tuple ``(correction_status, synonym_status, designer_suggestions,
        category_suggestions)``. The two status values are either ``"None"``
        or a ``"before → after"`` string; the two suggestion values are lists
        of up to five fuzzy matches each. A blank query yields
        ``("None", "None", [], [])``.
    """
    # Treat a missing value (None) like an empty query instead of failing on
    # .strip(); stripping once also avoids repeating the work below.
    original_query = (query or "").strip()
    if not original_query:
        return "None", "None", [], []

    corrected_query = correct_spelling(original_query)
    synonym_results = find_synonym(corrected_query, top_n=1)
    # Fall back to the corrected text when no synonym clears the threshold.
    synonym_query = synonym_results[0] if synonym_results else corrected_query

    # Fuzzy-match designers and categories separately so each gets its own
    # top five; only the matched text (first tuple element) is kept.
    designer_matches = process.extract(synonym_query, designer_data, scorer=fuzz.partial_ratio, limit=5)
    category_matches = process.extract(synonym_query, category_data, scorer=fuzz.partial_ratio, limit=5)
    designer_suggestions = [match[0] for match in designer_matches]
    category_suggestions = [match[0] for match in category_matches]

    # Report a rewrite only when the text actually changed at that stage.
    correction_status = f"{original_query} → {corrected_query}" if original_query != corrected_query else "None"
    synonym_status = f"{corrected_query} → {synonym_query}" if corrected_query != synonym_query else "None"

    return correction_status, synonym_status, designer_suggestions, category_suggestions

# Gradio UI
def _autocomplete_for_display(query):
    """Adapt ``autocomplete`` for the textbox outputs of the UI.

    ``autocomplete`` returns its suggestions as lists, but a ``gr.Textbox``
    shows a non-string value via ``str()``, so a list would appear as a Python
    literal such as ``['a', 'b']``. Joining with newlines puts one suggestion
    on each line of the multi-line output boxes.
    """
    correction, synonym, designers, categories = autocomplete(query)
    return (
        correction,
        synonym,
        "\n".join(map(str, designers)),
        "\n".join(map(str, categories)),
    )


with gr.Blocks() as demo:
    gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Designers & Categories)")

    query = gr.Textbox(label="Start typing for autocomplete")
    correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
    synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
    designer_output = gr.Textbox(label="Designer Suggestions", lines=5, interactive=False)
    category_output = gr.Textbox(label="Category Suggestions", lines=5, interactive=False)

    # Re-run the lookup on every change to the query box; the four return
    # values map positionally onto the four output components.
    query.change(
        fn=_autocomplete_for_display,
        inputs=query,
        outputs=[correction_output, synonym_output, designer_output, category_output]
    )

demo.launch(share=True)