import gradio as gr
from transformers import pipeline

# Identifier of the trained NER model; used for both the weights and the tokenizer
model_name = "DINGOLANI/distilbert-ner-v2"

# Build the token-classification pipeline once, at import time, so that every
# request handled by the app reuses the same loaded model. Any failure while
# loading is surfaced as a RuntimeError that is explicitly chained to the
# original exception, so the underlying traceback is kept as the direct cause.
try:
    nlp_ner = pipeline("token-classification", model=model_name, tokenizer=model_name)
except Exception as e:
    raise RuntimeError(f"Failed to load model: {e}") from e

# Translate the model's generic "LABEL_<n>" ids into human-readable categories.
# Several ids may share one category; ids not listed here have no entry.
label_map = {
    f"LABEL_{label_id}": category
    for category, label_ids in (
        ("Brand", (1, 2)),
        ("Category", (3, 4)),
        ("Gender", (5,)),
        ("Price", (6, 7)),
    )
    for label_id in label_ids
}

def parse_fashion_query(query):
    """
    Parse a fashion search query and extract structured attributes.

    Runs the token-classification pipeline on ``query``, translates each
    predicted label through ``label_map`` and groups the matching tokens by
    category. Subword continuation pieces (tokens prefixed with "##") are
    glued back onto the token they directly continue.

    Args:
        query: Free-text search query.

    Returns:
        A ``(structured, raw)`` tuple. ``structured`` maps each category
        name to the list of words found for it, in order of appearance;
        ``raw`` is the pipeline output, returned unchanged. A blank query
        yields ``({}, [])`` without invoking the model.
    """
    # Nothing to analyse: skip the model call entirely.
    if not query or not query.strip():
        return {}, []

    result = nlp_ner(query)

    structured_output = {}
    prev_entity = None

    for token in result:
        word = token["word"]
        readable_entity = label_map.get(token["entity"])

        if not readable_entity:
            # A token with an unmapped label ends the current run; forgetting
            # the previous category stops a later "##" piece from being
            # attached to a word it is not adjacent to.
            prev_entity = None
            continue

        is_subword = word.startswith("##")
        if is_subword:
            # Never let the tokenizer's continuation marker reach the output.
            word = word[2:]

        if is_subword and prev_entity == readable_entity:
            # Continuation of the immediately preceding token of this category.
            structured_output[readable_entity][-1] += word
        else:
            structured_output.setdefault(readable_entity, []).append(word)

        prev_entity = readable_entity

    return structured_output, result  # Structured view plus raw model output

# Gradio UI: a query box and a button, with two JSON panes side by side
with gr.Blocks() as demo:
    gr.Markdown("# 🛍️ Luxury Fashion Query Parser")

    query_input = gr.Textbox(
        label="Enter your search query",
        placeholder="e.g., Gucci handbags for women under $5000",
    )
    parse_button = gr.Button("Analyze")

    with gr.Row():
        structured_output_box = gr.JSON(label="🔍 Structured Output (Human Readable)")
        raw_output_box = gr.JSON(label="🛠 Raw Model Output")

    # The handler returns (structured, raw), matching the order of `outputs`
    parse_button.click(
        fn=parse_fashion_query,
        inputs=[query_input],
        outputs=[structured_output_box, raw_output_box],
    )

# Start the Gradio app (executed whenever this module is run or imported)
demo.launch()