import gradio as gr
from transformers import pipeline

# Hugging Face model id of the trained token-classification model.
model_name = "DINGOLANI/distilbert-ner-v2"

# Load the model once at import time so every request reuses the same pipeline.
try:
    nlp_ner = pipeline("token-classification", model=model_name, tokenizer=model_name)
except Exception as e:
    # Chain the original exception so the underlying cause stays in the traceback.
    raise RuntimeError(f"Failed to load model: {e}") from e

# Mapping of entity labels to human-readable categories.
# Labels that are not listed here are ignored by parse_fashion_query.
label_map = {
    "LABEL_1": "Brand",
    "LABEL_2": "Brand",
    "LABEL_3": "Category",
    "LABEL_4": "Category",
    "LABEL_5": "Gender",
    "LABEL_6": "Price",
    "LABEL_7": "Price",
}


def parse_fashion_query(query):
    """Parse a fashion search query and extract structured attributes.

    Runs the token-classification pipeline on ``query`` and groups the
    predicted tokens by the human-readable category from ``label_map``.
    WordPiece continuation tokens (prefixed with ``##``) are glued onto the
    previous token when that token is directly adjacent and belongs to the
    same category.

    Args:
        query: Free-text search query entered by the user.

    Returns:
        A 2-tuple ``(structured_output, raw_output)``:

        * ``structured_output`` - dict mapping a category name to the list of
          words extracted for it, in order of appearance.
        * ``raw_output`` - the per-token predictions returned by the
          pipeline, with scalar values converted to plain Python values.

        An empty, whitespace-only or ``None`` query yields ``({}, [])``
        without invoking the model.
    """
    # Nothing to analyse: skip the model call entirely.
    if not query or not query.strip():
        return {}, []

    result = nlp_ner(query)

    structured_output = {}
    prev_entity = None

    for token in result:
        readable_entity = label_map.get(token["entity"])
        if not readable_entity:
            # Unknown label: skip it, and forget the previous category so a
            # later "##" piece is not merged across this gap.
            prev_entity = None
            continue

        word = token["word"]
        is_subword = word.startswith("##")
        if is_subword:
            # Drop the continuation marker; it must never reach the output.
            word = word[2:]

        if is_subword and prev_entity == readable_entity:
            # Continuation of the word that was just stored for this category.
            structured_output[readable_entity][-1] += word
        else:
            structured_output.setdefault(readable_entity, []).append(word)

        prev_entity = readable_entity

    # NOTE(review): the pipeline's numeric fields (e.g. "score") appear to be
    # NumPy scalars rather than Python floats - confirm. Converting anything
    # that exposes ``.item()`` keeps the JSON view independent of how the
    # installed Gradio version serialises such values.
    raw_output = [
        {
            key: value.item() if hasattr(value, "item") else value
            for key, value in token.items()
        }
        for token in result
    ]

    return structured_output, raw_output  # Structured and raw outputs


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🛍️ Luxury Fashion Query Parser")

    query_input = gr.Textbox(
        label="Enter your search query",
        placeholder="e.g., Gucci handbags for women under $5000",
    )
    parse_button = gr.Button("Analyze")

    with gr.Row():
        structured_output_box = gr.JSON(label="🔍 Structured Output (Human Readable)")
        raw_output_box = gr.JSON(label="🛠 Raw Model Output")

    parse_button.click(
        parse_fashion_query,
        inputs=[query_input],
        outputs=[structured_output_box, raw_output_box],
    )

# Run Gradio app
demo.launch()