Spaces:

DINGOLANI
/

QueryAnalyzerV2

Runtime error

App Files Community

DINGOLANI committed 23 days ago

Commit

57b32e0

verified ·

1 Parent(s): 736b778

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -30

app.py CHANGED Viewed

@@ -1,47 +1,45 @@
 import gradio as gr
 import torch
-from transformers import CLIPProcessor, CLIPModel
 import re
-# Load FashionCLIP model
-model_name = "patrickjohncyh/fashion-clip"
-model = CLIPModel.from_pretrained(model_name)
-processor = CLIPProcessor.from_pretrained(model_name)
-# Regex for price extraction
 price_pattern = re.compile(r'(\bunder\b|\babove\b|\bbelow\b|\bbetween\b)?\s?(\d{1,5})\s?(AED|USD|EUR)?', re.IGNORECASE)
-def get_text_embedding(text_list):
-    """
-    Converts a list of input texts into embeddings using FashionCLIP.
-    """
-    inputs = processor(text=text_list, return_tensors="pt", padding=True)  # Corrected input format
-    with torch.no_grad():
-        text_embedding = model.get_text_features(**inputs)
-    return text_embedding
 def extract_attributes(query):
     """
-    Extract structured fashion attributes dynamically using FashionCLIP.
     """
     structured_output = {"Brand": "Unknown", "Category": "Unknown", "Gender": "Unknown", "Price": "Unknown"}
-    # Get embedding for the query
-    query_embedding = get_text_embedding([query])
-    # Reference labels for classification
-    reference_labels = ["Brand", "Category", "Gender", "Price"]
-    reference_embeddings = get_text_embedding(reference_labels)
-    # Compute cosine similarity
-    similarities = torch.nn.functional.cosine_similarity(query_embedding, reference_embeddings)
-    best_match_index = similarities.argmax().item()
-    # Assign attribute dynamically
-    attribute_type = reference_labels[best_match_index]
-    structured_output[attribute_type] = query  # Assigns the query text to the detected attribute
-    # Extract price dynamically
     price_match = price_pattern.search(query)
     if price_match:
         condition, amount, currency = price_match.groups()
@@ -52,13 +50,14 @@ def extract_attributes(query):
 # Define Gradio UI
 def parse_query(user_query):
     """
-    Takes user query and returns structured attributes dynamically.
     """
     parsed_output = extract_attributes(user_query)
-    return parsed_output  # Returns structured JSON
 with gr.Blocks() as demo:
-    gr.Markdown("# 🛍️ Fashion Query Parser using FashionCLIP")
     query_input = gr.Textbox(label="Enter your search query", placeholder="e.g., Gucci men’s perfume under 200AED")
     output_box = gr.JSON(label="Parsed Output")

 import gradio as gr
 import torch
 import re
+from transformers import pipeline
+# Load fine-tuned NER model from Hugging Face Hub
+model_name = "luxury-fashion-ner"
+ner_pipeline = pipeline("ner", model=model_name, tokenizer=model_name)
+# Regex for extracting price
 price_pattern = re.compile(r'(\bunder\b|\babove\b|\bbelow\b|\bbetween\b)?\s?(\d{1,5})\s?(AED|USD|EUR)?', re.IGNORECASE)
+# Keywords for gender extraction
+gender_keywords = ["men", "male", "women", "female", "unisex"]
 def extract_attributes(query):
     """
+    Extract structured fashion attributes dynamically using the fine-tuned NER model.
     """
     structured_output = {"Brand": "Unknown", "Category": "Unknown", "Gender": "Unknown", "Price": "Unknown"}
+    # Run NER model on query
+    entities = ner_pipeline(query)
+    for entity in entities:
+        entity_text = entity["word"].replace("##", "")  # Fix tokenization artifacts
+        entity_label = entity["entity"]
+        if "ORG" in entity_label:  # Organization = Brand
+            structured_output["Brand"] = entity_text
+        elif "MISC" in entity_label:  # Miscellaneous = Category
+            structured_output["Category"] = entity_text
+        elif "LOC" in entity_label:  # Locations (sometimes used for brands)
+            structured_output["Brand"] = entity_text
+    # Extract gender
+    for gender in gender_keywords:
+        if gender in query.lower():
+            structured_output["Gender"] = gender.capitalize()
+            break
+    # Extract price
     price_match = price_pattern.search(query)
     if price_match:
         condition, amount, currency = price_match.groups()
 # Define Gradio UI
 def parse_query(user_query):
     """
+    Parses fashion-related queries into structured attributes.
     """
     parsed_output = extract_attributes(user_query)
+    return parsed_output  # JSON output
+# Create Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("# 🛍️ Luxury Fashion Query Parser using Fine-Tuned NER Model")
     query_input = gr.Textbox(label="Enter your search query", placeholder="e.g., Gucci men’s perfume under 200AED")
     output_box = gr.JSON(label="Parsed Output")