DINGOLANI committed on
Commit
50bd810
Β·
verified Β·
1 Parent(s): 7efa999

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -28
app.py CHANGED
@@ -1,31 +1,31 @@
1
  import gradio as gr
 
2
  from sentence_transformers import SentenceTransformer, util
3
  import pandas as pd
4
  from rapidfuzz import fuzz, process
 
 
 
 
 
 
 
 
 
 
5
 
6
  # Load the model
7
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
8
  model = SentenceTransformer(model_name)
9
 
10
- # Load CSV data
11
- data_file = "Luxury_Products_Apparel_Data.csv" # Ensure this file is uploaded
12
- try:
13
- df = pd.read_csv(data_file, nrows=1000) # Limit rows for testing
14
- except FileNotFoundError:
15
- df = pd.DataFrame({
16
- "ProductName": ["Gucci Sunglasses", "Nike Sneakers", "Louis Vuitton Handbag"],
17
- "Category": ["Accessories", "Shoes"],
18
- "SubCategory": ["Sunglasses", "Totes"]
19
- }) # Fallback sample data
20
-
21
  # Extract relevant fields
22
- product_names = df["ProductName"].dropna().tolist()
23
  categories = df["Category"].dropna().unique().tolist()
24
- subcategories = df["SubCategory"].dropna().unique().tolist()
25
 
26
  # Merge into one dataset for autocomplete
27
- autocomplete_data = product_names + categories + subcategories
28
- autocomplete_data = [str(item).strip('"') for item in autocomplete_data] # Clean text
29
 
30
  # Encode all items in the dataset into embeddings
31
  autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True)
@@ -34,20 +34,16 @@ autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True
34
  def find_synonym(word, top_n=1):
35
  query_embedding = model.encode(word, convert_to_tensor=True)
36
  results = util.semantic_search(query_embedding, autocomplete_embeddings, top_k=top_n)
37
-
38
- if results[0]:
39
- return [autocomplete_data[result['corpus_id']] for result in results[0] if result['score'] > 0.6] # Only return synonyms with good confidence
40
- return [word] # Return original word if no synonym is found
41
 
42
  # Function to correct spellings
43
  def correct_spelling(word):
44
- matches = process.extract(word, autocomplete_data, scorer=fuzz.partial_ratio, limit=3)
45
-
46
  if matches:
47
- best_match, score, _ = matches[0] # Extract the best match string & score
48
- if score > 70: # Lowered threshold to allow for more typo tolerance
49
  return best_match
50
- return word # Return the original word if no good match is found
51
 
52
  # Autocomplete function with tracking
53
  def autocomplete(query):
@@ -62,23 +58,20 @@ def autocomplete(query):
62
  matches = process.extract(synonym_query, autocomplete_data, scorer=fuzz.partial_ratio, limit=5)
63
  suggestions = "\n".join([match[0] for match in matches])
64
 
65
- # Detect if spelling correction or synonym replacement occurred
66
  correction_status = f"{original_query} β†’ {corrected_query}" if original_query != corrected_query else "None"
67
  synonym_status = f"{corrected_query} β†’ {synonym_query}" if corrected_query != synonym_query else "None"
68
 
69
- # Return three separate values for Gradio
70
  return correction_status, synonym_status, suggestions
71
 
72
  # Gradio UI
73
  with gr.Blocks() as demo:
74
- gr.Markdown("### AI-Powered Autocomplete with Spell Correction & Synonyms")
75
 
76
  query = gr.Textbox(label="Start typing for autocomplete")
77
  correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
78
  synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
79
  suggestions_output = gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False)
80
 
81
- # Bind function to UI
82
  query.change(fn=autocomplete, inputs=query, outputs=[correction_output, synonym_output, suggestions_output])
83
 
84
  demo.launch()
 
1
  import gradio as gr
2
+ import kagglehub
3
  from sentence_transformers import SentenceTransformer, util
4
  import pandas as pd
5
  from rapidfuzz import fuzz, process
6
+ import os
7
+
8
# Download the latest version of the Vestiaire Fashion Dataset.
dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")

# Locate the CSV file inside the downloaded dataset folder. The expected
# name is "vestiaire.csv", but fall back to the first *.csv present so a
# renamed file in a future dataset version does not crash the app at startup.
csv_file = os.path.join(dataset_path, "vestiaire.csv")
if not os.path.exists(csv_file):
    csv_candidates = sorted(f for f in os.listdir(dataset_path) if f.endswith(".csv"))
    if not csv_candidates:
        raise FileNotFoundError(f"No CSV file found in {dataset_path}")
    csv_file = os.path.join(dataset_path, csv_candidates[0])

# Load only the first 10K rows to keep startup/embedding time reasonable.
df = pd.read_csv(csv_file, nrows=10000)
16
 
17
# Load the sentence-embedding model used for semantic matching.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(model_name)

# Extract relevant fields from the dataframe.
designers = df["Designer"].dropna().unique().tolist()
categories = df["Category"].dropna().unique().tolist()
products = df["Product"].dropna().tolist()

# Merge into one vocabulary for autocomplete. Deduplicate while preserving
# order: "Product" values are taken without .unique(), so repeated product
# names would otherwise be embedded (and suggested) multiple times.
autocomplete_data = designers + categories + products
autocomplete_data = list(dict.fromkeys(str(item).strip('"') for item in autocomplete_data))

# Encode all vocabulary items into embeddings once, at startup.
autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True)
 
34
def find_synonym(word, top_n=1):
    """Return up to ``top_n`` semantically similar vocabulary entries.

    Falls back to ``[word]`` when no candidate clears the confidence
    threshold, so callers always receive a non-empty list (the previous
    revision guaranteed this; returning ``[]`` breaks downstream indexing).
    """
    query_embedding = model.encode(word, convert_to_tensor=True)
    results = util.semantic_search(query_embedding, autocomplete_embeddings, top_k=top_n)
    # Keep only confident matches (cosine-similarity score above 0.6).
    synonyms = [autocomplete_data[hit['corpus_id']] for hit in results[0] if hit['score'] > 0.6]
    return synonyms if synonyms else [word]
 
 
 
38
 
39
# Fuzzy spell correction against the autocomplete vocabulary.
def correct_spelling(word):
    """Return the closest vocabulary entry for *word* when its
    partial-ratio score exceeds 70; otherwise return *word* unchanged."""
    candidates = process.extract(word, autocomplete_data, scorer=fuzz.partial_ratio, limit=1)
    if not candidates:
        return word
    top_text, top_score, _ = candidates[0]
    return top_text if top_score > 70 else word
47
 
48
  # Autocomplete function with tracking
49
  def autocomplete(query):
 
58
  matches = process.extract(synonym_query, autocomplete_data, scorer=fuzz.partial_ratio, limit=5)
59
  suggestions = "\n".join([match[0] for match in matches])
60
 
 
61
  correction_status = f"{original_query} β†’ {corrected_query}" if original_query != corrected_query else "None"
62
  synonym_status = f"{corrected_query} β†’ {synonym_query}" if corrected_query != synonym_query else "None"
63
 
 
64
  return correction_status, synonym_status, suggestions
65
 
66
# Gradio UI: one input box wired to three read-only output boxes.
with gr.Blocks() as demo:
    gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Vestiaire Dataset)")

    search_box = gr.Textbox(label="Start typing for autocomplete")
    result_boxes = [
        gr.Textbox(label="Spelling Correction Applied", interactive=False),
        gr.Textbox(label="Synonym Applied", interactive=False),
        gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False),
    ]

    # Re-run the full pipeline on every keystroke.
    search_box.change(fn=autocomplete, inputs=search_box, outputs=result_boxes)

demo.launch()