Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
from sentence_transformers import SentenceTransformer
|
3 |
import pandas as pd
|
4 |
from rapidfuzz import fuzz, process
|
5 |
|
@@ -13,9 +13,9 @@ try:
|
|
13 |
df = pd.read_csv(data_file, nrows=1000) # Limit rows for testing
|
14 |
except FileNotFoundError:
|
15 |
df = pd.DataFrame({
|
16 |
-
"ProductName": ["Gucci
|
17 |
-
"Category": ["
|
18 |
-
"SubCategory": ["
|
19 |
}) # Fallback sample data
|
20 |
|
21 |
# Extract relevant fields
|
@@ -25,24 +25,35 @@ subcategories = df["SubCategory"].dropna().unique().tolist()
|
|
25 |
|
26 |
# Merge into one dataset for autocomplete
|
27 |
autocomplete_data = product_names + categories + subcategories
|
|
|
28 |
|
29 |
-
#
|
30 |
-
|
31 |
|
32 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
def autocomplete(query):
|
34 |
if not query.strip():
|
35 |
return [] # Avoid empty queries
|
36 |
|
37 |
-
#
|
38 |
-
|
|
|
|
|
|
|
39 |
|
40 |
-
# Return list of suggestions
|
41 |
return [match[0] for match in matches]
|
42 |
|
43 |
# Gradio interface
|
44 |
with gr.Blocks() as demo:
|
45 |
-
gr.Markdown("###
|
46 |
|
47 |
query = gr.Textbox(label="Start typing for autocomplete")
|
48 |
autocomplete_output = gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False)
|
|
|
1 |
import gradio as gr
|
2 |
+
from sentence_transformers import SentenceTransformer, util
|
3 |
import pandas as pd
|
4 |
from rapidfuzz import fuzz, process
|
5 |
|
|
|
13 |
df = pd.read_csv(data_file, nrows=1000) # Limit rows for testing
|
14 |
except FileNotFoundError:
|
15 |
df = pd.DataFrame({
    # Fallback sample data, used when the CSV file cannot be found.
    # pandas requires every column to have the same number of entries;
    # the previous literal had 3 / 2 / 2 entries and raised ValueError.
    # None fills the one missing SubCategory cell so that the set of
    # non-null values in each column stays the same as before.
    "ProductName": ["Gucci Sunglasses", "Nike Sneakers", "Louis Vuitton Handbag"],
    "Category": ["Accessories", "Shoes", "Accessories"],
    "SubCategory": ["Sunglasses", None, "Totes"],
})  # Fallback sample data
|
20 |
|
21 |
# Extract relevant fields
|
|
|
25 |
|
26 |
# Merge into one dataset for autocomplete
|
27 |
# Clean each entry (coerce to str, drop stray leading/trailing double quotes)
# and remove duplicates while keeping first-seen order. The same text may
# appear in more than one of the three source lists, or become identical
# after cleaning; a repeated entry would be embedded twice and could show up
# twice among the suggestions.
autocomplete_data = list(dict.fromkeys(
    str(item).strip('"')
    for item in product_names + categories + subcategories
))

# Encode all items in the dataset into embeddings
autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True)
|
32 |
|
33 |
+
# Synonym Expansion Function
|
34 |
+
def find_synonym(word, top_n=1):
    """Return the dataset entries whose embeddings are closest to *word*.

    Args:
        word: Text to look up; it is encoded with the module-level ``model``.
        top_n: Maximum number of entries to return (passed as ``top_k``).

    Returns:
        A list of strings taken from ``autocomplete_data``, in the order
        reported by ``util.semantic_search``.
    """
    word_vector = model.encode(word, convert_to_tensor=True)
    hits_per_query = util.semantic_search(
        word_vector, autocomplete_embeddings, top_k=top_n
    )

    # A single query was submitted, so only the first hit list is relevant.
    nearest = []
    for hit in hits_per_query[0]:
        nearest.append(autocomplete_data[hit['corpus_id']])
    return nearest
|
39 |
+
|
40 |
+
# Autocomplete function with synonym handling
|
41 |
def autocomplete(query):
    """Return up to five autocomplete suggestions for *query*.

    Args:
        query: Text typed by the user. ``None``, an empty string, or a
            whitespace-only string yields no suggestions.

    Returns:
        A list of suggestion strings, best fuzzy match first.
    """
    # Guard against None as well as blank input; calling .strip() on None
    # would raise AttributeError.
    if not query or not query.strip():
        return []  # Avoid empty queries

    # Find synonyms dynamically
    synonyms = find_synonym(query, top_n=3)

    # find_synonym() returns entries taken from autocomplete_data, so plain
    # concatenation repeats them. De-duplicate (keeping order) so that one
    # suggestion cannot occupy several of the five result slots.
    candidates = list(dict.fromkeys(autocomplete_data + synonyms))

    # Perform fuzzy matching with synonyms included
    matches = process.extract(query, candidates, scorer=fuzz.partial_ratio, limit=5)

    # Each match is a tuple whose first element is the matched string.
    return [match[0] for match in matches]
|
53 |
|
54 |
# Gradio interface
|
55 |
with gr.Blocks() as demo:
|
56 |
+
gr.Markdown("### AI-Powered Autocomplete with Synonyms")
|
57 |
|
58 |
query = gr.Textbox(label="Start typing for autocomplete")
|
59 |
autocomplete_output = gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False)
|