DINGOLANI committed on
Commit
bfb7b53
·
verified ·
1 Parent(s): 8a7677b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -11
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from sentence_transformers import SentenceTransformer
3
  import pandas as pd
4
  from rapidfuzz import fuzz, process
5
 
@@ -13,9 +13,9 @@ try:
13
  df = pd.read_csv(data_file, nrows=1000) # Limit rows for testing
14
  except FileNotFoundError:
15
  df = pd.DataFrame({
16
- "ProductName": ["Gucci Shoes", "Nike Sneakers", "Louis Vuitton Handbag"],
17
- "Category": ["Shoes", "Bags"],
18
- "SubCategory": ["Sneakers", "Totes"]
19
  }) # Fallback sample data
20
 
21
  # Extract relevant fields
@@ -25,24 +25,35 @@ subcategories = df["SubCategory"].dropna().unique().tolist()
25
 
26
  # Merge into one dataset for autocomplete
27
  autocomplete_data = product_names + categories + subcategories
 
28
 
29
- # Clean data by removing unnecessary characters
30
- autocomplete_data = [str(item).strip('"') for item in autocomplete_data]
31
 
32
# Autocomplete function
def autocomplete(query):
    """Return up to five dataset entries that fuzzily match `query`.

    Args:
        query: Text typed by the user. `None`, empty, or whitespace-only
            input yields no suggestions.

    Returns:
        A list of at most five strings taken from `autocomplete_data`,
        in the order produced by `process.extract`.
    """
    # Guard against None as well as blank input so the callback never
    # raises AttributeError on `.strip()`.
    if not query or not query.strip():
        return []  # Avoid empty queries

    # The merged dataset can hold the same text more than once (e.g. a
    # value that is both a category and a subcategory); drop repeats,
    # keeping first-seen order, so a suggestion is never listed twice.
    candidates = list(dict.fromkeys(autocomplete_data))

    # Fuzzy matching with typo tolerance
    matches = process.extract(query, candidates, scorer=fuzz.partial_ratio, limit=5)

    # Each match is a tuple whose first element is the matched text.
    return [match[0] for match in matches]
42
 
43
  # Gradio interface
44
  with gr.Blocks() as demo:
45
- gr.Markdown("### Improved Autocomplete for Luxury Products")
46
 
47
  query = gr.Textbox(label="Start typing for autocomplete")
48
  autocomplete_output = gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False)
 
1
  import gradio as gr
2
+ from sentence_transformers import SentenceTransformer, util
3
  import pandas as pd
4
  from rapidfuzz import fuzz, process
5
 
 
13
  df = pd.read_csv(data_file, nrows=1000) # Limit rows for testing
14
  except FileNotFoundError:
15
  df = pd.DataFrame({
16
+ "ProductName": ["Gucci Sunglasses", "Nike Sneakers", "Louis Vuitton Handbag"],
17
+ "Category": ["Accessories", "Shoes"],
18
+ "SubCategory": ["Sunglasses", "Totes"]
19
  }) # Fallback sample data
20
 
21
  # Extract relevant fields
 
25
 
26
  # Merge into one dataset for autocomplete
27
  autocomplete_data = product_names + categories + subcategories
28
+ autocomplete_data = [str(item).strip('"') for item in autocomplete_data] # Clean text
29
 
30
+ # Encode all items in the dataset into embeddings
31
+ autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True)
32
 
33
# Synonym Expansion Function
def find_synonym(word, top_n=1):
    """Return the dataset entries whose embeddings are nearest to `word`.

    Encodes `word` with the module-level `model`, runs a semantic search
    against `autocomplete_embeddings` asking for `top_n` hits, and maps
    each hit's `corpus_id` back to its text in `autocomplete_data`.
    """
    word_vector = model.encode(word, convert_to_tensor=True)
    # semantic_search returns one hit list per query; a single query is
    # sent here, so only the first list is relevant.
    hits = util.semantic_search(word_vector, autocomplete_embeddings, top_k=top_n)[0]
    nearest = []
    for hit in hits:
        nearest.append(autocomplete_data[hit['corpus_id']])
    return nearest
39
+
40
# Autocomplete function with synonym handling
def autocomplete(query):
    """Return up to five suggestions for `query`.

    Combines the dataset with the semantic neighbours reported by
    `find_synonym`, then ranks the combined pool with fuzzy matching.

    Args:
        query: Text typed by the user. `None`, empty, or whitespace-only
            input yields no suggestions.

    Returns:
        A list of at most five distinct strings, in the order produced
        by `process.extract`.
    """
    # Guard against None as well as blank input so the callback never
    # raises AttributeError on `.strip()`.
    if not query or not query.strip():
        return []  # Avoid empty queries

    # Find synonyms dynamically
    synonyms = find_synonym(query, top_n=3)

    # find_synonym returns entries taken from autocomplete_data, so plain
    # concatenation lists them twice and the same suggestion could be
    # returned more than once. dict.fromkeys drops repeats while keeping
    # first-seen order.
    candidates = list(dict.fromkeys(autocomplete_data + synonyms))

    # Perform fuzzy matching with synonyms included
    matches = process.extract(query, candidates, scorer=fuzz.partial_ratio, limit=5)

    # Each match is a tuple whose first element is the matched text.
    return [match[0] for match in matches]
53
 
54
  # Gradio interface
55
  with gr.Blocks() as demo:
56
+ gr.Markdown("### AI-Powered Autocomplete with Synonyms")
57
 
58
  query = gr.Textbox(label="Start typing for autocomplete")
59
  autocomplete_output = gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False)