DINGOLANI committed on
Commit
50bd810
Β·
verified Β·
1 Parent(s): 7efa999

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -28
app.py CHANGED
@@ -1,31 +1,31 @@
1
  import gradio as gr
 
2
  from sentence_transformers import SentenceTransformer, util
3
  import pandas as pd
4
  from rapidfuzz import fuzz, process
 
 
 
 
 
 
 
 
 
 
5
 
6
  # Load the model
7
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
8
  model = SentenceTransformer(model_name)
9
 
10
- # Load CSV data
11
- data_file = "Luxury_Products_Apparel_Data.csv" # Ensure this file is uploaded
12
- try:
13
- df = pd.read_csv(data_file, nrows=1000) # Limit rows for testing
14
- except FileNotFoundError:
15
- df = pd.DataFrame({
16
- "ProductName": ["Gucci Sunglasses", "Nike Sneakers", "Louis Vuitton Handbag"],
17
- "Category": ["Accessories", "Shoes"],
18
- "SubCategory": ["Sunglasses", "Totes"]
19
- }) # Fallback sample data
20
-
21
  # Extract relevant fields
22
- product_names = df["ProductName"].dropna().tolist()
23
  categories = df["Category"].dropna().unique().tolist()
24
- subcategories = df["SubCategory"].dropna().unique().tolist()
25
 
26
  # Merge into one dataset for autocomplete
27
- autocomplete_data = product_names + categories + subcategories
28
- autocomplete_data = [str(item).strip('"') for item in autocomplete_data] # Clean text
29
 
30
  # Encode all items in the dataset into embeddings
31
  autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True)
@@ -34,20 +34,16 @@ autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True
34
  def find_synonym(word, top_n=1):
35
  query_embedding = model.encode(word, convert_to_tensor=True)
36
  results = util.semantic_search(query_embedding, autocomplete_embeddings, top_k=top_n)
37
-
38
- if results[0]:
39
- return [autocomplete_data[result['corpus_id']] for result in results[0] if result['score'] > 0.6] # Only return synonyms with good confidence
40
- return [word] # Return original word if no synonym is found
41
 
42
  # Function to correct spellings
43
  def correct_spelling(word):
44
- matches = process.extract(word, autocomplete_data, scorer=fuzz.partial_ratio, limit=3)
45
-
46
  if matches:
47
- best_match, score, _ = matches[0] # Extract the best match string & score
48
- if score > 70: # Lowered threshold to allow for more typo tolerance
49
  return best_match
50
- return word # Return the original word if no good match is found
51
 
52
  # Autocomplete function with tracking
53
  def autocomplete(query):
@@ -62,23 +58,20 @@ def autocomplete(query):
62
  matches = process.extract(synonym_query, autocomplete_data, scorer=fuzz.partial_ratio, limit=5)
63
  suggestions = "\n".join([match[0] for match in matches])
64
 
65
- # Detect if spelling correction or synonym replacement occurred
66
  correction_status = f"{original_query} β†’ {corrected_query}" if original_query != corrected_query else "None"
67
  synonym_status = f"{corrected_query} β†’ {synonym_query}" if corrected_query != synonym_query else "None"
68
 
69
- # Return three separate values for Gradio
70
  return correction_status, synonym_status, suggestions
71
 
72
  # Gradio UI
73
  with gr.Blocks() as demo:
74
- gr.Markdown("### AI-Powered Autocomplete with Spell Correction & Synonyms")
75
 
76
  query = gr.Textbox(label="Start typing for autocomplete")
77
  correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
78
  synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
79
  suggestions_output = gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False)
80
 
81
- # Bind function to UI
82
  query.change(fn=autocomplete, inputs=query, outputs=[correction_output, synonym_output, suggestions_output])
83
 
84
  demo.launch()
 
1
  import gradio as gr
2
+ import kagglehub
3
  from sentence_transformers import SentenceTransformer, util
4
  import pandas as pd
5
  from rapidfuzz import fuzz, process
6
+ import os
7
+
8
# Download the latest version of the Vestiaire Fashion Dataset.
dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")

# Locate the CSV file inside the downloaded dataset folder. The expected
# name is "vestiaire.csv", but fall back to the first *.csv present so a
# renamed file in a future dataset version does not crash the app at startup.
csv_file = os.path.join(dataset_path, "vestiaire.csv")
if not os.path.exists(csv_file):
    csv_candidates = sorted(f for f in os.listdir(dataset_path) if f.endswith(".csv"))
    if not csv_candidates:
        raise FileNotFoundError(f"No CSV file found in {dataset_path}")
    csv_file = os.path.join(dataset_path, csv_candidates[0])

# Load only the first 10K rows to keep startup/embedding time reasonable.
df = pd.read_csv(csv_file, nrows=10000)
16
 
17
# Load the sentence-embedding model used for semantic matching.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(model_name)

# Extract relevant fields from the dataframe.
designers = df["Designer"].dropna().unique().tolist()
categories = df["Category"].dropna().unique().tolist()
products = df["Product"].dropna().tolist()

# Merge into one vocabulary for autocomplete. Deduplicate while preserving
# order: "Product" values are taken without .unique(), so repeated product
# names would otherwise be embedded (and suggested) multiple times.
autocomplete_data = designers + categories + products
autocomplete_data = list(dict.fromkeys(str(item).strip('"') for item in autocomplete_data))

# Encode all vocabulary items into embeddings once, at startup.
autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True)
 
34
def find_synonym(word, top_n=1):
    """Return up to ``top_n`` semantically similar vocabulary entries.

    Falls back to ``[word]`` when no candidate clears the confidence
    threshold, so callers always receive a non-empty list (the previous
    revision guaranteed this; returning ``[]`` breaks downstream indexing).
    """
    query_embedding = model.encode(word, convert_to_tensor=True)
    results = util.semantic_search(query_embedding, autocomplete_embeddings, top_k=top_n)
    # Keep only confident matches (cosine-similarity score above 0.6).
    synonyms = [autocomplete_data[hit['corpus_id']] for hit in results[0] if hit['score'] > 0.6]
    return synonyms if synonyms else [word]
 
 
 
38
 
39
# Fuzzy spell correction against the autocomplete vocabulary.
def correct_spelling(word):
    """Return the closest vocabulary entry for *word* when its
    partial-ratio score exceeds 70; otherwise return *word* unchanged."""
    candidates = process.extract(word, autocomplete_data, scorer=fuzz.partial_ratio, limit=1)
    if not candidates:
        return word
    top_text, top_score, _ = candidates[0]
    return top_text if top_score > 70 else word
47
 
48
  # Autocomplete function with tracking
49
  def autocomplete(query):
 
58
  matches = process.extract(synonym_query, autocomplete_data, scorer=fuzz.partial_ratio, limit=5)
59
  suggestions = "\n".join([match[0] for match in matches])
60
 
 
61
  correction_status = f"{original_query} β†’ {corrected_query}" if original_query != corrected_query else "None"
62
  synonym_status = f"{corrected_query} β†’ {synonym_query}" if corrected_query != synonym_query else "None"
63
 
 
64
  return correction_status, synonym_status, suggestions
65
 
66
# Gradio UI: one input box wired to three read-only output boxes.
with gr.Blocks() as demo:
    gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Vestiaire Dataset)")

    search_box = gr.Textbox(label="Start typing for autocomplete")
    result_boxes = [
        gr.Textbox(label="Spelling Correction Applied", interactive=False),
        gr.Textbox(label="Synonym Applied", interactive=False),
        gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False),
    ]

    # Re-run the full pipeline on every keystroke.
    search_box.change(fn=autocomplete, inputs=search_box, outputs=result_boxes)

demo.launch()