Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,31 +1,31 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
from sentence_transformers import SentenceTransformer, util
|
3 |
import pandas as pd
|
4 |
from rapidfuzz import fuzz, process
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# Load the model
|
7 |
model_name = "sentence-transformers/all-MiniLM-L6-v2"
|
8 |
model = SentenceTransformer(model_name)
|
9 |
|
10 |
-
# Load CSV data
|
11 |
-
data_file = "Luxury_Products_Apparel_Data.csv" # Ensure this file is uploaded
|
12 |
-
try:
|
13 |
-
df = pd.read_csv(data_file, nrows=1000) # Limit rows for testing
|
14 |
-
except FileNotFoundError:
|
15 |
-
df = pd.DataFrame({
|
16 |
-
"ProductName": ["Gucci Sunglasses", "Nike Sneakers", "Louis Vuitton Handbag"],
|
17 |
-
"Category": ["Accessories", "Shoes"],
|
18 |
-
"SubCategory": ["Sunglasses", "Totes"]
|
19 |
-
}) # Fallback sample data
|
20 |
-
|
21 |
# Extract relevant fields
|
22 |
-
|
23 |
categories = df["Category"].dropna().unique().tolist()
|
24 |
-
|
25 |
|
26 |
# Merge into one dataset for autocomplete
|
27 |
-
autocomplete_data =
|
28 |
-
autocomplete_data = [str(item).strip('"') for item in autocomplete_data]
|
29 |
|
30 |
# Encode all items in the dataset into embeddings
|
31 |
autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True)
|
@@ -34,20 +34,16 @@ autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True
|
|
34 |
def find_synonym(word, top_n=1):
|
35 |
query_embedding = model.encode(word, convert_to_tensor=True)
|
36 |
results = util.semantic_search(query_embedding, autocomplete_embeddings, top_k=top_n)
|
37 |
-
|
38 |
-
if results[0]:
|
39 |
-
return [autocomplete_data[result['corpus_id']] for result in results[0] if result['score'] > 0.6] # Only return synonyms with good confidence
|
40 |
-
return [word] # Return original word if no synonym is found
|
41 |
|
42 |
# Function to correct spellings
|
43 |
def correct_spelling(word):
|
44 |
-
matches = process.extract(word, autocomplete_data, scorer=fuzz.partial_ratio, limit=
|
45 |
-
|
46 |
if matches:
|
47 |
-
best_match, score, _ = matches[0]
|
48 |
-
if score > 70:
|
49 |
return best_match
|
50 |
-
return word
|
51 |
|
52 |
# Autocomplete function with tracking
|
53 |
def autocomplete(query):
|
@@ -62,23 +58,20 @@ def autocomplete(query):
|
|
62 |
matches = process.extract(synonym_query, autocomplete_data, scorer=fuzz.partial_ratio, limit=5)
|
63 |
suggestions = "\n".join([match[0] for match in matches])
|
64 |
|
65 |
-
# Detect if spelling correction or synonym replacement occurred
|
66 |
correction_status = f"{original_query} β {corrected_query}" if original_query != corrected_query else "None"
|
67 |
synonym_status = f"{corrected_query} β {synonym_query}" if corrected_query != synonym_query else "None"
|
68 |
|
69 |
-
# Return three separate values for Gradio
|
70 |
return correction_status, synonym_status, suggestions
|
71 |
|
72 |
# Gradio UI
|
73 |
with gr.Blocks() as demo:
|
74 |
-
gr.Markdown("### AI-Powered
|
75 |
|
76 |
query = gr.Textbox(label="Start typing for autocomplete")
|
77 |
correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
|
78 |
synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
|
79 |
suggestions_output = gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False)
|
80 |
|
81 |
-
# Bind function to UI
|
82 |
query.change(fn=autocomplete, inputs=query, outputs=[correction_output, synonym_output, suggestions_output])
|
83 |
|
84 |
demo.launch()
|
|
|
import gradio as gr
import kagglehub
from sentence_transformers import SentenceTransformer, util
import pandas as pd
from rapidfuzz import fuzz, process
import os

# Download the latest version of the Vestiaire Fashion Dataset and load it.
# Wrapped in a best-effort try/except at this top-level boundary: if the
# download or the CSV read fails (offline build, renamed file, bad parse),
# fall back to a tiny in-memory sample so the demo still starts instead of
# crashing at import time.
try:
    dataset_path = kagglehub.dataset_download("justinpakzad/vestiaire-fashion-dataset")
    # Locate the CSV inside the downloaded dataset folder.
    csv_file = os.path.join(dataset_path, "vestiaire.csv")  # adjust if the dataset layout changes
    # Load only the first 10K rows for performance.
    df = pd.read_csv(csv_file, nrows=10000)
except Exception as exc:  # top-level boundary: log and degrade gracefully
    print(f"Could not load Vestiaire dataset ({exc!r}); using fallback sample data.")
    df = pd.DataFrame({
        "Designer": ["Gucci", "Nike", "Louis Vuitton"],
        "Category": ["Accessories", "Shoes", "Bags"],
        "Product": ["Gucci Sunglasses", "Nike Sneakers", "Louis Vuitton Handbag"],
    })

# Load the sentence-embedding model used for semantic autocomplete.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(model_name)
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Extract the searchable fields. Designers and categories were already
# deduplicated; products now are too — the raw column contains many
# repeats, and duplicate strings would otherwise be embedded (and shown
# as suggestions) multiple times.
designers = df["Designer"].dropna().unique().tolist()
categories = df["Category"].dropna().unique().tolist()
products = df["Product"].dropna().unique().tolist()

# Merge into one corpus for autocomplete, stripping stray surrounding quotes.
autocomplete_data = [str(item).strip('"') for item in designers + categories + products]

# De-duplicate again after normalisation, preserving order so that the
# corpus_id indices returned by semantic_search stay aligned with this list.
autocomplete_data = list(dict.fromkeys(autocomplete_data))

# Encode every corpus entry once; user queries are compared against these.
autocomplete_embeddings = model.encode(autocomplete_data, convert_to_tensor=True)
|
|
def find_synonym(word, top_n=1):
    """Return up to ``top_n`` semantically similar corpus entries for ``word``.

    Only hits with a cosine-similarity score above 0.6 are kept. Falls back
    to ``[word]`` when nothing clears the threshold, so callers are always
    handed a non-empty list (the previous revision guaranteed this; the
    threshold filter alone can yield ``[]`` and break single-value use
    downstream — NOTE(review): confirm against the autocomplete() caller).
    """
    query_embedding = model.encode(word, convert_to_tensor=True)
    results = util.semantic_search(query_embedding, autocomplete_embeddings, top_k=top_n)
    # result['corpus_id'] indexes into autocomplete_data (built above).
    synonyms = [autocomplete_data[hit['corpus_id']] for hit in results[0] if hit['score'] > 0.6]
    return synonyms if synonyms else [word]
|
|
|
|
|
|
# Function to correct spellings: snap a (possibly misspelled) word onto the
# closest corpus entry when the fuzzy-match confidence is high enough.
def correct_spelling(word):
    """Return the best fuzzy corpus match for ``word``, or ``word`` unchanged."""
    best = process.extractOne(word, autocomplete_data, scorer=fuzz.partial_ratio)
    if best is None:
        return word
    candidate, confidence, _ = best
    # Same 70-point confidence bar as before; below it, keep the input as-is.
    return candidate if confidence > 70 else word
47 |
|
48 |
# Autocomplete function with tracking
|
49 |
def autocomplete(query):
|
|
|
58 |
matches = process.extract(synonym_query, autocomplete_data, scorer=fuzz.partial_ratio, limit=5)
|
59 |
suggestions = "\n".join([match[0] for match in matches])
|
60 |
|
|
|
61 |
correction_status = f"{original_query} β {corrected_query}" if original_query != corrected_query else "None"
|
62 |
synonym_status = f"{corrected_query} β {synonym_query}" if corrected_query != synonym_query else "None"
|
63 |
|
|
|
64 |
return correction_status, synonym_status, suggestions
|
65 |
|
66 |
# Gradio UI
|
67 |
with gr.Blocks() as demo:
|
68 |
+
gr.Markdown("### AI-Powered Luxury Fashion Autocomplete (Vestiaire Dataset)")
|
69 |
|
70 |
query = gr.Textbox(label="Start typing for autocomplete")
|
71 |
correction_output = gr.Textbox(label="Spelling Correction Applied", interactive=False)
|
72 |
synonym_output = gr.Textbox(label="Synonym Applied", interactive=False)
|
73 |
suggestions_output = gr.Textbox(label="Autocomplete Suggestions", lines=5, interactive=False)
|
74 |
|
|
|
75 |
query.change(fn=autocomplete, inputs=query, outputs=[correction_output, synonym_output, suggestions_output])
|
76 |
|
77 |
demo.launch()
|