# Last commit: "Update app.py" by Mishal23 (88e35ba, verified)
# app.py
# Install necessary libraries (Use only if these aren't already installed)
# !pip install pandas sentence-transformers transformers datasets faiss-cpu gradio
# Import libraries
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
from datasets import load_dataset
import gradio as gr
# --- Data loading ---------------------------------------------------------
# Support conversations come from the Hugging Face Hub dataset.
support_data = load_dataset("rjac/e-commerce-customer-support-qa")

# The FAQ sheet is read from a local CSV (expected next to this script, or
# give a full path). Its 'prompt'/'response' columns are renamed to the
# 'Question'/'Answer' schema used by the rest of the file, and every other
# column is dropped.
faq_data = (
    pd.read_csv("Ecommerce_FAQs.csv")
    .rename(columns={'prompt': 'Question', 'response': 'Answer'})
    [['Question', 'Answer']]
)

# Materialise the 'train' split as a DataFrame for row-wise processing.
support_data_df = pd.DataFrame(support_data['train'])
# Extract question-answer pairs from the conversation field
def extract_conversation(data):
    """Pull a question/answer pair out of one raw conversation string.

    The text is split on blank lines ("\\n\\n"). The second chunk is treated
    as the question and the third as the answer; in each, everything up to
    and including the first ": " (the speaker label) is removed.

    Args:
        data: The raw conversation text. Non-string values (e.g. None or
            NaN from a missing cell) are tolerated.

    Returns:
        pd.Series with keys "Question" and "Answer". Both are empty strings
        when ``data`` is not a string or when a chunk that is present lacks
        the ": " separator. A chunk that is absent altogether yields an
        empty string for that key only.
    """
    blank = {"Question": "", "Answer": ""}

    # A missing cell arrives as None/NaN; calling .split on it would raise
    # AttributeError and abort the whole DataFrame.apply.
    if not isinstance(data, str):
        return pd.Series(blank)

    parts = data.split("\n\n")
    try:
        # [1] after the split raises IndexError when ": " is not present.
        question = parts[1].split(": ", 1)[1] if len(parts) > 1 else ""
        answer = parts[2].split(": ", 1)[1] if len(parts) > 2 else ""
    except IndexError:
        return pd.Series(blank)
    return pd.Series({"Question": question, "Answer": answer})
# Apply extraction function
support_data_df[['Question', 'Answer']] = support_data_df['conversation'].apply(extract_conversation)

# Combine FAQ data with support data
combined_data = pd.concat([faq_data, support_data_df[['Question', 'Answer']]], ignore_index=True)

# Drop rows that cannot serve as a retrieval target: a missing/blank question
# would still be embedded and could be returned as the "nearest" match, and a
# missing/blank answer would give the user an empty reply. The index is reset
# so that row positions line up with the positions FAISS hands back.
_has_question = combined_data['Question'].fillna('').astype(str).str.strip().ne('')
_has_answer = combined_data['Answer'].fillna('').astype(str).str.strip().ne('')
combined_data = combined_data[_has_question & _has_answer].reset_index(drop=True)

# Initialize SBERT Model
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Generate and Index Embeddings for Combined Data
# (astype(str) guards against non-string cells, e.g. numeric FAQ entries)
questions = combined_data['Question'].astype(str).tolist()
embeddings = model.encode(questions, convert_to_tensor=True)

# Create FAISS index (exact L2 search over the question embeddings); vectors
# are added in DataFrame row order, so FAISS id i corresponds to
# combined_data.iloc[i].
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings.cpu().numpy())
# Define Retrieval Function
def retrieve_answer(question):
    """Return the stored answer whose question is closest to ``question``.

    The query is embedded with the module-level ``model``, the single
    nearest neighbour is looked up in the module-level FAISS ``index``, and
    the 'Answer' value at that row position of ``combined_data`` is returned.

    Args:
        question: The user's query text.

    Returns:
        The matched answer, or a short fallback message when the query is
        blank / not a string or when the index yields no neighbour.
    """
    # Embedding an empty query would return an arbitrary nearest row.
    if not isinstance(question, str) or not question.strip():
        return "Please enter a question."

    query_vec = model.encode([question], convert_to_tensor=True).cpu().numpy()
    _, neighbours = index.search(query_vec, k=1)
    best_match_idx = int(neighbours[0][0])

    # FAISS reports -1 when it has no result for a slot; using that as a
    # positional index would select the wrong row (or fail on an empty frame).
    if best_match_idx < 0:
        return "Sorry, I couldn't find an answer to that question."

    return combined_data.iloc[best_match_idx]['Answer']
# Gradio Interface
def chatbot_interface(user_input):
    """Gradio callback: look up an answer for ``user_input`` and label it.

    Returns the retrieved answer prefixed with "Bot: ".
    """
    return f"Bot: {retrieve_answer(user_input)}"
# Set up Gradio Chat Interface
# The input widget is a two-line text box with a hint for the user.
_question_box = gr.Textbox(lines=2, placeholder="Type your question here...")

# Wire the callback to the text box and a plain-text output.
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=_question_box,
    outputs="text",
    title="E-commerce Support Chatbot",
    description="Ask questions about order tracking, returns, account help, and more!",
)

# Launch the Interface (sharing enabled)
iface.launch(share=True)