"""E-commerce support chatbot (app.py).

Builds a small retrieval-based FAQ bot:

1. Load question/answer pairs from a local FAQ CSV and from a Hugging Face
   customer-support dataset.
2. Embed every question with a Sentence-BERT model and store the vectors in
   a flat L2 FAISS index.
3. Answer a user query by returning the stored answer whose question
   embedding is nearest to the query embedding.
4. Expose the bot through a Gradio text interface.

Install the dependencies first if they are not already available:

    pip install pandas sentence-transformers transformers datasets faiss-cpu gradio
"""

import faiss
import gradio as gr
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer

# Data sources and model identifier.
SUPPORT_DATASET_NAME = "rjac/e-commerce-customer-support-qa"
# The CSV must be in the working directory, or replace this with a full path.
FAQ_CSV_PATH = "Ecommerce_FAQs.csv"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

# Replies used when no lookup can be performed or no neighbour is found.
EMPTY_QUESTION_REPLY = "Please type a question so I can help."
NO_MATCH_REPLY = "Sorry, I could not find an answer to that question."


def extract_conversation(data) -> pd.Series:
    """Extract one question/answer pair from a conversation transcript.

    The transcript is split on blank lines ("\\n\\n"). The second chunk is
    used as the question and the third as the answer; in both, everything up
    to and including the first ": " is discarded.
    NOTE(review): this presumably skips an opening agent greeting and strips
    "Speaker: " prefixes - confirm against the dataset's actual format.

    Args:
        data: The raw conversation text. Non-string values (e.g. None/NaN)
            are treated as an unparseable conversation.

    Returns:
        A Series with "Question" and "Answer" entries. A chunk that does not
        exist yields "" for that entry; if an existing chunk has no ": "
        separator, both entries are "".
    """
    if not isinstance(data, str):
        # Previously this raised AttributeError, which was not caught.
        return pd.Series({"Question": "", "Answer": ""})

    chunks = data.split("\n\n")
    texts = []
    for chunk in chunks[1:3]:
        _, separator, text = chunk.partition(": ")
        if not separator:
            # Same outcome as the IndexError path: give up on the whole row.
            return pd.Series({"Question": "", "Answer": ""})
        texts.append(text)

    # Pad so that missing chunks become empty strings.
    texts.extend([""] * (2 - len(texts)))
    question, answer = texts
    return pd.Series({"Question": question, "Answer": answer})


def _clean_qa_pairs(frame: pd.DataFrame) -> pd.DataFrame:
    """Return *frame* without rows whose Question or Answer is missing/blank.

    Text is coerced to ``str`` and stripped. The index is reset so that row
    positions are contiguous and line up with the ids FAISS assigns when the
    question embeddings are added in order.
    """
    cleaned = frame.dropna(subset=["Question", "Answer"]).copy()
    for column in ("Question", "Answer"):
        cleaned[column] = cleaned[column].astype(str).str.strip()
    has_text = (cleaned["Question"] != "") & (cleaned["Answer"] != "")
    return cleaned[has_text].reset_index(drop=True)


# Load the dataset from Hugging Face and the FAQ CSV.
support_data = load_dataset(SUPPORT_DATASET_NAME)
faq_data = pd.read_csv(FAQ_CSV_PATH)

# Normalise the FAQ columns to Question/Answer.
faq_data.rename(columns={'prompt': 'Question', 'response': 'Answer'}, inplace=True)
faq_data = faq_data[['Question', 'Answer']]

# Extract question/answer pairs from the support conversations.
support_data_df = pd.DataFrame(support_data['train'])
support_data_df[['Question', 'Answer']] = support_data_df['conversation'].apply(
    extract_conversation
)

# Combine both sources, then drop unusable rows before anything is embedded:
# NaN questions cannot be encoded and blank rows would produce blank replies.
combined_data = pd.concat(
    [faq_data, support_data_df[['Question', 'Answer']]],
    ignore_index=True,
)
combined_data = _clean_qa_pairs(combined_data)
if combined_data.empty:
    raise ValueError("No usable question/answer pairs were loaded.")

# Initialise the SBERT model and embed every stored question.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
questions = combined_data['Question'].tolist()
# Ask for NumPy output directly; FAISS consumes NumPy arrays.
embeddings = model.encode(questions, convert_to_numpy=True)

# Exact (brute-force) L2 index; ids are the row positions in combined_data.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)


def retrieve_answer(question):
    """Return the stored answer whose question is closest to *question*.

    Args:
        question: The user's query text.

    Returns:
        The answer string of the nearest stored question, or a fallback
        message when *question* is empty/blank/not a string or the index
        reports no neighbour.
    """
    if not isinstance(question, str) or not question.strip():
        return EMPTY_QUESTION_REPLY

    question_embedding = model.encode([question], convert_to_numpy=True)
    _, closest_index = index.search(question_embedding, k=1)
    best_match_idx = int(closest_index[0][0])
    if best_match_idx < 0:
        # FAISS uses -1 for "no result"; iloc[-1] would silently pick the
        # last row instead.
        return NO_MATCH_REPLY

    return str(combined_data.iloc[best_match_idx]['Answer'])


def chatbot_interface(user_input):
    """Gradio callback: look up an answer and prefix it with "Bot: "."""
    response = retrieve_answer(user_input)
    return f"Bot: {response}"


# Set up the Gradio interface.
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.Textbox(lines=2, placeholder="Type your question here..."),
    outputs="text",
    title="E-commerce Support Chatbot",
    description="Ask questions about order tracking, returns, account help, and more!"
)

if __name__ == "__main__":
    # NOTE: share=True asks Gradio for a publicly reachable link.
    iface.launch(share=True)