import re

import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from PIL import Image
from sentence_transformers import SentenceTransformer

nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Sentence embedding model for text retrieval and CLIP model for text/image embeddings
model = SentenceTransformer('all-mpnet-base-v2')
clip_model = SentenceTransformer('clip-ViT-B-32')


def clean_text(text):
    # Lowercase
    text = text.lower()
    # Remove special characters and digits
    text = re.sub(r'[^a-z\s]', '', text)
    # Tokenize
    words = word_tokenize(text)
    # Remove stopwords and lemmatize
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    # Join words back to a single string
    cleaned_text = ' '.join(words)
    return cleaned_text


def extract_order_id_from_query(text):
    # Look for order IDs of the form "B-<digits>"
    match = re.search(r'\bB-\d+\b', text)
    if match:
        return match.group(0)
    return None


def generate_text_embedding(text):
    try:
        text_embedding = clip_model.encode(text, convert_to_tensor=True)
        return text_embedding.cpu().numpy()
    except Exception as e:
        print(f"Error processing text '{text}': {e}")
        return np.zeros((512,))  # clip-ViT-B-32 produces 512-dimensional embeddings


def generate_image_embedding(image_path):
    try:
        image = Image.open(image_path)
        image = image.convert('RGB')
        image_embedding = clip_model.encode(image, convert_to_tensor=True)
        return image_embedding.cpu().numpy()  # Convert to numpy array for easy storage
    except Exception as e:
        print(f"Error processing image from {image_path}: {e}")
        return np.zeros((512,))


def clear_chat():
    return []


def undo_last_message(chatbot):
    if chatbot:
        chatbot.pop()
    return chatbot
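
# --- Minimal usage sketch (illustrative only) ---
# The query string and image path below are hypothetical examples, not part of the
# pipeline above; they only show the expected inputs and output shapes.
query = "Where is my order B-4521? The shoes arrived damaged."
print(clean_text(query))                   # cleaned, lemmatized text without stopwords
print(extract_order_id_from_query(query))  # -> "B-4521"

text_vec = generate_text_embedding(query)
print(text_vec.shape)                      # (512,) for clip-ViT-B-32

# image_vec = generate_image_embedding("sample.jpg")  # hypothetical image path
# print(image_vec.shape)                              # also (512,)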