Spaces:
Runtime error
Runtime error
File size: 3,230 Bytes
9cc7e25 22cfb6e 9cc7e25 22cfb6e 9cc7e25 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# import packages
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
from sentence_transformers import SentenceTransformer
import chromadb
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import faiss
# Embedding vector
class VectorStore:
    """Semantic index over recipe text, backed by a ChromaDB collection.

    Texts are embedded with the
    ``sentence-transformers/multi-qa-MiniLM-L6-cos-v1`` model and stored in
    a Chroma collection so that free-text queries can be matched against
    them by embedding similarity.
    """

    # Dataset columns that are concatenated (in this order) into one document.
    TEXT_COLUMNS = (
        'title_cleaned',
        'recipe_new',
        'meal_type',
        'allergy_type',
        'ingredients_alternatives',
    )

    def __init__(self, collection_name):
        """Load the embedding model and open the Chroma collection.

        Args:
            collection_name: Name of the Chroma collection to store into.
        """
        # Initialize the embedding model
        self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
        self.chroma_client = chromadb.Client()
        # get_or_create_collection instead of create_collection: the latter
        # raises when a collection with this name already exists (e.g. when
        # a second VectorStore is built in the same process).
        self.collection = self.chroma_client.get_or_create_collection(name=collection_name)

    # Method to populate the vector store with embeddings from a dataset
    def populate_vectors(self, dataset, limit=5000, batch_size=256):
        """Embed rows of ``dataset['train']`` and add them to the collection.

        Args:
            dataset: Mapping with a ``'train'`` split whose columns listed in
                ``TEXT_COLUMNS`` can be indexed by name and sliced.
            limit: Maximum number of rows to index, counted from the start
                of the split. ``None`` indexes every row.
            batch_size: Number of documents embedded and written per call.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        train = dataset['train']
        # Take the first `limit` entries of every text column.
        columns = [train[name][:limit] for name in self.TEXT_COLUMNS]
        # Concatenate the columns row by row into a single document string.
        texts = [
            f"{tit} {rep} {meal} {alle} {ingr} "
            for tit, rep, meal, alle, ingr in zip(*columns)
        ]

        # Encode and insert in batches: one model call and one database
        # write per batch rather than one of each per document.
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            embeddings = self.embedding_model.encode(batch).tolist()
            self.collection.add(
                embeddings=embeddings,
                documents=batch,
                # IDs are the row positions, matching the order of `texts`.
                ids=[str(i) for i in range(start, start + len(batch))],
            )

    # Method to search the ChromaDB collection for relevant context based on a query
    def search_context(self, query, n_results=1):
        """Return the Chroma query result for the entries closest to ``query``.

        Args:
            query: Free-text query to embed.
            n_results: Number of nearest documents to request.

        Returns:
            Whatever ``collection.query`` returns for the embedded query.
        """
        query_embeddings = self.embedding_model.encode(query).tolist()
        return self.collection.query(query_embeddings=query_embeddings, n_results=n_results)
# Load the recipe dataset hosted on the Hugging Face Hub.
# Dataset details - https://huggingface.co/datasets/Thefoodprocessor/recipe_new_with_features_full
dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full')
# Build the vector store and index the dataset's recipe text. This runs at
# import time, before the language model is loaded and the UI is launched.
vector_store = VectorStore("embedding_vector")
vector_store.populate_vectors(dataset)
# Load the tokenizer and the causal language model used for text generation.
# Both are module-level globals read by chatbot_response().
model_name = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Define the chatbot response function
# Running transcript shared by every call to chatbot_response(). It must be
# defined at module level; without it the first call fails with NameError.
conversation_history = []


def chatbot_response(user_input):
    """Generate a reply to ``user_input`` using retrieved recipe context.

    The best-matching document from ``vector_store`` is added to the prompt
    as context, the whole conversation so far is fed to the language model,
    and only the newly generated text is returned.

    Args:
        user_input: The user's message.

    Returns:
        The model's reply as a string, without the prompt.
    """
    results = vector_store.search_context(user_input, n_results=1)
    # Chroma returns one list of documents per query, so the best hit for
    # our single query is documents[0][0], not documents[0].
    documents = results['documents']
    context = documents[0][0] if documents and documents[0] else ""
    conversation_history.append(f"User: {user_input}\nContext: {context[:150]}\nBot:")

    inputs = tokenizer("\n".join(conversation_history), return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[-1]
    # max_new_tokens bounds only the reply. max_length would also count the
    # prompt, leaving no room to generate once the history grows.
    outputs = model.generate(**inputs, max_new_tokens=150, do_sample=True, temperature=0.7)
    # generate() returns prompt + continuation; keep only the continuation
    # so the prompt is neither echoed to the user nor duplicated in history.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
    # NOTE(review): the history grows without bound and will eventually
    # exceed the model's context window; consider trimming old turns.
    conversation_history.append(response)
    return response
# Gradio interface
def chat(user_input):
    """Gradio callback: pass the submitted text to the chatbot and return its reply."""
    return chatbot_response(user_input)
# Wrap chat() in a single text-in / text-out Gradio interface and start the app.
iface = gr.Interface(fn=chat, inputs="text", outputs="text")
iface.launch()
|