import numpy as np # linear algebra import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) import pprint import os import ast import gradio as gr from gradio.themes.base import Base import weaviate from weaviate.embedded import EmbeddedOptions from langchain_community.vectorstores import Weaviate from langchain.prompts import ChatPromptTemplate from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.embeddings import HuggingFaceEmbeddings from langchain.schema import Document from langchain_community.chat_models import ChatOpenAI from langchain.schema.runnable import RunnablePassthrough from langchain.schema.output_parser import StrOutputParser from langchain_core.messages import HumanMessage, SystemMessage os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") df = pd.read_csv('./RAW_recipes.csv') # Variables max_length = 231637 #total number of recipes aka rows curr_len = 10000 # how much we want to process and embed #Concatenate all rows into one string curr_i = 0 recipe_info = [] for index, row in df.iterrows(): if curr_i >= curr_len: break curr_i+=1 name, id, minutes, contributor_id, submitted, tags, nutrition, n_steps, steps, description, ingredients, n_ingredients = row #convert to list nutrition = ast.literal_eval(nutrition) steps = ast.literal_eval(steps) #format nutrition nutrition_map = ["Calorie"," Total Fat", 'Sugar', 'Sodium', 'Protein', 'Saturated Fat', 'Total Carbohydrate'] nutrition_labeled = [] for label, num in zip(nutrition_map, nutrition): if label == "Calorie": nutrition_labeled.append(f"{label} : {num} per serving") else: nutrition_labeled.append(f"{label} : {num} % daily value") #format steps for i in range(len(steps)): steps[i] = f"{i+1}. " + steps[i] recipe_info.append(f''' {name} : {minutes} minutes, submitted on {submitted} description: {description}, ingredients: {ingredients} number of ingredients: {n_ingredients} tags: {tags}, nutrition: {nutrition_labeled}, total steps: {n_steps} steps: {steps} '''.replace("\r", "").replace("\n", "")) text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150) #split into recipe_info into chunks docs = [] for doc in recipe_info: # Wrap each string in a Document object document = Document(page_content=doc) # create a Document object with the content chunk = text_splitter.split_documents([document]) # Pass a list of Document objects docs.append(chunk) # merge all chunks into one merged_documents = [] for doc in docs: merged_documents.extend(doc) # Hugging Face model for embeddings. model_name = "sentence-transformers/all-MiniLM-L6-v2" model_kwargs = {'device': 'cpu'} embeddings = HuggingFaceEmbeddings( model_name=model_name, model_kwargs=model_kwargs, ) #initialize weaviate client client = weaviate.Client( embedded_options = EmbeddedOptions() ) vector_search = Weaviate.from_documents( client = client, documents = merged_documents, embedding = embeddings, by_text = False ) # Instantiate Weaviate Vector Search as a retriever # Basic RAG. # k to search for only the 25 most relevant documents. # score_threshold to use only documents with a relevance score above 0.77. k = 10 score_threshold = 0.77 retriever = vector_search.as_retriever( search_type = "mmr", search_kwargs = { "k": k, "score_threshold": score_threshold } ) template = """ You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question at the end. The following pieces of retrieved context are recipes. If you don't know the answer, just say that you don't know. Don't try to make up an answer. Dont say anthing mean or offensive. Context: {context} Question: {question} """ custom_rag_prompt = ChatPromptTemplate.from_template(template) llm = ChatOpenAI( model_name="gpt-3.5-turbo", temperature=0.2) # Regular chain format: chain = prompt | model | output_parser rag_chain = ( {"context": retriever, "question": RunnablePassthrough()} | custom_rag_prompt | llm | StrOutputParser() ) def get_response(query): return rag_chain.invoke(query) with gr.Blocks(theme=Base(), title="RAG Recipe AI") as demo: gr.Markdown(""" # RAG Recipe AI This model will answer all your recipe-related questions. Enter a question about a recipe, and the system will return an answer based on 10,000 food.com recipes stored in the vector database. \n Features Considered: \n \t - Cook Time \t - Nutrition Information \t - Steps \t - Ingredients \t - Dish Description Sample Queries: \n \t - What is an easy dessert I can make with apples? \t - What is the nutritional information of a Caesar salad? \t - How many calories is in an average American burger? """) textbox = gr.Textbox(label="Question:") with gr.Row(): button = gr.Button("Submit", variant="primary") with gr.Column(): output1 = gr.Textbox(lines=1, max_lines=10, label="Answer:") # Call get_response function upon clicking the Submit button. button.click(get_response, textbox, outputs=[output1]) demo.launch()