"""Gradio demo: movie-plot RAG chat backed by Haystack and MongoDB Atlas Vector Search.

At import time the module embeds a small set of sample plots, writes them to a
MongoDB Atlas collection and builds a retrieval-augmented generation (RAG)
pipeline.  The Gradio UI then exposes a chat that answers questions from those
plots.

NOTE(review): the help text shown to users refers to the ``OPENAI_API_KEY`` and
``MONGODB_ATLAS_CLUSTER_URI`` secrets; the Haystack components are presumably
configured through those environment variables -- confirm against the
integration documentation.
"""

import time
import traceback
import os
import asyncio
import json

import gradio as gr
import requests
from haystack import Pipeline, Document
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.generators import OpenAIGenerator
from haystack.components.writers import DocumentWriter
from haystack.document_stores.types import DuplicatePolicy
from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore

# Sample plots.  Defined outside the setup ``try`` so the UI table can always
# be rendered, even when the database or OpenAI setup fails.
documents = [
    Document(content="In the vibrant streets of Paris, Jean, a struggling painter, discovers an ancient secret hidden within his family’s artwork. As he unravels the mystery, he must navigate the treacherous art world, facing rivals and allies alike, on a journey that could redefine his destiny and restore a lost masterpiece."),
    Document(content="Mark, a visionary software developer in Berlin, stumbles upon a groundbreaking algorithm that could change the tech world forever. But as he delves deeper, he finds himself entangled in a web of corporate espionage, challenging his ethics and risking everything he has worked for."),
    Document(content="Giorgio, a dedicated archaeologist in Rome, uncovers an artifact that challenges the foundation of Roman history. His quest for truth leads him through ancient ruins and modern intrigue, as he battles to protect his discovery from those who wish to suppress it."),
    Document(content="Lily, an ambitious journalist in New York, receives a mysterious diary that leads her on an investigation into a century-old mystery involving a forbidden romance and a family's hidden secrets."),
    Document(content="Elena, a young physicist in Geneva, discovers a paradox within the laws of quantum mechanics that could alter our understanding of the universe. Her journey to prove her theory brings her face to face with dark matter and the very fabric of space and time."),
    Document(content="Alex, a disillusioned detective in London, finds himself on the trail of a serial killer who leaves cryptic clues rooted in historical events. As he pieces together the puzzle, he uncovers a conspiracy that could shake the foundations of the British monarchy."),
    Document(content="Nora, a marine biologist in the Great Barrier Reef, discovers a new species of coral with extraordinary regenerative properties. Her fight to protect her discovery from exploitation leads her into a conflict with powerful interests determined to harness the coral's powers for themselves."),
    Document(content="Simon, an expert linguist in Cairo, deciphers an ancient scroll that reveals the location of a hidden chamber beneath the Sphinx. His search for the chamber propels him into a world of mystery and danger, as he confronts a secret society with its own agenda."),
    Document(content="Amelia, a renowned chef in Tokyo, embarks on a quest to rediscover a lost recipe that was once the hallmark of her family's restaurant. Her journey takes her across Japan, delving into the country's culinary history and challenging her beliefs about cooking and tradition."),
    Document(content="Victor, a retired astronaut in Houston, is contacted by an alien intelligence through a mysterious transmission. As he seeks to uncover the truth, he is drawn into an interstellar adventure that reveals the universe's greatest mysteries and humanity's place among the stars."),
]

# Template for generating prompts for the movie recommendation engine.
# The query line starts with "\n" (the original "\Query" was an invalid escape
# sequence that put a literal backslash into the prompt).
prompt_template = """
    You are a movie recommendation engine use the following context documents.\nDocuments:
    {% for doc in documents %}
        {{ doc.content }}
    {% endfor %}

    \nQuery: {{query}}
    \nAnswer:
"""

# Shown in the chat whenever the RAG pipeline is unavailable or a query fails.
_SETUP_HELP_MESSAGE = "Please clone the repo and add your open ai key as well as your MongoDB Atlas URI in the Secret Section of you Space\n OPENAI_API_KEY (your Open AI key) and MONGODB_ATLAS_CLUSTER_URI (0.0.0.0/0 whitelisted instance with Vector index created) \n\n For more information : https://mongodb.com/products/platform/atlas-vector-search"

# Delay between streamed characters, in seconds.
_STREAM_DELAY_SECONDS = 0.03


def _build_indexing_pipeline(document_store):
    """Return a pipeline that embeds documents and writes them to *document_store*."""
    pipeline = Pipeline()
    pipeline.add_component(instance=OpenAIDocumentEmbedder(), name="doc_embedder")
    pipeline.add_component(
        instance=DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP),
        name="doc_writer",
    )
    # Embedded documents flow from the embedder into the writer.
    pipeline.connect("doc_embedder.documents", "doc_writer.documents")
    return pipeline


def _build_rag_pipeline(document_store):
    """Return the query pipeline: embed query -> retrieve -> build prompt -> generate."""
    pipeline = Pipeline()
    pipeline.add_component("text_embedder", OpenAITextEmbedder())
    # Retrieves related documents from MongoDB based on the query embedding.
    pipeline.add_component(
        instance=MongoDBAtlasEmbeddingRetriever(document_store=document_store, top_k=15),
        name="retriever",
    )
    # Builds the prompt from the retrieved documents.
    pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
    # Language model generator producing the final text output.
    pipeline.add_component(instance=OpenAIGenerator(), name="llm")
    pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
    pipeline.connect("retriever", "prompt_builder.documents")
    pipeline.connect("prompt_builder", "llm")
    return pipeline


# Both stay ``None`` when setup fails, so the handlers below can report the
# problem instead of failing on an undefined name.
indexing_pipe = None
rag_pipeline = None

try:
    # Document store in MongoDB Atlas holding the narrative documents.
    document_store = MongoDBAtlasDocumentStore(
        database_name="sample_mflix",  # Database name.
        collection_name="haystack_embedded_movies",  # Collection where documents are stored.
        vector_search_index="default",  # Search index for vectorized content.
    )

    # Index the sample documents.
    indexing_pipe = _build_indexing_pipeline(document_store)
    indexing_pipe.run({"doc_embedder": {"documents": documents}})

    rag_pipeline = _build_rag_pipeline(document_store)
except Exception:
    # Top-level boundary: report the failure but still start the UI so the
    # user sees the setup instructions in the chat.
    print("An error occurred: \n" + traceback.format_exc())


def get_movies(message, history):
    """Answer *message* with the RAG pipeline, streaming the reply.

    Args:
        message: The user's chat message; used both as the text to embed and
            as the query inserted into the prompt.
        history: Chat history supplied by ``gr.ChatInterface`` (unused).

    Yields:
        Growing prefixes of the first LLM reply, one more character per step,
        or a single setup-help message if the pipeline is unavailable or the
        query fails.
    """
    try:
        if rag_pipeline is None:
            raise RuntimeError("RAG pipeline is not initialised")

        result = rag_pipeline.run(
            {
                "text_embedder": {"text": message},
                "prompt_builder": {"query": message},
            }
        )
        reply = result['llm']['replies'][0]
        # Emit the reply character by character to simulate streaming.
        for end in range(1, len(reply) + 1):
            time.sleep(_STREAM_DELAY_SECONDS)
            yield reply[:end]
    except Exception:
        print("An error occurred: \n" + traceback.format_exc())
        yield _SETUP_HELP_MESSAGE


# Convert documents to a format suitable for the Gradio Dataframe.
data_for_dataframe = [[doc.content] for doc in documents]
headers = ["Plot"]


def update_movie_data(new_data):
    """Index the rows of the edited Dataframe as new documents.

    Args:
        new_data: The table value passed by Gradio; iterated with
            ``iterrows()`` and expected to have a ``'Plot'`` column.

    Returns:
        *new_data* unchanged, so the table keeps showing the submitted rows.

    Raises:
        RuntimeError: If the indexing pipeline could not be set up at startup.
    """
    if indexing_pipe is None:
        raise RuntimeError("Indexing pipeline is not initialised; check the startup log.")

    print(new_data)
    new_documents = []
    for _, row in new_data.iterrows():
        plot_content = row['Plot']
        print(plot_content)
        # Skip empty cells (blank strings, NaN/None) -- there is nothing to embed.
        if isinstance(plot_content, str) and plot_content.strip():
            new_documents.append(Document(content=plot_content))

    if new_documents:
        indexing_pipe.run({"doc_embedder": {"documents": new_documents}})
    return new_data


def fetch_url_data(url, timeout=10):
    """Return the body of *url* as text, or an ``"Error: ..."`` string on failure.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up, so building
            the UI cannot hang on an unresponsive host.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for unsuccessful status codes.
        return response.text
    except requests.RequestException as e:
        return f"Error: {e}"


# Setup Gradio interface
with gr.Blocks() as demo:
    with gr.Tab("Demo"):
        gr.Markdown("## Movie Plot Viewer")
        movie_table = gr.Dataframe(value=data_for_dataframe, headers=headers, interactive=False)
        submit_button = gr.Button("Update Data")

        ## value=[(None, "Hi, I'm a MongoDB and Haystack based question and answer bot 🤖, I can help you answer on the knowledge base above…")]
        gr.ChatInterface(
            get_movies,
            examples=["What characters are from Rome?", "Combine 3 plots of your choice", "List all characters"],
            title="Atlas Vector Search Chat",
            description="This small chat uses a similarity search to find relevant plots as listed above, it uses MongoDB Atlas and Haystack integaration: https://haystack.deepset.ai/integrations/mongodb",
            submit_btn="Search",
        ).queue()
        submit_button.click(fn=update_movie_data, inputs=[movie_table], outputs=[movie_table])

    with gr.Tab("Code"):
        gr.Code(
            label="Code",
            language="python",
            value=fetch_url_data('https://huggingface.co/spaces/MongoDB/Haystack-MongoDB-Integration-Chat/raw/main/app.py'),
        )

if __name__ == "__main__":
    demo.launch()