Spaces:

bstraehle
/

rag

Running

App Files Community

bstraehle committed on Jan 6, 2024

Commit

0ddb69a

1 Parent(s): 4873e9b

Update rag_llamaindex.py

Browse files

Files changed (1) hide show

rag_llamaindex.py +51 -50

rag_llamaindex.py CHANGED Viewed

@@ -22,69 +22,70 @@ MONGODB_INDEX_NAME        = "default"
 logging.basicConfig(stream = sys.stdout, level = logging.INFO)
 logging.getLogger().addHandler(logging.StreamHandler(stream = sys.stdout))
-def load_documents():
-    docs = []
-    # PDF
-    PDFReader = download_loader("PDFReader")
-    loader = PDFReader()
-    out_dir = Path("data")
-    if not out_dir.exists():
-        os.makedirs(out_dir)
-    out_path = out_dir / "gpt-4.pdf"
-    if not out_path.exists():
-        r = requests.get(PDF_URL)
-        with open(out_path, "wb") as f:
-            f.write(r.content)
-    docs.extend(loader.load_data(file = Path(out_path)))
-    #print("docs = " + str(len(docs)))
-    # Web
-    SimpleWebPageReader = download_loader("SimpleWebPageReader")
-    loader = SimpleWebPageReader()
-    docs.extend(loader.load_data(urls = [WEB_URL]))
-    #print("docs = " + str(len(docs)))
-    # YouTube
-    loader = YoutubeTranscriptReader()
-    docs.extend(loader.load_data(ytlinks = [YOUTUBE_URL_1,
-                                            YOUTUBE_URL_2]))
-    #print("docs = " + str(len(docs)))
-    return docs
-def store_documents(config, docs):
-    storage_context = StorageContext.from_defaults(
-        vector_store = get_vector_store())
-    VectorStoreIndex.from_documents(
-        docs,
-        storage_context = storage_context
-    )
-def get_vector_store():
-    return MongoDBAtlasVectorSearch(
-        MongoClient(MONGODB_ATLAS_CLUSTER_URI),
-        db_name = MONGODB_DB_NAME,
-        collection_name = MONGODB_COLLECTION_NAME,
-        index_name = MONGODB_INDEX_NAME
-    )
-def rag_ingestion_llamaindex(config):
-    docs = load_documents()
-    store_documents(config, docs)
-def rag_retrieval(config, prompt):
-    index = VectorStoreIndex.from_vector_store(
-        vector_store = get_vector_store())
-    query_engine = index.as_query_engine(
-        similarity_top_k = config["k"]
-    )
-    return query_engine.query(prompt)

 logging.basicConfig(stream = sys.stdout, level = logging.INFO)
 logging.getLogger().addHandler(logging.StreamHandler(stream = sys.stdout))
+class LlamaIndexRAG:
+    """RAG ingestion and retrieval with LlamaIndex on a MongoDB Atlas vector store."""
+    def load_documents(self):
+        """Load the PDF, the web page and the YouTube transcripts; return one list."""
+        docs = []
+        # PDF (downloaded only when data/gpt-4.pdf is not already on disk)
+        PDFReader = download_loader("PDFReader")
+        loader = PDFReader()
+        out_dir = Path("data")
+        os.makedirs(out_dir, exist_ok = True)
+        out_path = out_dir / "gpt-4.pdf"
+        if not out_path.exists():
+            r = requests.get(PDF_URL)
+            r.raise_for_status()  # never cache an HTTP error body as the PDF
+            with open(out_path, "wb") as f:
+                f.write(r.content)
+        docs.extend(loader.load_data(file = Path(out_path)))
+        # Web
+        SimpleWebPageReader = download_loader("SimpleWebPageReader")
+        loader = SimpleWebPageReader()
+        docs.extend(loader.load_data(urls = [WEB_URL]))
+        # YouTube
+        loader = YoutubeTranscriptReader()
+        docs.extend(loader.load_data(ytlinks = [YOUTUBE_URL_1,
+                                                YOUTUBE_URL_2]))
+        return docs
+    def store_documents(self, config, docs):
+        """Index docs into the vector store; config is accepted but not read here."""
+        storage_context = StorageContext.from_defaults(
+            vector_store = self.get_vector_store())
+        VectorStoreIndex.from_documents(docs, storage_context = storage_context)
+    def get_vector_store(self):
+        """Return a MongoDBAtlasVectorSearch built from the MONGODB_* module constants."""
+        return MongoDBAtlasVectorSearch(
+            MongoClient(MONGODB_ATLAS_CLUSTER_URI),
+            db_name = MONGODB_DB_NAME,
+            collection_name = MONGODB_COLLECTION_NAME,
+            index_name = MONGODB_INDEX_NAME
+        )
+    def rag_ingestion_llamaindex(self, config):
+        """Run ingestion: load every source document, then store the result."""
+        docs = self.load_documents()
+        self.store_documents(config, docs)
+    def rag_retrieval(self, config, prompt):
+        """Query the stored index with prompt, passing config["k"] as similarity_top_k."""
+        index = VectorStoreIndex.from_vector_store(
+            vector_store = self.get_vector_store())
+        query_engine = index.as_query_engine(
+            similarity_top_k = config["k"]
+        )
+        return query_engine.query(prompt)