Abs6187 committed on
Commit
cf664b9
1 Parent(s): ffa232c

Update Ingest.py

Files changed (1)
  1. Ingest.py +13 -61
Ingest.py CHANGED
@@ -1,61 +1,13 @@
- import ray
- import logging
- from langchain_community.document_loaders import DirectoryLoader
- from langchain_community.embeddings import HuggingFaceEmbeddings
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain_community.vectorstores import FAISS
- from faiss import IndexFlatL2 # Assuming using L2 distance for simplicity
-
- # Initialize Ray
- ray.init()
-
- # Set up basic configuration for logging
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
- # Load documents with logging
- logging.info("Loading documents...")
- loader = DirectoryLoader('data', glob="./*.txt")
- documents = loader.load()
-
- # Extract text from documents and split into manageable texts with logging
- logging.info("Extracting and splitting texts from documents...")
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
- texts = []
- for document in documents:
-     if hasattr(document, 'get_text'):
-         text_content = document.get_text() # Adjust according to actual method
-     else:
-         text_content = "" # Default to empty string if no text method is available
-
-     texts.extend(text_splitter.split_text(text_content))
-
- # Define embedding function
- def embedding_function(text):
-     embeddings_model = HuggingFaceEmbeddings(model_name="law-ai/InLegalBERT")
-     return embeddings_model.embed_query(text)
-
- # Create FAISS index for embeddings
- index = IndexFlatL2(768) # Dimension of embeddings, adjust as needed
-
- # Assuming docstore as a simple dictionary to store document texts
- docstore = {i: text for i, text in enumerate(texts)}
- index_to_docstore_id = {i: i for i in range(len(texts))}
-
- # Initialize FAISS
- faiss_db = FAISS(embedding_function, index, docstore, index_to_docstore_id)
-
- # Process and store embeddings
- logging.info("Storing embeddings in FAISS...")
- for i, text in enumerate(texts):
-     embedding = embedding_function(text)
-     faiss_db.add_documents([embedding])
-
- # Exporting the vector embeddings database with logging
- logging.info("Exporting the vector embeddings database...")
- faiss_db.save_local("ipc_embed_db")
-
- # Log a message to indicate the completion of the process
- logging.info("Process completed successfully.")
-
- # Shutdown Ray after the process
- ray.shutdown()
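
Note: the deleted script would not have run as written. LangChain Document objects expose page_content rather than get_text, so every chunk would have come out empty; the FAISS constructor expects a Docstore instance, not a plain dict; and add_documents takes Document objects, not raw embedding vectors. For reference, a minimal corrected sketch of the same ingestion flow (illustrative only, not part of this commit) that keeps the original data/ directory, chunking parameters, law-ai/InLegalBERT embeddings, and ipc_embed_db output path, but swaps the manual index wiring for the standard FAISS.from_documents helper:

import logging

from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Load and chunk the source texts, as in the removed script.
logging.info("Loading documents...")
documents = DirectoryLoader("data", glob="./*.txt").load()

logging.info("Splitting documents...")
splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
chunks = splitter.split_documents(documents)  # Documents carry text in .page_content

# Build the index; from_documents handles the index/docstore wiring internally.
logging.info("Embedding and indexing with FAISS...")
embeddings = HuggingFaceEmbeddings(model_name="law-ai/InLegalBERT")
faiss_db = FAISS.from_documents(chunks, embeddings)

logging.info("Exporting the vector embeddings database...")
faiss_db.save_local("ipc_embed_db")
logging.info("Process completed successfully.")

The replacement below drops this ingestion pipeline entirely in favor of loading a local causal language model.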
 
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import os
+
+ def load_model():
+     """Loads tokenizer and model from local files."""
+     model_dir = "./" # Ensure all model files are in the root directory
+     tokenizer = AutoTokenizer.from_pretrained(model_dir, config="config.json")
+     model = AutoModelForCausalLM.from_pretrained(model_dir)
+     return tokenizer, model
+
+ if __name__ == "__main__":
+     tokenizer, model = load_model()
+     print("Model and tokenizer loaded successfully.")
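
The new load_model is a thin wrapper around the transformers Auto classes (the os import is unused, and from_pretrained would pick up config.json from model_dir on its own). A hedged smoke-test sketch, assuming compatible model weights and config.json sit next to Ingest.py; the prompt text is made up for illustration:

# Hypothetical usage; assumes Ingest.py is importable from the current
# working directory and the model files are present alongside it.
from Ingest import load_model

tokenizer, model = load_model()

# Tokenize a sample prompt and generate a short continuation.
inputs = tokenizer("The Indian Penal Code defines", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))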