Spaces:

MOHAMMED-N
/

Chat_with_NDMO

Running

App Files Files Community

MOHAMMED-N committed 5 days ago

Commit

cf69165

verified ·

1 Parent(s): c39cadf

Create chain_setup.py

Browse files

Files changed (1) hide show

chain_setup.py +51 -0

chain_setup.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import os
+from huggingface_hub import hf_hub_download
+from langchain.llms import LlamaCpp
+from langchain.chains import ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
+def load_llm():
+    """
+    Download the Q4_K_M GGUF model and load it through LangChain's LlamaCpp.
+
+    The weights are fetched with ``hf_hub_download`` from the
+    ``mobeidat/c4ai-command-r7b-arabic-02-2025-Q4_K_M-GGUF`` repository into
+    ``./models``; the returned file path is then handed to ``LlamaCpp``.
+
+    Returns:
+        The configured ``LlamaCpp`` instance.
+    """
+    # 1) Download the GGUF model from Hugging Face
+    model_file = hf_hub_download(
+        repo_id="mobeidat/c4ai-command-r7b-arabic-02-2025-Q4_K_M-GGUF",
+        filename="c4ai-command-r7b-arabic-02-2025-q4_k_m.gguf",
+        local_dir="./models",
+        local_dir_use_symlinks=False,
+    )
+    # 2) Load the model with llama-cpp via LangChain's LlamaCpp
+    llm = LlamaCpp(
+        model_path=model_file,
+        flash_attn=False,
+        n_ctx=2048,  # or 4096
+        n_batch=512,  # or even 256
+        chat_format="chatml",
+    )
+    return llm
+def build_conversational_chain(vectorstore):
+    """
+    Build a ConversationalRetrievalChain over ``vectorstore`` for multi-turn Q&A.
+
+    Args:
+        vectorstore: Object exposing ``as_retriever``; it is asked for a
+            similarity retriever with ``k`` set to 5.
+
+    Returns:
+        The chain produced by ``ConversationalRetrievalChain.from_llm``, wired
+        to the LLM from ``load_llm`` and to a ConversationBufferMemory stored
+        under the "chat_history" key.
+    """
+    model = load_llm()
+    # Chat history lives in memory so follow-up questions can build on earlier turns.
+    history = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+    retriever = vectorstore.as_retriever(
+        search_type="similarity",
+        search_kwargs={"k": 5},
+    )
+    return ConversationalRetrievalChain.from_llm(
+        llm=model,
+        retriever=retriever,
+        memory=history,
+        verbose=True,
+    )