MOHAMMED-N committed on
Commit
cf69165
·
verified ·
1 Parent(s): c39cadf

Create chain_setup.py

Browse files
Files changed (1) hide show
  1. chain_setup.py +51 -0
chain_setup.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import hf_hub_download
3
+ from langchain.llms import LlamaCpp
4
+ from langchain.chains import ConversationalRetrievalChain
5
+ from langchain.memory import ConversationBufferMemory
6
+
7
def load_llm():
    """
    Download the Q4_K_M GGUF build of c4ai-command-r7b-arabic from the
    Hugging Face Hub (cached locally after the first call) and load it
    through LangChain's llama-cpp wrapper.

    Returns:
        LlamaCpp: a LangChain LLM instance backed by the local GGUF file.
    """
    # 1) Download the GGUF model from Hugging Face into ./models.
    #    NOTE: `local_dir_use_symlinks` is deprecated and ignored by recent
    #    huggingface_hub releases (it only triggers a FutureWarning), so it
    #    is intentionally no longer passed here.
    model_file = hf_hub_download(
        repo_id="mobeidat/c4ai-command-r7b-arabic-02-2025-Q4_K_M-GGUF",
        filename="c4ai-command-r7b-arabic-02-2025-q4_k_m.gguf",
        local_dir="./models",
    )

    # 2) Load the model with llama-cpp via LangChain's LlamaCpp.
    #    `flash_attn` and `chat_format` are llama_cpp.Llama constructor
    #    options, not declared fields on the LangChain wrapper; the
    #    supported way to forward them is `model_kwargs`.
    llm = LlamaCpp(
        model_path=model_file,
        n_ctx=2048,   # context window; raise to 4096 if memory allows
        n_batch=512,  # prompt-eval batch size; drop to 256 on small machines
        model_kwargs={
            "flash_attn": False,
            # NOTE(review): Command R models ship their own chat template —
            # confirm 'chatml' is really the intended prompt format here.
            "chat_format": "chatml",
        },
    )

    return llm
30
+
31
def build_conversational_chain(vectorstore):
    """
    Assemble a multi-turn Q&A chain over *vectorstore*.

    Wires the local llama-cpp LLM from ``load_llm()`` to a
    ConversationalRetrievalChain, with a ConversationBufferMemory so
    earlier turns are carried into follow-up questions.

    Args:
        vectorstore: any LangChain vector store exposing ``as_retriever``.

    Returns:
        ConversationalRetrievalChain: ready for conversational retrieval Q&A.
    """
    # Chat history lives in an in-process buffer keyed as "chat_history",
    # returned as message objects rather than a flat string.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Top-5 similarity search over the supplied store.
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5},
    )

    return ConversationalRetrievalChain.from_llm(
        llm=load_llm(),
        retriever=retriever,
        memory=memory,
        verbose=True,
    )