MOHAMMED-N committed
Commit c39cadf · verified · 1 Parent(s): 4f4f858

Update app.py

Files changed (1)
  1. app.py +43 -99
app.py CHANGED
@@ -1,114 +1,58 @@
+# app.py
+
 import streamlit as st
 import os
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
-# --- LANGCHAIN IMPORTS ---
-from langchain_community.document_loaders import PyPDFLoader
-from langchain_experimental.text_splitter import SemanticChunker
-from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain.memory import ConversationBufferMemory
-
-# 1) SET UP PAGE
-st.title("💬 المحادثة التفاعلية - إدارة البيانات وحماية البيانات الشخصية")
-local_file = "Policies001.pdf"
-
-index_folder = "faiss_index"
-
-# Inject custom CSS for right-to-left text
-st.markdown(
-    """
-    <style>
-    .rtl {
-        direction: rtl;
-        text-align: right;
-    }
-    </style>
-    """,
-    unsafe_allow_html=True
-)
-
-# 2) LOAD OR BUILD VECTORSTORE
-embeddings = HuggingFaceEmbeddings(
-    model_name="CAMeL-Lab/bert-base-arabic-camelbert-mix",
-    model_kwargs={"trust_remote_code": True}
-)
-
-if os.path.exists(index_folder):
-    vectorstore = FAISS.load_local(index_folder, embeddings, allow_dangerous_deserialization=True)
-else:
-    loader = PyPDFLoader(local_file)
-    documents = loader.load()
-
-    text_splitter = SemanticChunker(
-        embeddings=embeddings,
-        breakpoint_threshold_type='percentile',
-        breakpoint_threshold_amount=90
-    )
-    chunked_docs = text_splitter.split_documents(documents)
-
-    vectorstore = FAISS.from_documents(chunked_docs, embeddings)
-    vectorstore.save_local(index_folder)
-
-# 3) CREATE RETRIEVER
-retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
-
-# 4) SET UP "COMMAND-R7B-ARABIC" AS LLM
-# Authenticate and load the model
-model_name = "CohereForAI/c4ai-command-r7b-arabic-02-2025"  # Replace with the actual Hugging Face model ID
-
-# Set Hugging Face token securely
-hf_token = os.getenv("HF_TOKEN")  # Ensure you set your token as an environment variable in Hugging Face Spaces
-
-if hf_token is None:
-    st.error("Hugging Face token not found. Please set the 'HF_TOKEN' environment variable.")
-    st.stop()
-
-# Load tokenizer and model using the token
-tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=hf_token)
-
-# Hugging Face pipeline for text generation
-qa_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
-
-# Memory object to store conversation
-memory = ConversationBufferMemory(
-    memory_key="chat_history",  # key used internally by the chain
-    return_messages=True  # ensures we get the entire message history
-)
-
-# 5) MANAGE SESSION STATE FOR UI CHAT
-if "messages" not in st.session_state:
-    st.session_state["messages"] = [
-        {"role": "assistant", "content": "👋 مرحبًا! اسألني أي شيء عن إدارة البيانات وحماية البيانات الشخصية!"}
-    ]
-
-# Display existing messages in chat format
-for msg in st.session_state["messages"]:
-    with st.chat_message(msg["role"]):
-        # Apply the "rtl" class to style Arabic text correctly
-        st.markdown(f'<div class="rtl">{msg["content"]}</div>', unsafe_allow_html=True)
-
-# 6) CHAT INPUT
-user_input = st.chat_input("اكتب سؤالك هنا")
-
-# 7) PROCESS NEW USER MESSAGE
-if user_input:
-    # a) Display user message in UI
-    st.session_state["messages"].append({"role": "user", "content": user_input})
-    with st.chat_message("user"):
-        st.markdown(f'<div class="rtl">{user_input}</div>', unsafe_allow_html=True)
-
-    # b) Run pipeline to generate a response
-    # Combine retriever results and user input for context-aware answering
-    retrieved_docs = retriever.get_relevant_documents(user_input)
-    context = "\n".join([doc.page_content for doc in retrieved_docs])
-    full_input = f"السياق:\n{context}\n\nالسؤال:\n{user_input}"

-    # Generate answer using the pipeline
-    response = qa_pipeline(full_input, max_length=500, num_return_sequences=1)[0]["generated_text"]
-
-    # c) Display assistant response
-    st.session_state["messages"].append({"role": "assistant", "content": response})
-    with st.chat_message("assistant"):
-        st.markdown(f'<div class="rtl">{response}</div>', unsafe_allow_html=True)
+
+# Local imports
+from embedding import load_embeddings
+from vectorstore import load_or_build_vectorstore
+from chain_setup import build_conversational_chain
+
+def main():
+    st.title("💬 المحادثة التفاعلية - إدارة البيانات وحماية البيانات الشخصية")
+
+    # Paths and constants
+    local_file = "Policies001.pdf"
+    index_folder = "faiss_index"
+
+    # Step 1: Load Embeddings
+    embeddings = load_embeddings()
+
+    # Step 2: Build or load VectorStore
+    vectorstore = load_or_build_vectorstore(local_file, index_folder, embeddings)
+
+    # Step 3: Build the Conversational Retrieval Chain
+    qa_chain = build_conversational_chain(vectorstore)
+
+    # Step 4: Session State for UI Chat
+    if "messages" not in st.session_state:
+        st.session_state["messages"] = [
+            {"role": "assistant", "content": "👋 مرحبًا! اسألني أي شيء عن إدارة البيانات وحماية البيانات الشخصية"}
+        ]
+
+    # Display existing messages
+    for msg in st.session_state["messages"]:
+        with st.chat_message(msg["role"]):
+            st.markdown(msg["content"])
+
+    # Step 5: Chat Input
+    user_input = st.chat_input("اكتب سؤالك")
+
+    # Step 6: Process user input
+    if user_input:
+        # a) Display user message
+        st.session_state["messages"].append({"role": "user", "content": user_input})
+        with st.chat_message("user"):
+            st.markdown(user_input)
+
+        # b) Run chain
+        response_dict = qa_chain({"question": user_input})
+        answer = response_dict["answer"]
+
+        # c) Display assistant response
+        st.session_state["messages"].append({"role": "assistant", "content": answer})
+        with st.chat_message("assistant"):
+            st.markdown(answer)
+
+if __name__ == "__main__":
+    main()
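
The refactored app.py pulls its embedding setup from a local `embedding` module that is not part of this commit. A minimal sketch of what `load_embeddings` might look like, assuming it simply wraps the `HuggingFaceEmbeddings` configuration the deleted inline code used:

# embedding.py -- hypothetical reconstruction; this module is referenced but not shown in the diff.
from langchain_huggingface import HuggingFaceEmbeddings

def load_embeddings():
    # Same Arabic BERT embedding model the pre-refactor app.py configured inline.
    return HuggingFaceEmbeddings(
        model_name="CAMeL-Lab/bert-base-arabic-camelbert-mix",
        model_kwargs={"trust_remote_code": True},
    )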
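
Likewise, `load_or_build_vectorstore` presumably carries over the FAISS load-or-build logic removed from app.py above. A sketch under that assumption:

# vectorstore.py -- hypothetical reconstruction reusing the logic deleted from app.py.
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.vectorstores import FAISS

def load_or_build_vectorstore(local_file, index_folder, embeddings):
    # Load a previously saved FAISS index if one exists on disk.
    if os.path.exists(index_folder):
        return FAISS.load_local(index_folder, embeddings, allow_dangerous_deserialization=True)

    # Otherwise load the PDF, chunk it semantically, build the index, and persist it.
    documents = PyPDFLoader(local_file).load()
    text_splitter = SemanticChunker(
        embeddings=embeddings,
        breakpoint_threshold_type="percentile",
        breakpoint_threshold_amount=90,
    )
    chunked_docs = text_splitter.split_documents(documents)
    vectorstore = FAISS.from_documents(chunked_docs, embeddings)
    vectorstore.save_local(index_folder)
    return vectorstore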
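
Finally, `build_conversational_chain` must return something callable as `qa_chain({"question": ...})` that yields a dict with an "answer" key, which matches LangChain's `ConversationalRetrievalChain`. A sketch assuming the Command R7B Arabic model, the k=5 retriever, and the memory settings from the deleted code are reused; the actual module may be wired differently:

# chain_setup.py -- hypothetical sketch; the real module is not in this diff.
import os
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

def build_conversational_chain(vectorstore):
    # Gated model; the token comes from the Space's environment, as in the old code.
    model_name = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
    hf_token = os.getenv("HF_TOKEN")

    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token)
    llm = HuggingFacePipeline(pipeline=pipeline(
        "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=500
    ))

    # Memory keyed the same way the deleted ConversationBufferMemory was.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Chain output is a dict containing "answer", as app.py expects.
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
        memory=memory,
    )

Note that calling a chain directly, as in `qa_chain({"question": user_input})`, is deprecated in recent LangChain releases; `qa_chain.invoke({"question": user_input})` is the current equivalent.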