valeriylo committed on
Commit
22d8576
·
verified ·
1 Parent(s): f57bead

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -19
app.py CHANGED
@@ -14,12 +14,11 @@ from langchain.llms import HuggingFaceHub, LlamaCpp
14
  from huggingface_hub import snapshot_download, hf_hub_download
15
 
16
 
17
- # from prompts import CONDENSE_QUESTION_PROMPT
 
 
18
 
19
- repo_name = "IlyaGusev/saiga_mistral_7b_gguf"
20
- model_name = "model-q4_K.gguf"
21
-
22
- #snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
23
 
24
 
25
  def get_pdf_text(pdf_docs):
@@ -34,8 +33,8 @@ def get_pdf_text(pdf_docs):
34
 
35
  def get_text_chunks(text):
36
  text_splitter = CharacterTextSplitter(separator="\n",
37
- chunk_size=1000, # 1000
38
- chunk_overlap=200, # 200
39
  length_function=len
40
  )
41
  chunks = text_splitter.split_text(text)
@@ -44,10 +43,7 @@ def get_text_chunks(text):
44
 
45
 
46
  def get_vectorstore(text_chunks):
47
- #embeddings = OpenAIEmbeddings()
48
- #embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
49
  embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
50
- #embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
51
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
52
 
53
  return vectorstore
@@ -65,8 +61,6 @@ def get_conversation_chain(vectorstore, model_name):
65
  # echo=True
66
  # )
67
 
68
- # llm = ChatOpenAI()
69
-
70
  llm = GigaChat(credentials=os.getenv("GIGACHAT_CREDENTIALS"),
71
  verify_ssl_certs=False)
72
 
@@ -106,13 +100,6 @@ def handle_userinput(user_question):
106
  st.write(bot_template.replace(
107
  "{{MSG}}", str(text.page_content)), unsafe_allow_html=True)
108
 
109
-
110
-
111
- #for text in enumerate(st.session_state.retrieved_text):
112
- # st.write(text[1].page_content, '\n')
113
-
114
- #print(response['source_documents'][0])
115
-
116
  # main code
117
  load_dotenv()
118
 
 
14
  from huggingface_hub import snapshot_download, hf_hub_download
15
 
16
 
17
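+ # To use the local GGUF model, uncomment lines 18-19 and 54-62 and comment out lines 64-65 (the snapshot_download call on line 21 also relies on repo_name and model_name from lines 18-19). Otherwise, provide the GigaChat credentials through the HF Secrets menu.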
18
+ #repo_name = "IlyaGusev/saiga_mistral_7b_gguf"
19
+ #model_name = "model-q4_K.gguf"
20
 
21
+ snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
 
 
 
22
 
23
 
24
  def get_pdf_text(pdf_docs):
 
33
 
34
  def get_text_chunks(text):
35
  text_splitter = CharacterTextSplitter(separator="\n",
36
+ chunk_size=1000,
37
+ chunk_overlap=200,
38
  length_function=len
39
  )
40
  chunks = text_splitter.split_text(text)
 
43
 
44
 
45
  def get_vectorstore(text_chunks):
 
 
46
  embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
 
47
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
48
 
49
  return vectorstore
 
61
  # echo=True
62
  # )
63
 
 
 
64
  llm = GigaChat(credentials=os.getenv("GIGACHAT_CREDENTIALS"),
65
  verify_ssl_certs=False)
66
 
 
100
  st.write(bot_template.replace(
101
  "{{MSG}}", str(text.page_content)), unsafe_allow_html=True)
102
 
 
 
 
 
 
 
 
103
  # main code
104
  load_dotenv()
105