saaketvarma committed on
Commit
20370d0
·
1 Parent(s): 76f476f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -9
app.py CHANGED
@@ -1,10 +1,7 @@
1
  import base64
2
  import os
3
 
4
- import sys
5
  import streamlit as st
6
- from langchain.embeddings.openai import OpenAIEmbeddings
7
- from langchain.llms import OpenAI
8
  from langchain.chains import RetrievalQA
9
  from langchain.document_loaders import PDFMinerLoader
10
  from langchain.embeddings import SentenceTransformerEmbeddings
@@ -17,8 +14,6 @@ import torch
17
 
18
  st.set_page_config(layout="wide")
19
 
20
-
21
-
22
  def process_answer(instruction, qa_chain):
23
  response = ''
24
  generated_text = qa_chain.run(instruction)
@@ -41,13 +36,11 @@ def data_ingestion():
41
  loader = PDFMinerLoader(os.path.join(root, file))
42
 
43
  documents = loader.load()
44
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
45
  splits = text_splitter.split_documents(documents)
46
 
47
- # create embeddings of the chunked document
48
  embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
49
-
50
- #embeddings = OpenAIEmbeddings()
51
  vectordb = FAISS.from_documents(splits, embeddings)
52
  vectordb.save_local("faiss_index")
53
 
 
1
  import base64
2
  import os
3
 
 
4
  import streamlit as st
 
 
5
  from langchain.chains import RetrievalQA
6
  from langchain.document_loaders import PDFMinerLoader
7
  from langchain.embeddings import SentenceTransformerEmbeddings
 
14
 
15
  st.set_page_config(layout="wide")
16
 
 
 
17
  def process_answer(instruction, qa_chain):
18
  response = ''
19
  generated_text = qa_chain.run(instruction)
 
36
  loader = PDFMinerLoader(os.path.join(root, file))
37
 
38
  documents = loader.load()
39
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=500)
40
  splits = text_splitter.split_documents(documents)
41
 
42
+ # create embeddings here
43
  embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
 
 
44
  vectordb = FAISS.from_documents(splits, embeddings)
45
  vectordb.save_local("faiss_index")
46