marcolorenzi98 committed on
Commit
d7c6aea
1 Parent(s): 4214383

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -0
app.py CHANGED
@@ -9,6 +9,7 @@ from langchain.llms import HuggingFacePipeline
9
  from langchain.document_loaders.csv_loader import CSVLoader
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain.embeddings import HuggingFaceEmbeddings
 
12
  from langchain.chains import RetrievalQA
13
  from langchain.vectorstores import Chroma
14
  import gradio as gr
@@ -42,6 +43,20 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
42
 
43
  ##############################################################################
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  query_pipeline = transformers.pipeline(
46
  "text-generation",
47
  model=model,
 
9
  from langchain.document_loaders.csv_loader import CSVLoader
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain.embeddings import HuggingFaceEmbeddings
12
+ from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
13
  from langchain.chains import RetrievalQA
14
  from langchain.vectorstores import Chroma
15
  import gradio as gr
 
43
 
44
  ##############################################################################
45
 
46
+ embedding = SpacyEmbeddings(model_name="en_core_web_sm")
47
+
48
+ # Embed and store the texts
49
+ # Supplying a persist_directory will store the embeddings on disk
50
+ persist_directory = 'Enron_case_RAG/Langchain_ChromaDB'
51
+
52
+ # load from disk
53
+ db3 = Chroma(persist_directory=persist_directory,
54
+ embedding_function=embedding,
55
+ collection_name="Enron_vectorstore"
56
+ )
57
+
58
+ ##############################################################################
59
+
60
  query_pipeline = transformers.pipeline(
61
  "text-generation",
62
  model=model,