Simon
committed on
Commit
•
fdf8874
1
Parent(s):
b261943
dont care
Browse files- app.py +84 -0
- requirements.txt +4 -0
- 🗣️questionmydocs📄_with_langchain_(assignment_version).py +0 -0
app.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# # we'll need a few dependencies before we can do this
|
2 |
+
# #!pip install chromadb -q
|
3 |
+
|
4 |
+
# from langchain.vectorstores import Chroma
|
5 |
+
|
6 |
+
# persist_directory = "vector_db"
|
7 |
+
|
8 |
+
# vectordb = Chroma.from_documents(documents=documents, embedding=embeddings, persist_directory=persist_directory) #### YOUR CODE HERE
|
9 |
+
|
10 |
+
# """Now we can persist our Chroma vector store - and then show an example of how you would load that persisted vector store."""
|
11 |
+
|
12 |
+
# vectordb.persist()
|
13 |
+
# vectordb = None
|
14 |
+
|
15 |
+
# """As you can see when you run the following cell - loading the persisted vector store is *much* quicker than reinstantiating it - and that is the benefit of `persist_directory`!"""
|
16 |
+
|
17 |
+
# vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
|
18 |
+
|
19 |
+
# """Now that we have our docs set-up - we're ready to progress to the next part of the LangChain application puzzle!
|
20 |
+
|
21 |
+
# ### Tool Chain
|
22 |
+
|
23 |
+
# Now we can leverage our `doc_retriever` as a tool in our LangChain application!
|
24 |
+
|
25 |
+
# We'll be utilizing the BLOOMZ-1b7 model as our LLM today - so we can expect that our results will be less effective than if we used OpenAI's gpt-3.5-turbo, but the advantage is that no information will escape outside of our Colab environment.
|
26 |
+
|
27 |
+
# First up, let's load our model!
|
28 |
+
# """
|
29 |
+
|
30 |
+
# from langchain import HuggingFacePipeline
|
31 |
+
|
32 |
+
# llm = HuggingFacePipeline.from_model_id(
|
33 |
+
# model_id="bigscience/bloomz-1b7", ### YOUR CODE HERE
|
34 |
+
# task="text-generation", ### YOUR CODE HERE
|
35 |
+
# model_kwargs={"temperature" : 0, "max_length" : 500})
|
36 |
+
|
37 |
+
# """Now let's set up our document vector store as a Retriever tool so we can leverage it in our chain!"""
|
38 |
+
|
39 |
+
# doc_retriever = vectordb.as_retriever() ### YOUR CODE HERE
|
40 |
+
|
41 |
+
# """### Final Chain
|
42 |
+
|
43 |
+
# With that set-up, we're good to set-up our final RetrievalQA chain and leverage all the documents we have in our Vector DB!
|
44 |
+
# """
|
45 |
+
|
46 |
+
# from langchain.chains import RetrievalQA
|
47 |
+
|
48 |
+
# shakespeare_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=doc_retriever) ### YOUR CODE HERE
|
49 |
+
|
50 |
+
# """Let's test it out by itself!"""
|
51 |
+
|
52 |
+
# #shakespeare_qa.run("Who was Hamlet's Mother?")
|
53 |
+
|
54 |
+
# """### Conclusion
|
55 |
+
|
56 |
+
# Here we have it!
|
57 |
+
|
58 |
+
# A system capable of querying over multiple documents - all without ever needing to hit an external API!
|
59 |
+
# """
|
60 |
+
|
61 |
+
# def make_inference(query):
|
62 |
+
# docs = docsearch.get_relevant_documents(query)
|
63 |
+
# return(chain.run(input_documents=docs, question=query))
|
64 |
+
|
65 |
+
# if __name__ == "__main__":
|
66 |
+
# # make a gradio interface
|
67 |
+
# import gradio as gr
|
68 |
+
|
69 |
+
# gr.Interface(
|
70 |
+
# make_inference,
|
71 |
+
# [
|
72 |
+
# gr.inputs.Textbox(lines=2, label="Query"),
|
73 |
+
# ],
|
74 |
+
# gr.outputs.Textbox(label="Response"),
|
75 |
+
# title="🗣️TalkToMyDoc📄",
|
76 |
+
# description="🗣️TalkToMyDoc📄 is a tool that allows you to ask questions about a document. In this case - Hitch Hitchhiker's Guide to the Galaxy.",
|
77 |
+
# ).launch()
|
78 |
+
import gradio as gr
|
79 |
+
|
80 |
+
def greet(name):
|
81 |
+
return "Hello " + name + "!!"
|
82 |
+
|
83 |
+
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
|
84 |
+
iface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
openai
|
3 |
+
tiktoken
|
4 |
+
chromadb
|
🗣️questionmydocs📄_with_langchain_(assignment_version).py
ADDED
The diff for this file is too large to render.
See raw diff
|
|