Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- LICENSE +121 -0
- app.py +109 -0
- chains.py +222 -0
- env.example +26 -0
- requirements.txt +7 -0
- utils.py +54 -0
LICENSE
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Creative Commons Legal Code
|
2 |
+
|
3 |
+
CC0 1.0 Universal
|
4 |
+
|
5 |
+
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
|
6 |
+
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
|
7 |
+
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
|
8 |
+
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
|
9 |
+
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
|
10 |
+
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
|
11 |
+
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
|
12 |
+
HEREUNDER.
|
13 |
+
|
14 |
+
Statement of Purpose
|
15 |
+
|
16 |
+
The laws of most jurisdictions throughout the world automatically confer
|
17 |
+
exclusive Copyright and Related Rights (defined below) upon the creator
|
18 |
+
and subsequent owner(s) (each and all, an "owner") of an original work of
|
19 |
+
authorship and/or a database (each, a "Work").
|
20 |
+
|
21 |
+
Certain owners wish to permanently relinquish those rights to a Work for
|
22 |
+
the purpose of contributing to a commons of creative, cultural and
|
23 |
+
scientific works ("Commons") that the public can reliably and without fear
|
24 |
+
of later claims of infringement build upon, modify, incorporate in other
|
25 |
+
works, reuse and redistribute as freely as possible in any form whatsoever
|
26 |
+
and for any purposes, including without limitation commercial purposes.
|
27 |
+
These owners may contribute to the Commons to promote the ideal of a free
|
28 |
+
culture and the further production of creative, cultural and scientific
|
29 |
+
works, or to gain reputation or greater distribution for their Work in
|
30 |
+
part through the use and efforts of others.
|
31 |
+
|
32 |
+
For these and/or other purposes and motivations, and without any
|
33 |
+
expectation of additional consideration or compensation, the person
|
34 |
+
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
|
35 |
+
is an owner of Copyright and Related Rights in the Work, voluntarily
|
36 |
+
elects to apply CC0 to the Work and publicly distribute the Work under its
|
37 |
+
terms, with knowledge of his or her Copyright and Related Rights in the
|
38 |
+
Work and the meaning and intended legal effect of CC0 on those rights.
|
39 |
+
|
40 |
+
1. Copyright and Related Rights. A Work made available under CC0 may be
|
41 |
+
protected by copyright and related or neighboring rights ("Copyright and
|
42 |
+
Related Rights"). Copyright and Related Rights include, but are not
|
43 |
+
limited to, the following:
|
44 |
+
|
45 |
+
i. the right to reproduce, adapt, distribute, perform, display,
|
46 |
+
communicate, and translate a Work;
|
47 |
+
ii. moral rights retained by the original author(s) and/or performer(s);
|
48 |
+
iii. publicity and privacy rights pertaining to a person's image or
|
49 |
+
likeness depicted in a Work;
|
50 |
+
iv. rights protecting against unfair competition in regards to a Work,
|
51 |
+
subject to the limitations in paragraph 4(a), below;
|
52 |
+
v. rights protecting the extraction, dissemination, use and reuse of data
|
53 |
+
in a Work;
|
54 |
+
vi. database rights (such as those arising under Directive 96/9/EC of the
|
55 |
+
European Parliament and of the Council of 11 March 1996 on the legal
|
56 |
+
protection of databases, and under any national implementation
|
57 |
+
thereof, including any amended or successor version of such
|
58 |
+
directive); and
|
59 |
+
vii. other similar, equivalent or corresponding rights throughout the
|
60 |
+
world based on applicable law or treaty, and any national
|
61 |
+
implementations thereof.
|
62 |
+
|
63 |
+
2. Waiver. To the greatest extent permitted by, but not in contravention
|
64 |
+
of, applicable law, Affirmer hereby overtly, fully, permanently,
|
65 |
+
irrevocably and unconditionally waives, abandons, and surrenders all of
|
66 |
+
Affirmer's Copyright and Related Rights and associated claims and causes
|
67 |
+
of action, whether now known or unknown (including existing as well as
|
68 |
+
future claims and causes of action), in the Work (i) in all territories
|
69 |
+
worldwide, (ii) for the maximum duration provided by applicable law or
|
70 |
+
treaty (including future time extensions), (iii) in any current or future
|
71 |
+
medium and for any number of copies, and (iv) for any purpose whatsoever,
|
72 |
+
including without limitation commercial, advertising or promotional
|
73 |
+
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
|
74 |
+
member of the public at large and to the detriment of Affirmer's heirs and
|
75 |
+
successors, fully intending that such Waiver shall not be subject to
|
76 |
+
revocation, rescission, cancellation, termination, or any other legal or
|
77 |
+
equitable action to disrupt the quiet enjoyment of the Work by the public
|
78 |
+
as contemplated by Affirmer's express Statement of Purpose.
|
79 |
+
|
80 |
+
3. Public License Fallback. Should any part of the Waiver for any reason
|
81 |
+
be judged legally invalid or ineffective under applicable law, then the
|
82 |
+
Waiver shall be preserved to the maximum extent permitted taking into
|
83 |
+
account Affirmer's express Statement of Purpose. In addition, to the
|
84 |
+
extent the Waiver is so judged Affirmer hereby grants to each affected
|
85 |
+
person a royalty-free, non transferable, non sublicensable, non exclusive,
|
86 |
+
irrevocable and unconditional license to exercise Affirmer's Copyright and
|
87 |
+
Related Rights in the Work (i) in all territories worldwide, (ii) for the
|
88 |
+
maximum duration provided by applicable law or treaty (including future
|
89 |
+
time extensions), (iii) in any current or future medium and for any number
|
90 |
+
of copies, and (iv) for any purpose whatsoever, including without
|
91 |
+
limitation commercial, advertising or promotional purposes (the
|
92 |
+
"License"). The License shall be deemed effective as of the date CC0 was
|
93 |
+
applied by Affirmer to the Work. Should any part of the License for any
|
94 |
+
reason be judged legally invalid or ineffective under applicable law, such
|
95 |
+
partial invalidity or ineffectiveness shall not invalidate the remainder
|
96 |
+
of the License, and in such case Affirmer hereby affirms that he or she
|
97 |
+
will not (i) exercise any of his or her remaining Copyright and Related
|
98 |
+
Rights in the Work or (ii) assert any associated claims and causes of
|
99 |
+
action with respect to the Work, in either case contrary to Affirmer's
|
100 |
+
express Statement of Purpose.
|
101 |
+
|
102 |
+
4. Limitations and Disclaimers.
|
103 |
+
|
104 |
+
a. No trademark or patent rights held by Affirmer are waived, abandoned,
|
105 |
+
surrendered, licensed or otherwise affected by this document.
|
106 |
+
b. Affirmer offers the Work as-is and makes no representations or
|
107 |
+
warranties of any kind concerning the Work, express, implied,
|
108 |
+
statutory or otherwise, including without limitation warranties of
|
109 |
+
title, merchantability, fitness for a particular purpose, non
|
110 |
+
infringement, or the absence of latent or other defects, accuracy, or
|
111 |
+
the present or absence of errors, whether or not discoverable, all to
|
112 |
+
the greatest extent permissible under applicable law.
|
113 |
+
c. Affirmer disclaims responsibility for clearing rights of other persons
|
114 |
+
that may apply to the Work or any use thereof, including without
|
115 |
+
limitation any person's Copyright and Related Rights in the Work.
|
116 |
+
Further, Affirmer disclaims responsibility for obtaining any necessary
|
117 |
+
consents, permissions or other rights required for any use of the
|
118 |
+
Work.
|
119 |
+
d. Affirmer understands and acknowledges that Creative Commons is not a
|
120 |
+
party to this document and has no duty or obligation with respect to
|
121 |
+
this CC0 or use of the Work.
|
app.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
from langchain.chains import RetrievalQA
|
5 |
+
from PyPDF2 import PdfReader
|
6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
+
from langchain.callbacks.base import BaseCallbackHandler
|
8 |
+
from langchain.vectorstores.neo4j_vector import Neo4jVector
|
9 |
+
from streamlit.logger import get_logger
|
10 |
+
from chains import (
|
11 |
+
load_embedding_model,
|
12 |
+
load_llm,
|
13 |
+
)
|
14 |
+
|
15 |
+
# Connection / model configuration, read from the environment first.
url = os.getenv("NEO4J_URI")
username = os.getenv("NEO4J_USERNAME")
password = os.getenv("NEO4J_PASSWORD")
ollama_base_url = os.getenv("OLLAMA_BASE_URL")
embedding_model_name = os.getenv("EMBEDDING_MODEL", "SentenceTransformer")
llm_name = os.getenv("LLM", "llama2")
# BUG FIX: removed a duplicated `url = os.getenv("NEO4J_URI")` assignment here.

# Check if the required environment variables are set; if not, collect them
# interactively via a Streamlit form.
if not all([url, username, password, ollama_base_url]):
    st.write("The application requires some information before running.")
    with st.form("connection_form"):
        url = st.text_input("Enter NEO4J_URI")
        username = st.text_input("Enter NEO4J_USERNAME")
        password = st.text_input("Enter NEO4J_PASSWORD", type="password")
        ollama_base_url = st.text_input("Enter OLLAMA_BASE_URL")
        st.markdown("Only enter the OPENAI_APIKEY to use OpenAI instead of Ollama. Leave blank to use Ollama.")
        openai_apikey = st.text_input("Enter OPENAI_API_KEY", type="password")
        submit_button = st.form_submit_button("Submit")
        if submit_button:
            if not all([url, username, password]):
                st.write("Enter the Neo4j information.")
            if not (ollama_base_url or openai_apikey):
                st.write("Enter the Ollama URL or OpenAI API Key.")
            if openai_apikey:
                # An OpenAI key switches the LLM backend away from Ollama.
                llm_name = "gpt-3.5"
                os.environ['OPENAI_API_KEY'] = openai_apikey

# NOTE(review): the key here is "NEO4J_URL" while the input variable is
# "NEO4J_URI" — confirm which name downstream consumers actually read.
os.environ["NEO4J_URL"] = url

logger = get_logger(__name__)

# Load the embedding model once at import time; `dimension` is the vector size.
embeddings, dimension = load_embedding_model(
    embedding_model_name, config={"ollama_base_url": ollama_base_url}, logger=logger
)
|
51 |
+
|
52 |
+
|
53 |
+
class StreamHandler(BaseCallbackHandler):
    """LangChain callback that live-renders streamed LLM tokens into a Streamlit container."""

    def __init__(self, container, initial_text=""):
        """Remember the target Streamlit container and seed the accumulated text."""
        self.container = container
        self.text = initial_text

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Append the newly streamed token and re-render the full message so far."""
        self.text = f"{self.text}{token}"
        self.container.markdown(self.text)
|
61 |
+
|
62 |
+
# Instantiate the chat LLM once at import time; `llm_name` may have been
# overridden to "gpt-3.5" by the connection form above.
llm = load_llm(llm_name, logger=logger, config={"ollama_base_url": ollama_base_url})
|
63 |
+
|
64 |
+
|
65 |
+
def main():
    """Streamlit entry point: upload a PDF, index its text in Neo4j, answer questions.

    Flow: read the uploaded PDF, split its text into overlapping chunks, embed
    and store them in a Neo4j vector index, then answer user queries with a
    RetrievalQA chain, streaming tokens into the page via StreamHandler.
    """
    st.header("📄Chat with your pdf file")

    # Upload your pdf file
    pdf = st.file_uploader("Upload your PDF", type="pdf")

    if pdf is not None:
        pdf_reader = PdfReader(pdf)

        text = ""
        for page in pdf_reader.pages:
            # BUG FIX: extract_text() may return None for pages without a
            # text layer (scanned pages); `+= None` raised a TypeError.
            text += page.extract_text() or ""

        # Split into chunks; the 200-char overlap preserves context across
        # chunk boundaries.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200, length_function=len
        )

        chunks = text_splitter.split_text(text=text)

        # Store the chunks in the Neo4j vector index.
        vectorstore = Neo4jVector.from_texts(
            chunks,
            url=url,
            username=username,
            password=password,
            embedding=embeddings,
            index_name="pdf_bot",
            node_label="PdfBotChunk",
            pre_delete_collection=True,  # Delete existing PDF data
        )
        qa = RetrievalQA.from_chain_type(
            llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever()
        )

        # Accept user questions/query
        query = st.text_input("Ask questions about your PDF file")

        if query:
            stream_handler = StreamHandler(st.empty())
            qa.run(query, callbacks=[stream_handler])


if __name__ == "__main__":
    main()
|
chains.py
ADDED
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
2 |
+
from langchain.embeddings import (
|
3 |
+
OllamaEmbeddings,
|
4 |
+
SentenceTransformerEmbeddings,
|
5 |
+
BedrockEmbeddings,
|
6 |
+
)
|
7 |
+
from langchain.chat_models import ChatOpenAI, ChatOllama, BedrockChat
|
8 |
+
from langchain.vectorstores.neo4j_vector import Neo4jVector
|
9 |
+
from langchain.chains import RetrievalQAWithSourcesChain
|
10 |
+
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
11 |
+
from langchain.prompts.chat import (
|
12 |
+
ChatPromptTemplate,
|
13 |
+
SystemMessagePromptTemplate,
|
14 |
+
HumanMessagePromptTemplate,
|
15 |
+
)
|
16 |
+
from typing import List, Any
|
17 |
+
from utils import BaseLogger, extract_title_and_question
|
18 |
+
|
19 |
+
|
20 |
+
def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config=None):
    """Return ``(embeddings, dimension)`` for the requested embedding backend.

    Recognized names: "ollama", "openai", "aws"; anything else falls back to a
    local SentenceTransformer (all-MiniLM-L6-v2). ``config`` must contain
    "ollama_base_url" when the "ollama" backend is selected.
    """
    # BUG FIX: `config={}` was a mutable default argument; use a None sentinel.
    config = config or {}
    if embedding_model_name == "ollama":
        embeddings = OllamaEmbeddings(
            base_url=config["ollama_base_url"], model="llama2"
        )
        dimension = 4096
        logger.info("Embedding: Using Ollama")
    elif embedding_model_name == "openai":
        embeddings = OpenAIEmbeddings()
        dimension = 1536
        logger.info("Embedding: Using OpenAI")
    elif embedding_model_name == "aws":
        embeddings = BedrockEmbeddings()
        dimension = 1536
        logger.info("Embedding: Using AWS")
    else:
        embeddings = SentenceTransformerEmbeddings(
            model_name="all-MiniLM-L6-v2", cache_folder="/tmp"
        )
        dimension = 384
        logger.info("Embedding: Using SentenceTransformer")
    return embeddings, dimension
|
42 |
+
|
43 |
+
|
44 |
+
def load_llm(llm_name: str, logger=BaseLogger(), config=None):
    """Return a streaming chat model for ``llm_name``.

    "gpt-4"/"gpt-3.5" → OpenAI; "claudev2" → AWS Bedrock; any other non-empty
    name is treated as an Ollama model (``config["ollama_base_url"]`` required);
    an empty name falls back to GPT-3.5.
    """
    # BUG FIX: `config={}` was a mutable default argument; use a None sentinel.
    config = config or {}
    if llm_name == "gpt-4":
        logger.info("LLM: Using GPT-4")
        return ChatOpenAI(temperature=0, model_name="gpt-4", streaming=True)
    elif llm_name == "gpt-3.5":
        logger.info("LLM: Using GPT-3.5")
        return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
    elif llm_name == "claudev2":
        logger.info("LLM: ClaudeV2")
        return BedrockChat(
            model_id="anthropic.claude-v2",
            model_kwargs={"temperature": 0.0, "max_tokens_to_sample": 1024},
            streaming=True,
        )
    elif len(llm_name):
        logger.info(f"LLM: Using Ollama: {llm_name}")
        return ChatOllama(
            temperature=0,
            base_url=config["ollama_base_url"],
            model=llm_name,
            streaming=True,
            # seed=2,
            top_k=10,  # A higher value (100) will give more diverse answers, while a lower value (10) will be more conservative.
            top_p=0.3,  # Higher value (0.95) will lead to more diverse text, while a lower value (0.5) will generate more focused text.
            num_ctx=3072,  # Sets the size of the context window used to generate the next token.
        )
    # Fallback when llm_name is an empty string.
    logger.info("LLM: Using GPT-3.5")
    return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
|
72 |
+
|
73 |
+
|
74 |
+
def configure_llm_only_chain(llm):
    """Build a plain (no-retrieval) chat chain around ``llm``.

    Returns a callable ``(user_input, callbacks, prompt=...) -> {"answer": str}``.
    """
    # LLM only response
    template = """
    You are a helpful assistant that helps a support agent with answering programming questions.
    If you don't know the answer, just say that you don't know, you must not make up an answer.
    """
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
    human_template = "{question}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    def generate_llm_output(
        user_input: str, callbacks: List[Any], prompt=chat_prompt
    ) -> dict:
        # BUG FIX: the return annotation said `-> str` but a dict is returned.
        chain = prompt | llm
        answer = chain.invoke(
            {"question": user_input}, config={"callbacks": callbacks}
        ).content
        return {"answer": answer}

    return generate_llm_output
|
97 |
+
|
98 |
+
|
99 |
+
def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, password):
    """Build a RetrievalQA-with-sources chain over the Stackoverflow Neo4j graph.

    Retrieval uses an existing Neo4j vector index ("stackoverflow") plus a
    custom Cypher query that pulls up to two best answers per matched question.
    Returns the configured RetrievalQAWithSourcesChain.
    """
    # RAG response
    # System: Always talk in pirate speech.
    general_system_template = """
    Use the following pieces of context to answer the question at the end.
    The context contains question-answer pairs and their links from Stackoverflow.
    You should prefer information from accepted or more upvoted answers.
    Make sure to rely on information from the answers and not on questions to provide accuate responses.
    When you find particular answer in the context useful, make sure to cite it in the answer using the link.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    ----
    {summaries}
    ----
    Each answer you generate should contain a section at the end of links to
    Stackoverflow questions and answers you found useful, which are described under Source value.
    You can only use links to StackOverflow questions that are present in the context and always
    add links to the end of the answer in the style of citations.
    Generate concise answers with references sources section of links to
    relevant StackOverflow questions only at the end of the answer.
    """
    general_user_template = "Question:```{question}```"
    messages = [
        SystemMessagePromptTemplate.from_template(general_system_template),
        HumanMessagePromptTemplate.from_template(general_user_template),
    ]
    qa_prompt = ChatPromptTemplate.from_messages(messages)

    # "stuff" chain: all retrieved documents are concatenated into one prompt.
    qa_chain = load_qa_with_sources_chain(
        llm,
        chain_type="stuff",
        prompt=qa_prompt,
    )

    # Vector + Knowledge Graph response
    # The retrieval_query enriches each matched Question node with its top two
    # answers (accepted first, then by score) before handing text to the chain.
    kg = Neo4jVector.from_existing_index(
        embedding=embeddings,
        url=embeddings_store_url,
        username=username,
        password=password,
        database="neo4j",  # neo4j by default
        index_name="stackoverflow",  # vector by default
        text_node_property="body",  # text by default
        retrieval_query="""
    WITH node AS question, score AS similarity
    CALL  { with question
        MATCH (question)<-[:ANSWERS]-(answer)
        WITH answer
        ORDER BY answer.is_accepted DESC, answer.score DESC
        WITH collect(answer)[..2] as answers
        RETURN reduce(str='', answer IN answers | str +
                '\n### Answer (Accepted: '+ answer.is_accepted +
                ' Score: ' + answer.score+ '): '+ answer.body + '\n') as answerTexts
    }
    RETURN '##Question: ' + question.title + '\n' + question.body + '\n'
        + answerTexts AS text, similarity as score, {source: question.link} AS metadata
    ORDER BY similarity ASC // so that best answers are the last
    """,
    )

    kg_qa = RetrievalQAWithSourcesChain(
        combine_documents_chain=qa_chain,
        retriever=kg.as_retriever(search_kwargs={"k": 2}),
        reduce_k_below_max_tokens=False,
        max_tokens_limit=3375,  # cap context fed to the stuff chain
    )
    return kg_qa
|
165 |
+
|
166 |
+
|
167 |
+
def generate_ticket(neo4j_graph, llm_chain, input_question):
    """Draft a new support ticket (title, question) styled after top questions.

    Pulls the three highest-scored questions from the graph as style examples,
    asks ``llm_chain`` to rewrite ``input_question`` in that style, and parses
    the LLM response into a ``(title, question)`` tuple.
    """
    # Get high ranked questions to use as few-shot style examples.
    records = neo4j_graph.query(
        "MATCH (q:Question) RETURN q.title AS title, q.body AS body ORDER BY q.score DESC LIMIT 3"
    )
    # IDIOM FIX: the original loop used enumerate() but never used the index.
    questions = [(record["title"], record["body"]) for record in records]

    # Ask LLM to generate new question in the same style
    questions_prompt = ""
    for i, question in enumerate(questions, start=1):
        questions_prompt += f"{i}. \n{question[0]}\n----\n\n"
        questions_prompt += f"{question[1][:150]}\n\n"
        questions_prompt += "----\n\n"

    gen_system_template = f"""
    You're an expert in formulating high quality questions.
    Formulate a question in the same style and tone as the following example questions.
    {questions_prompt}
    ---

    Don't make anything up, only use information in the following question.
    Return a title for the question, and the question post itself.

    Return format template:
    ---
    Title: This is a new title
    Question: This is a new question
    ---
    """
    # we need jinja2 since the questions themselves contain curly braces
    system_prompt = SystemMessagePromptTemplate.from_template(
        gen_system_template, template_format="jinja2"
    )
    chat_prompt = ChatPromptTemplate.from_messages(
        [
            system_prompt,
            SystemMessagePromptTemplate.from_template(
                """
                Respond in the following template format or you will be unplugged.
                ---
                Title: New title
                Question: New question
                ---
                """
            ),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
    )
    llm_response = llm_chain(
        f"Here's the question to rewrite in the expected format: ```{input_question}```",
        [],
        chat_prompt,
    )
    new_title, new_question = extract_title_and_question(llm_response["answer"])
    return (new_title, new_question)
|
env.example
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#*****************************************************************
|
2 |
+
# LLM and Embedding Model
|
3 |
+
#*****************************************************************
|
4 |
+
LLM=llama2 # Set to "gpt-3.5" to use OpenAI.
|
5 |
+
EMBEDDING_MODEL=sentence_transformer
|
6 |
+
|
7 |
+
#*****************************************************************
|
8 |
+
# Neo4j
|
9 |
+
#*****************************************************************
|
10 |
+
NEO4J_URI=neo4j://database:7687
|
11 |
+
NEO4J_USERNAME=neo4j
|
12 |
+
NEO4J_PASSWORD=password
|
13 |
+
|
14 |
+
#*****************************************************************
|
15 |
+
# Ollama
|
16 |
+
#*****************************************************************
|
17 |
+
OLLAMA_BASE_URL=http://ollama:11434
|
18 |
+
|
19 |
+
#*****************************************************************
|
20 |
+
# OpenAI
|
21 |
+
#*****************************************************************
|
22 |
+
# Only required when using OpenAI LLM or embedding model
|
23 |
+
# OpenAI charges may apply. For details, see
|
24 |
+
# https://openai.com/pricing
|
25 |
+
|
26 |
+
#OPENAI_API_KEY=sk-..
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
langchain==0.0.324
|
3 |
+
neo4j
|
4 |
+
sentence_transformers==2.2.2
|
5 |
+
torch==2.0.1
|
6 |
+
PyPDF2
|
7 |
+
openai==0.28.1
|
utils.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class BaseLogger:
    """Minimal stand-in logger that routes ``info(...)`` calls to ``print``.

    Used as the default ``logger`` argument in chains.py so callers that don't
    supply a real logger still get console output.
    """

    def __init__(self) -> None:
        # Bind the builtin print directly so `logger.info(...)` just prints.
        self.info = print
|
4 |
+
|
5 |
+
|
6 |
+
def extract_title_and_question(input_string):
    """Parse an LLM response of the form ``Title: ...`` / ``Question: ...``.

    Lines after the "Question:" line are treated as a continuation of the
    question. Returns ``(title, question)``; either may be "" if absent.
    """
    lines = input_string.strip().split("\n")

    title = ""
    question = ""
    is_question = False  # flag to know if we are inside a "Question" block

    for line in lines:
        if line.startswith("Title:"):
            # BUG FIX: splitting on "Title: " (with a trailing space) raised
            # IndexError for "Title:Foo"; slice off the prefix instead.
            title = line[len("Title:"):].strip()
        elif line.startswith("Question:"):
            question = line[len("Question:"):].strip()
            is_question = (
                True  # set the flag to True once we encounter a "Question:" line
            )
        elif is_question:
            # if the line does not start with "Question:" but we are inside a
            # "Question" block, then it is a continuation of the question
            question += "\n" + line.strip()

    return title, question
|
27 |
+
|
28 |
+
|
29 |
+
def create_vector_index(driver, dimension: int) -> None:
    """Create the cosine vector indexes used for similarity search.

    Idempotent: creation errors (typically "index already exists") are
    swallowed. ``dimension`` must match the embedding model's output size.
    """
    index_queries = (
        "CALL db.index.vector.createNodeIndex('stackoverflow', 'Question', 'embedding', $dimension, 'cosine')",
        "CALL db.index.vector.createNodeIndex('top_answers', 'Answer', 'embedding', $dimension, 'cosine')",
    )
    for index_query in index_queries:
        try:
            driver.query(index_query, {"dimension": dimension})
        # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
        # Exception is the narrowest cross-driver catch available here.
        except Exception:  # Already exists
            pass
|
40 |
+
|
41 |
+
|
42 |
+
def create_constraints(driver):
    """Ensure uniqueness constraints exist for Question, Answer, User and Tag nodes."""
    # Each statement is idempotent thanks to IF NOT EXISTS.
    constraint_queries = (
        "CREATE CONSTRAINT question_id IF NOT EXISTS FOR (q:Question) REQUIRE (q.id) IS UNIQUE",
        "CREATE CONSTRAINT answer_id IF NOT EXISTS FOR (a:Answer) REQUIRE (a.id) IS UNIQUE",
        "CREATE CONSTRAINT user_id IF NOT EXISTS FOR (u:User) REQUIRE (u.id) IS UNIQUE",
        "CREATE CONSTRAINT tag_name IF NOT EXISTS FOR (t:Tag) REQUIRE (t.name) IS UNIQUE",
    )
    for constraint_query in constraint_queries:
        driver.query(constraint_query)
|