jaothan committed
Commit e1f72a5 · verified · 1 Parent(s): 9532c11

Upload 6 files

Files changed (6)
  1. LICENSE +121 -0
  2. app.py +109 -0
  3. chains.py +222 -0
  4. env.example +26 -0
  5. requirements.txt +7 -0
  6. utils.py +54 -0
LICENSE ADDED
@@ -0,0 +1,121 @@
+ Creative Commons Legal Code
+
+ CC0 1.0 Universal
+
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+ LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+ ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+ INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+ REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+ PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+ THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+ HEREUNDER.
+
+ Statement of Purpose
+
+ The laws of most jurisdictions throughout the world automatically confer
+ exclusive Copyright and Related Rights (defined below) upon the creator
+ and subsequent owner(s) (each and all, an "owner") of an original work of
+ authorship and/or a database (each, a "Work").
+
+ Certain owners wish to permanently relinquish those rights to a Work for
+ the purpose of contributing to a commons of creative, cultural and
+ scientific works ("Commons") that the public can reliably and without fear
+ of later claims of infringement build upon, modify, incorporate in other
+ works, reuse and redistribute as freely as possible in any form whatsoever
+ and for any purposes, including without limitation commercial purposes.
+ These owners may contribute to the Commons to promote the ideal of a free
+ culture and the further production of creative, cultural and scientific
+ works, or to gain reputation or greater distribution for their Work in
+ part through the use and efforts of others.
+
+ For these and/or other purposes and motivations, and without any
+ expectation of additional consideration or compensation, the person
+ associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+ is an owner of Copyright and Related Rights in the Work, voluntarily
+ elects to apply CC0 to the Work and publicly distribute the Work under its
+ terms, with knowledge of his or her Copyright and Related Rights in the
+ Work and the meaning and intended legal effect of CC0 on those rights.
+
+ 1. Copyright and Related Rights. A Work made available under CC0 may be
+ protected by copyright and related or neighboring rights ("Copyright and
+ Related Rights"). Copyright and Related Rights include, but are not
+ limited to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display,
+ communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+ iii. publicity and privacy rights pertaining to a person's image or
+ likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+ v. rights protecting the extraction, dissemination, use and reuse of data
+ in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation
+ thereof, including any amended or successor version of such
+ directive); and
+ vii. other similar, equivalent or corresponding rights throughout the
+ world based on applicable law or treaty, and any national
+ implementations thereof.
+
+ 2. Waiver. To the greatest extent permitted by, but not in contravention
+ of, applicable law, Affirmer hereby overtly, fully, permanently,
+ irrevocably and unconditionally waives, abandons, and surrenders all of
+ Affirmer's Copyright and Related Rights and associated claims and causes
+ of action, whether now known or unknown (including existing as well as
+ future claims and causes of action), in the Work (i) in all territories
+ worldwide, (ii) for the maximum duration provided by applicable law or
+ treaty (including future time extensions), (iii) in any current or future
+ medium and for any number of copies, and (iv) for any purpose whatsoever,
+ including without limitation commercial, advertising or promotional
+ purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+ member of the public at large and to the detriment of Affirmer's heirs and
+ successors, fully intending that such Waiver shall not be subject to
+ revocation, rescission, cancellation, termination, or any other legal or
+ equitable action to disrupt the quiet enjoyment of the Work by the public
+ as contemplated by Affirmer's express Statement of Purpose.
+
+ 3. Public License Fallback. Should any part of the Waiver for any reason
+ be judged legally invalid or ineffective under applicable law, then the
+ Waiver shall be preserved to the maximum extent permitted taking into
+ account Affirmer's express Statement of Purpose. In addition, to the
+ extent the Waiver is so judged Affirmer hereby grants to each affected
+ person a royalty-free, non transferable, non sublicensable, non exclusive,
+ irrevocable and unconditional license to exercise Affirmer's Copyright and
+ Related Rights in the Work (i) in all territories worldwide, (ii) for the
+ maximum duration provided by applicable law or treaty (including future
+ time extensions), (iii) in any current or future medium and for any number
+ of copies, and (iv) for any purpose whatsoever, including without
+ limitation commercial, advertising or promotional purposes (the
+ "License"). The License shall be deemed effective as of the date CC0 was
+ applied by Affirmer to the Work. Should any part of the License for any
+ reason be judged legally invalid or ineffective under applicable law, such
+ partial invalidity or ineffectiveness shall not invalidate the remainder
+ of the License, and in such case Affirmer hereby affirms that he or she
+ will not (i) exercise any of his or her remaining Copyright and Related
+ Rights in the Work or (ii) assert any associated claims and causes of
+ action with respect to the Work, in either case contrary to Affirmer's
+ express Statement of Purpose.
+
+ 4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+ warranties of any kind concerning the Work, express, implied,
+ statutory or otherwise, including without limitation warranties of
+ title, merchantability, fitness for a particular purpose, non
+ infringement, or the absence of latent or other defects, accuracy, or
+ the present or absence of errors, whether or not discoverable, all to
+ the greatest extent permissible under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without
+ limitation any person's Copyright and Related Rights in the Work.
+ Further, Affirmer disclaims responsibility for obtaining any necessary
+ consents, permissions or other rights required for any use of the
+ Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to
+ this CC0 or use of the Work.
app.py ADDED
@@ -0,0 +1,109 @@
+ import os
+
+ import streamlit as st
+ from langchain.chains import RetrievalQA
+ from PyPDF2 import PdfReader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.callbacks.base import BaseCallbackHandler
+ from langchain.vectorstores.neo4j_vector import Neo4jVector
+ from streamlit.logger import get_logger
+ from chains import (
+     load_embedding_model,
+     load_llm,
+ )
+
+ url = os.getenv("NEO4J_URI")
+ username = os.getenv("NEO4J_USERNAME")
+ password = os.getenv("NEO4J_PASSWORD")
+ ollama_base_url = os.getenv("OLLAMA_BASE_URL")
+ embedding_model_name = os.getenv("EMBEDDING_MODEL", "SentenceTransformer")
+ llm_name = os.getenv("LLM", "llama2")
+ url = os.getenv("NEO4J_URI")
+
+ # Check if the required environment variables are set
+ if not all([url, username, password,
+             ollama_base_url]):
+     st.write("The application requires some information before running.")
+     with st.form("connection_form"):
+         url = st.text_input("Enter NEO4J_URI")
+         username = st.text_input("Enter NEO4J_USERNAME")
+         password = st.text_input("Enter NEO4J_PASSWORD", type="password")
+         ollama_base_url = st.text_input("Enter OLLAMA_BASE_URL")
+         st.markdown("Only enter the OPENAI_API_KEY to use OpenAI instead of Ollama. Leave blank to use Ollama.")
+         openai_apikey = st.text_input("Enter OPENAI_API_KEY", type="password")
+         submit_button = st.form_submit_button("Submit")
+         if submit_button:
+             if not all([url, username, password]):
+                 st.write("Enter the Neo4j information.")
+             if not (ollama_base_url or openai_apikey):
+                 st.write("Enter the Ollama URL or OpenAI API Key.")
+             if openai_apikey:
+                 llm_name = "gpt-3.5"
+                 os.environ["OPENAI_API_KEY"] = openai_apikey
+
+ os.environ["NEO4J_URL"] = url
+
+ logger = get_logger(__name__)
+
+ embeddings, dimension = load_embedding_model(
+     embedding_model_name, config={"ollama_base_url": ollama_base_url}, logger=logger
+ )
+
+
+ class StreamHandler(BaseCallbackHandler):
+     def __init__(self, container, initial_text=""):
+         self.container = container
+         self.text = initial_text
+
+     def on_llm_new_token(self, token: str, **kwargs) -> None:
+         self.text += token
+         self.container.markdown(self.text)
+
+ llm = load_llm(llm_name, logger=logger, config={"ollama_base_url": ollama_base_url})
+
+
+ def main():
+     st.header("📄Chat with your pdf file")
+
+     # Upload your PDF file
+     pdf = st.file_uploader("Upload your PDF", type="pdf")
+
+     if pdf is not None:
+         pdf_reader = PdfReader(pdf)
+
+         text = ""
+         for page in pdf_reader.pages:
+             text += page.extract_text()
+
+         # LangChain text splitter
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=1000, chunk_overlap=200, length_function=len
+         )
+
+         chunks = text_splitter.split_text(text=text)
+
+         # Store the chunks in the Neo4j vector store
+         vectorstore = Neo4jVector.from_texts(
+             chunks,
+             url=url,
+             username=username,
+             password=password,
+             embedding=embeddings,
+             index_name="pdf_bot",
+             node_label="PdfBotChunk",
+             pre_delete_collection=True,  # Delete existing PDF data
+         )
+         qa = RetrievalQA.from_chain_type(
+             llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever()
+         )
+
+         # Accept user questions/query
+         query = st.text_input("Ask questions about your PDF file")
+
+         if query:
+             stream_handler = StreamHandler(st.empty())
+             qa.run(query, callbacks=[stream_handler])
+
+
+ if __name__ == "__main__":
+     main()
chains.py ADDED
@@ -0,0 +1,222 @@
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.embeddings import (
+     OllamaEmbeddings,
+     SentenceTransformerEmbeddings,
+     BedrockEmbeddings,
+ )
+ from langchain.chat_models import ChatOpenAI, ChatOllama, BedrockChat
+ from langchain.vectorstores.neo4j_vector import Neo4jVector
+ from langchain.chains import RetrievalQAWithSourcesChain
+ from langchain.chains.qa_with_sources import load_qa_with_sources_chain
+ from langchain.prompts.chat import (
+     ChatPromptTemplate,
+     SystemMessagePromptTemplate,
+     HumanMessagePromptTemplate,
+ )
+ from typing import List, Any
+ from utils import BaseLogger, extract_title_and_question
+
+
+ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config={}):
+     if embedding_model_name == "ollama":
+         embeddings = OllamaEmbeddings(
+             base_url=config["ollama_base_url"], model="llama2"
+         )
+         dimension = 4096
+         logger.info("Embedding: Using Ollama")
+     elif embedding_model_name == "openai":
+         embeddings = OpenAIEmbeddings()
+         dimension = 1536
+         logger.info("Embedding: Using OpenAI")
+     elif embedding_model_name == "aws":
+         embeddings = BedrockEmbeddings()
+         dimension = 1536
+         logger.info("Embedding: Using AWS")
+     else:
+         embeddings = SentenceTransformerEmbeddings(
+             model_name="all-MiniLM-L6-v2", cache_folder="/tmp"
+         )
+         dimension = 384
+         logger.info("Embedding: Using SentenceTransformer")
+     return embeddings, dimension
+
+
+ def load_llm(llm_name: str, logger=BaseLogger(), config={}):
+     if llm_name == "gpt-4":
+         logger.info("LLM: Using GPT-4")
+         return ChatOpenAI(temperature=0, model_name="gpt-4", streaming=True)
+     elif llm_name == "gpt-3.5":
+         logger.info("LLM: Using GPT-3.5")
+         return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
+     elif llm_name == "claudev2":
+         logger.info("LLM: Using ClaudeV2")
+         return BedrockChat(
+             model_id="anthropic.claude-v2",
+             model_kwargs={"temperature": 0.0, "max_tokens_to_sample": 1024},
+             streaming=True,
+         )
+     elif len(llm_name):
+         logger.info(f"LLM: Using Ollama: {llm_name}")
+         return ChatOllama(
+             temperature=0,
+             base_url=config["ollama_base_url"],
+             model=llm_name,
+             streaming=True,
+             # seed=2,
+             top_k=10,  # A higher value (100) will give more diverse answers, while a lower value (10) will be more conservative.
+             top_p=0.3,  # A higher value (0.95) will lead to more diverse text, while a lower value (0.5) will generate more focused text.
+             num_ctx=3072,  # Sets the size of the context window used to generate the next token.
+         )
+     logger.info("LLM: Using GPT-3.5")
+     return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
+
+
+ def configure_llm_only_chain(llm):
+     # LLM-only response
+     template = """
+     You are a helpful assistant that helps a support agent with answering programming questions.
+     If you don't know the answer, just say that you don't know, you must not make up an answer.
+     """
+     system_message_prompt = SystemMessagePromptTemplate.from_template(template)
+     human_template = "{question}"
+     human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
+     chat_prompt = ChatPromptTemplate.from_messages(
+         [system_message_prompt, human_message_prompt]
+     )
+
+     def generate_llm_output(
+         user_input: str, callbacks: List[Any], prompt=chat_prompt
+     ) -> str:
+         chain = prompt | llm
+         answer = chain.invoke(
+             {"question": user_input}, config={"callbacks": callbacks}
+         ).content
+         return {"answer": answer}
+
+     return generate_llm_output
+
+
+ def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, password):
+     # RAG response
+     # System: Always talk in pirate speech.
+     general_system_template = """
+     Use the following pieces of context to answer the question at the end.
+     The context contains question-answer pairs and their links from Stackoverflow.
+     You should prefer information from accepted or more upvoted answers.
+     Make sure to rely on information from the answers and not on questions to provide accurate responses.
+     When you find a particular answer in the context useful, make sure to cite it in the answer using the link.
+     If you don't know the answer, just say that you don't know, don't try to make up an answer.
+     ----
+     {summaries}
+     ----
+     Each answer you generate should contain a section at the end of links to
+     Stackoverflow questions and answers you found useful, which are described under Source value.
+     You can only use links to StackOverflow questions that are present in the context and always
+     add links to the end of the answer in the style of citations.
+     Generate concise answers with a references section of links to
+     relevant StackOverflow questions only at the end of the answer.
+     """
+     general_user_template = "Question:```{question}```"
+     messages = [
+         SystemMessagePromptTemplate.from_template(general_system_template),
+         HumanMessagePromptTemplate.from_template(general_user_template),
+     ]
+     qa_prompt = ChatPromptTemplate.from_messages(messages)
+
+     qa_chain = load_qa_with_sources_chain(
+         llm,
+         chain_type="stuff",
+         prompt=qa_prompt,
+     )
+
+     # Vector + Knowledge Graph response
+     kg = Neo4jVector.from_existing_index(
+         embedding=embeddings,
+         url=embeddings_store_url,
+         username=username,
+         password=password,
+         database="neo4j",  # neo4j by default
+         index_name="stackoverflow",  # vector by default
+         text_node_property="body",  # text by default
+         retrieval_query="""
+     WITH node AS question, score AS similarity
+     CALL { WITH question
+         MATCH (question)<-[:ANSWERS]-(answer)
+         WITH answer
+         ORDER BY answer.is_accepted DESC, answer.score DESC
+         WITH collect(answer)[..2] AS answers
+         RETURN reduce(str='', answer IN answers | str +
+             '\n### Answer (Accepted: ' + answer.is_accepted +
+             ' Score: ' + answer.score + '): ' + answer.body + '\n') AS answerTexts
+     }
+     RETURN '##Question: ' + question.title + '\n' + question.body + '\n'
+         + answerTexts AS text, similarity AS score, {source: question.link} AS metadata
+     ORDER BY similarity ASC // so that best answers are the last
+     """,
+     )
+
+     kg_qa = RetrievalQAWithSourcesChain(
+         combine_documents_chain=qa_chain,
+         retriever=kg.as_retriever(search_kwargs={"k": 2}),
+         reduce_k_below_max_tokens=False,
+         max_tokens_limit=3375,
+     )
+     return kg_qa
+
+
+ def generate_ticket(neo4j_graph, llm_chain, input_question):
+     # Get highly ranked questions
+     records = neo4j_graph.query(
+         "MATCH (q:Question) RETURN q.title AS title, q.body AS body ORDER BY q.score DESC LIMIT 3"
+     )
+     questions = []
+     for i, question in enumerate(records, start=1):
+         questions.append((question["title"], question["body"]))
+     # Ask the LLM to generate a new question in the same style
+     questions_prompt = ""
+     for i, question in enumerate(questions, start=1):
+         questions_prompt += f"{i}. \n{question[0]}\n----\n\n"
+         questions_prompt += f"{question[1][:150]}\n\n"
+         questions_prompt += "----\n\n"
+
+     gen_system_template = f"""
+     You're an expert in formulating high quality questions.
+     Formulate a question in the same style and tone as the following example questions.
+     {questions_prompt}
+     ---
+
+     Don't make anything up, only use information in the following question.
+     Return a title for the question, and the question post itself.
+
+     Return format template:
+     ---
+     Title: This is a new title
+     Question: This is a new question
+     ---
+     """
+     # We need jinja2 since the questions themselves contain curly braces
+     system_prompt = SystemMessagePromptTemplate.from_template(
+         gen_system_template, template_format="jinja2"
+     )
+     chat_prompt = ChatPromptTemplate.from_messages(
+         [
+             system_prompt,
+             SystemMessagePromptTemplate.from_template(
+                 """
+                 Respond in the following template format or you will be unplugged.
+                 ---
+                 Title: New title
+                 Question: New question
+                 ---
+                 """
+             ),
+             HumanMessagePromptTemplate.from_template("{question}"),
+         ]
+     )
+     llm_response = llm_chain(
+         f"Here's the question to rewrite in the expected format: ```{input_question}```",
+         [],
+         chat_prompt,
+     )
+     new_title, new_question = extract_title_and_question(llm_response["answer"])
+     return (new_title, new_question)
env.example ADDED
@@ -0,0 +1,26 @@
+ #*****************************************************************
+ # LLM and Embedding Model
+ #*****************************************************************
+ LLM=llama2 # Set to "gpt-3.5" to use OpenAI.
+ EMBEDDING_MODEL=sentence_transformer
+
+ #*****************************************************************
+ # Neo4j
+ #*****************************************************************
+ NEO4J_URI=neo4j://database:7687
+ NEO4J_USERNAME=neo4j
+ NEO4J_PASSWORD=password
+
+ #*****************************************************************
+ # Ollama
+ #*****************************************************************
+ OLLAMA_BASE_URL=http://ollama:11434
+
+ #*****************************************************************
+ # OpenAI
+ #*****************************************************************
+ # Only required when using OpenAI LLM or embedding model
+ # OpenAI charges may apply. For details, see
+ # https://openai.com/pricing
+
+ #OPENAI_API_KEY=sk-..
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ streamlit
+ langchain==0.0.324
+ neo4j
+ sentence_transformers==2.2.2
+ torch==2.0.1
+ PyPDF2
+ openai==0.28.1
utils.py ADDED
@@ -0,0 +1,54 @@
+ class BaseLogger:
+     def __init__(self) -> None:
+         self.info = print
+
+
+ def extract_title_and_question(input_string):
+     lines = input_string.strip().split("\n")
+
+     title = ""
+     question = ""
+     is_question = False  # flag to know if we are inside a "Question" block
+
+     for line in lines:
+         if line.startswith("Title:"):
+             title = line.split("Title: ", 1)[1].strip()
+         elif line.startswith("Question:"):
+             question = line.split("Question: ", 1)[1].strip()
+             is_question = (
+                 True  # set the flag to True once we encounter a "Question:" line
+             )
+         elif is_question:
+             # If the line does not start with "Question:" but we are inside a "Question" block,
+             # then it is a continuation of the question
+             question += "\n" + line.strip()
+
+     return title, question
+
+
+ def create_vector_index(driver, dimension: int) -> None:
+     index_query = "CALL db.index.vector.createNodeIndex('stackoverflow', 'Question', 'embedding', $dimension, 'cosine')"
+     try:
+         driver.query(index_query, {"dimension": dimension})
+     except Exception:  # Index already exists
+         pass
+     index_query = "CALL db.index.vector.createNodeIndex('top_answers', 'Answer', 'embedding', $dimension, 'cosine')"
+     try:
+         driver.query(index_query, {"dimension": dimension})
+     except Exception:  # Index already exists
+         pass
+
+
+ def create_constraints(driver):
+     driver.query(
+         "CREATE CONSTRAINT question_id IF NOT EXISTS FOR (q:Question) REQUIRE (q.id) IS UNIQUE"
+     )
+     driver.query(
+         "CREATE CONSTRAINT answer_id IF NOT EXISTS FOR (a:Answer) REQUIRE (a.id) IS UNIQUE"
+     )
+     driver.query(
+         "CREATE CONSTRAINT user_id IF NOT EXISTS FOR (u:User) REQUIRE (u.id) IS UNIQUE"
+     )
+     driver.query(
+         "CREATE CONSTRAINT tag_name IF NOT EXISTS FOR (t:Tag) REQUIRE (t.name) IS UNIQUE"
+     )