davoodwadi commited on
Commit
65790ea
1 Parent(s): 48750dd

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +145 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ import json
4
+
5
+ import gradio as gr
6
+
7
+ from openai import OpenAI
8
+
9
+ from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
10
+ from langchain_community.vectorstores import Chroma
11
+
12
+ from huggingface_hub import CommitScheduler
13
+ from pathlib import Path
14
+
15
+
16
# --- Model client, vector store, and logging setup -------------------------

# OpenAI-compatible chat client. NOTE(review): the Anyscale base_url is
# commented out, so requests go to api.openai.com; the key is read from the
# 'openai' environment variable (a Space secret) — confirm the endpoint
# matches the model used in predict().
client = OpenAI(
    # base_url="https://api.endpoints.anyscale.com/v1",
    api_key=os.environ['openai']
)

# Embedding model; must match the one used when the collection was built.
embedder = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')

# Re-open the persisted Chroma collection holding the Streamlit doc chunks.
vector_store = Chroma(
    collection_name='streamlit',
    persist_directory='./streamlitdb',
    embedding_function=embedder,
)

# Plain top-5 similarity retriever over the persisted collection.
retriever = vector_store.as_retriever(
    search_type='similarity',
    search_kwargs={'k': 5},
)

# --- Logging setup ---------------------------------------------------------

# One JSONL-style log file per process; the scheduler pushes the whole
# logs/ folder to a HF dataset repo every 2 minutes.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"

scheduler = CommitScheduler(
    repo_id="document-qna-chroma-anyscale-logs",
    repo_type="dataset",
    folder_path=log_file.parent,
    path_in_repo="data",
    every=2,
)
48
+
49
# System prompt for the RAG assistant: defines the ###Context / ###Source /
# ###Question token protocol and the refusal/fallback wording.
qna_system_message = """
You are an assistant to a coder. Your task is to provide relevant information about the Python package Streamlit.

User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context.
The context contains references to specific portions of documents relevant to the user's query, along with source links.
The source for a context will begin with the token ###Source

When crafting your response:
1. Select the most relevant context or contexts to answer the question.
2. Include the source links in your response.
3. User questions will begin with the token: ###Question.
4. If the question is irrelevant to streamlit respond with - "I am an assistant for streamlit Docs. I can only help you with questions related to streamlit"

Please adhere to the following guidelines:
- Answer only using the context provided.
- Do not mention anything about the context in your final answer.
- If the answer is not found in the context, it is very very important for you to respond with "I don't know. Please check the docs @ 'https://docs.streamlit.io/'"
- Always quote the source when you use the context. Cite the relevant source at the end of your response under the section - Sources:
- Do not make up sources. Use the links provided in the sources section of the context and nothing else. You are prohibited from providing other links/sources.

Here is an example of how to structure your response:

Answer:
[Answer]

Source
[Source]
"""

# User-message template with placeholders for the retrieved context and the
# user's question.
# Fix: the system prompt states that user questions begin with the token
# ###Question, but the original template never emitted that token — the
# model had no marker separating the context from the question. Added the
# ###Question header before the fenced question.
qna_user_message_template = """
###Context
Here are some documents that are relevant to the question.
{context}

###Question
```
{question}
```
"""
86
+
87
# Define the predict function that runs when 'Submit' is clicked or when an
# API request is made.
def predict(user_input):
    """Answer a Streamlit-docs question via RAG.

    Retrieves the top-k relevant document chunks, sends them plus the
    question to the chat model, logs the exchange to the shared log file
    (under the commit scheduler's lock), and returns the model's answer.

    Parameters:
        user_input (str): the user's question.

    Returns:
        str: the model's answer, or an error message if the API call failed.
    """
    relevant_document_chunks = retriever.invoke(user_input)
    # NOTE(review): chunks are joined with '.' — consider '\n\n' so chunk
    # boundaries stay readable; kept as-is to preserve prompt behavior.
    context_for_query = ".".join(d.page_content for d in relevant_document_chunks)

    prompt = [
        {'role': 'system', 'content': qna_system_message},
        {'role': 'user', 'content': qna_user_message_template.format(
            context=context_for_query,
            question=user_input,
        )},
    ]

    try:
        # NOTE(review): this is an Anyscale-hosted model id, but the
        # Anyscale base_url is commented out in the client above, so the
        # request goes to OpenAI and will likely fail with "model not
        # found" — confirm which endpoint is intended.
        response = client.chat.completions.create(
            model='mistralai/Mixtral-8x7B-Instruct-v0.1',
            messages=prompt,
            temperature=0,
        )
        prediction = response.choices[0].message.content
    except Exception as e:
        # Bug fix: the original stored the exception object itself
        # (prediction = e), which is not JSON-serializable and made
        # json.dumps() below raise — so the error path itself crashed.
        # Store a string message instead.
        prediction = f"Error: {e}"

    # While the prediction is made, log both the inputs and outputs to a
    # local log file. Hold the commit scheduler's lock while writing so the
    # background upload thread never reads a half-written record.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction,
                }
            ))
            f.write("\n")

    return prediction
131
+
132
+
133
# --- Gradio UI -------------------------------------------------------------

# Multi-line input box for the user's question.
textbox = gr.Textbox(placeholder="Enter your query here", lines=6)

# Text-in / text-out interface around predict(); up to 16 concurrent calls.
demo = gr.Interface(
    fn=predict,
    inputs=textbox,
    outputs="text",
    title="Streamlit Q&A System",
    description="This web API presents an interface to ask questions on streamlit documentation",
    article="Note that questions that are not relevant to streamlit or not within the sample documents will be answered with 'I don't know. Please check the docs @ 'https://docs.streamlit.io/''",
    concurrency_limit=16,
)

# Enable request queueing, then start the app server.
demo.queue()
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai==1.23.2
2
+ chromadb==0.4.22
3
+ langchain==0.1.9
4
+ langchain-community==0.0.32
5
+ sentence-transformers==2.3.1