Sean-Case committed on
Commit • 2e536f9
1 Parent(s): 84b25ff
Added working like buttons. Added model choice. Modified requirements.txt
Browse files
- Generation speed GPU test.txt +51 -0
- Link to images.txt +1 -1
- app.py +76 -19
- chatfuncs/chatfuncs.py +266 -345
- requirements.txt +3 -9
- thumbs_up_data.csv +36 -0
Generation speed GPU test.txt ADDED
@@ -0,0 +1,51 @@
+With 5 gpu layers, batch size 8
+
+Num of generated tokens: 113
+Time for complete generation: 115.42684650421143s
+Tokens per secound: 0.9789750255013432
+Time per token: 1021.4765177363843ms
+
+With 5 gpu layers, batch size 512
+
+Num of generated tokens: 102
+Time for complete generation: 40.369266986846924s
+Tokens per secound: 2.5266745624396285
+Time per token: 395.77712732202866ms
+
+With 6 gpu layers -
+
+Num of generated tokens: 113
+Time for complete generation: 46.37785983085632s
+Tokens per secound: 2.4365074285902764
+Time per token: 410.42353832616215ms
+
+With 6 gpu layers, batch size 1024 -
+Five pillars Q:
+Num of generated tokens: 102
+Time for complete generation: 41.85241961479187s
+Tokens per secound: 2.4371350793766346
+Time per token: 410.31783936070457ms
+
+With 8 threads
+Num of generated tokens: 102
+Time for complete generation: 40.64410996437073s
+Tokens per secound: 2.5095887224351774
+Time per token: 398.4716663173601ms
+
+Vision statement Q:
+Num of generated tokens: 84
+Time for complete generation: 35.57932233810425s
+Tokens per secound: 2.360921863597128
+Time per token: 423.5633611679077ms
+
+Commitments Q:
+Num of generated tokens: 50
+Time for complete generation: 23.73319172859192s
+Tokens per secound: 2.106754142965266
+Time per token: 474.6638345718384ms
+
+Outcomes Q
+Num of generated tokens: 167
+Time for complete generation: 52.302518367767334s
+Tokens per secound: 3.1929628861412094
+Time per token: 313.1887327411217ms
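For reference, the throughput figures in this log follow directly from the generated-token count and the wall-clock time. A minimal sketch of the arithmetic (the helper name is hypothetical and not part of this commit):

# Sketch only: tokens/second = token count / seconds; ms/token = (seconds / tokens) * 1000.
def throughput(num_tokens: int, seconds: float) -> tuple[float, float]:
    tokens_per_second = num_tokens / seconds
    ms_per_token = (seconds / num_tokens) * 1000
    return tokens_per_second, ms_per_token

# Example using the first run logged above (5 GPU layers, batch size 8):
tps, ms = throughput(113, 115.42684650421143)
print(f"{tps:.3f} tokens/s, {ms:.1f} ms/token")  # ~0.979 tokens/s, ~1021.5 ms/token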
Link to images.txt CHANGED
@@ -1,4 +1,4 @@
-Robot emoji: https://
+Robot emoji: https://commons.wikimedia.org/wiki/File:Fluent_Emoji_high_contrast_1f916.svg
 
 Bing smile emoji: https://www.bing.com/images/create/a-black-and-white-emoji-with-a-simple-smile2c-black/6523d2c320df409581e85bec80ef3ba8?id=KTdVbixG8oRqR9BzF6AblQ%3d%3d&view=detailv2&idpp=genimg&idpclose=1&FORM=SYDBIC
 
app.py CHANGED

@@ -5,7 +5,13 @@ import os
from typing import TypeVar
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
+import gradio as gr
+
+from transformers import AutoTokenizer#, pipeline, TextIteratorStreamer
+from dataclasses import asdict, dataclass

+# Alternative model sources
+from ctransformers import AutoModelForCausalLM#, AutoTokenizer

#PandasDataFrame: type[pd.core.frame.DataFrame]
PandasDataFrame = TypeVar('pd.core.frame.DataFrame')

@@ -16,7 +22,8 @@ PandasDataFrame = TypeVar('pd.core.frame.DataFrame')
#from chatfuncs.chatfuncs import *
import chatfuncs.ingest as ing

-
+
+## Load preset embeddings, vectorstore, and model

embeddings_name = "thenlper/gte-base"

@@ -58,6 +65,55 @@ import chatfuncs.chatfuncs as chatf
chatf.embeddings = load_embeddings(embeddings_name)
chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])

+model_type = "Flan Alpaca"
+
+
+def load_model(model_type, CtransInitConfig_gpu=chatf.CtransInitConfig_gpu, CtransInitConfig_cpu=chatf.CtransInitConfig_cpu, torch_device=chatf.torch_device):
+    print("Loading model")
+    if model_type == "Orca Mini":
+        try:
+            model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(CtransInitConfig_gpu()))
+        except:
+            model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(CtransInitConfig_cpu()))
+
+        tokenizer = []
+
+    if model_type == "Flan Alpaca":
+        # Huggingface chat model
+        hf_checkpoint = 'declare-lab/flan-alpaca-large'
+
+        def create_hf_model(model_name):
+
+            from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM
+
+            # model_id = model_name
+
+            if torch_device == "cuda":
+                if "flan" in model_name:
+                    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
+                else:
+                    model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
+            else:
+                if "flan" in model_name:
+                    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+                else:
+                    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+
+            tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = chatf.context_length)
+
+            return model, tokenizer, model_type
+
+        model, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)
+
+    chatf.model = model
+    chatf.tokenizer = tokenizer
+    chatf.model_type = model_type
+
+    print("Finished loading model: ", model_type)
+    return model_type
+
+load_model(model_type, chatf.CtransInitConfig_gpu, chatf.CtransInitConfig_cpu, chatf.torch_device)
+
def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):

    print(f"> Total split documents: {len(docs_out)}")

@@ -75,14 +131,6 @@ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):

# Gradio chat

-import gradio as gr
-
-def vote(data: gr.LikeData):
-    if data.liked:
-        print("You upvoted this response: " + data.value)
-    else:
-        print("You downvoted this response: " + data.value)
-
block = gr.Blocks(theme = gr.themes.Base())#css=".gradio-container {background-color: black}")

with block:

@@ -90,22 +138,26 @@ with block:
    ingest_metadata = gr.State()
    ingest_docs = gr.State()

+    model_type_state = gr.State(model_type)
    embeddings_state = gr.State(globals()["embeddings"])
    vectorstore_state = gr.State(globals()["vectorstore"])

+    model_state = gr.State() # chatf.model (gives error)
+    tokenizer_state = gr.State() # chatf.tokenizer (gives error)
+
    chat_history_state = gr.State()
    instruction_prompt_out = gr.State()

    gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")

-    gr.Markdown("Chat with
+    gr.Markdown("Chat with PDF or web page documents. The default is a small model (Flan Alpaca), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative (Orca Mini), can reason a little better, but is much slower (See advanced tab).\n\nBy default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Likes and dislike responses will be saved to disk to improve the model. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")

    current_source = gr.Textbox(label="Current data source that is loaded into the app", value="Lambeth_2030-Our_Future_Our_Lambeth.pdf")

    with gr.Tab("Chatbot"):

        with gr.Row():
-            chat_height =
+            chat_height = 500
            chatbot = gr.Chatbot(height=chat_height, avatar_images=('user.jfif', 'bot.jpg'),bubble_full_width = False)
            sources = gr.HTML(value = "Source paragraphs where I looked for answers will appear here", height=chat_height)

@@ -143,12 +195,17 @@ with block:

        ingest_embed_out = gr.Textbox(label="File/webpage preparation progress")

+    with gr.Tab("Advanced features"):
+        model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
+
    gr.HTML(
-        "<center>
+        "<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
    )

    examples_set.change(fn=chatf.update_message, inputs=[examples_set], outputs=[message])

+    model_choice.change(fn=load_model, inputs=[model_choice], outputs = [model_type_state])
+
    # Load in a pdf
    load_pdf_click = load_pdf.click(ing.parse_file, inputs=[in_pdf], outputs=[ingest_text, current_source]).\
        then(ing.text_to_docs, inputs=[ingest_text], outputs=[ingest_docs]).\

@@ -164,25 +221,25 @@ with block:
    # Load in a webpage

    # Click/enter to send message action
-    response_click = submit.click(chatf.
+    response_click = submit.click(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state, model_type_state], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False, api_name="retrieval").\
        then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
-        then(chatf.
+        then(chatf.produce_streaming_answer_chatbot, inputs=[chatbot, instruction_prompt_out, model_type_state], outputs=chatbot)
    response_click.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
        then(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
-        then(lambda:
+        then(lambda: chatf.restore_interactivity(), None, [message], queue=False)

-    response_enter = message.submit(chatf.
+    response_enter = message.submit(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state, model_type_state], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
        then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
-        then(chatf.
+        then(chatf.produce_streaming_answer_chatbot, [chatbot, instruction_prompt_out, model_type_state], chatbot)
    response_enter.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
        then(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
-        then(lambda:
+        then(lambda: chatf.restore_interactivity(), None, [message], queue=False)

    # Clear box
    clear.click(chatf.clear_chat, inputs=[chat_history_state, sources, message, current_topic], outputs=[chat_history_state, sources, message, current_topic])
    clear.click(lambda: None, None, chatbot, queue=False)

-    chatbot.like(vote,
+    chatbot.like(chatf.vote, [chat_history_state, instruction_prompt_out, model_type_state], None)

block.queue(concurrency_count=1).launch(debug=True)
# -
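The "working like buttons" named in the commit message hang off Gradio's LikeData event: the chatbot's like/dislike icons call a handler that receives whether the message was liked and the message text. A stripped-down sketch of that hookup (simplified from the vote handler wired above; it omits the CSV logging and extra state inputs used in this commit):

import gradio as gr

def vote(data: gr.LikeData):
    # data.liked is True for a thumbs-up, False for a thumbs-down;
    # data.value holds the text of the rated response.
    label = "upvoted" if data.liked else "downvoted"
    print(f"You {label} this response: {data.value}")

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    chatbot.like(vote, None, None)  # register the like/dislike callback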
chatfuncs/chatfuncs.py CHANGED

@@ -1,13 +1,13 @@
import re
import datetime
from typing import TypeVar, Dict, List, Tuple
+import time
from itertools import compress
import pandas as pd
import numpy as np

# Model packages
import torch
-torch.cuda.empty_cache()
from threading import Thread
from transformers import AutoTokenizer, pipeline, TextIteratorStreamer

@@ -16,7 +16,6 @@ from ctransformers import AutoModelForCausalLM#, AutoTokenizer
from dataclasses import asdict, dataclass

# Langchain functions
-from langchain import PromptTemplate
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from langchain.retrievers import SVMRetriever

@@ -41,26 +40,46 @@ from gensim.similarities import SparseMatrixSimilarity

import gradio as gr

-
-torch_device = "cuda"
-gpu_layers = 5
-else: torch_device = "cpu"
-
-print("Running on device:", torch_device)
-threads = 8#torch.get_num_threads()
-print("CPU threads:", threads)
+torch.cuda.empty_cache()

PandasDataFrame = TypeVar('pd.core.frame.DataFrame')

embeddings = None # global variable setup
vectorstore = None # global variable setup
+model_type = None # global variable setup

max_memory_length = 0 # How long should the memory of the conversation last?

full_text = "" # Define dummy source text (full text) just to enable highlight function to load

-
+model = [] # Define empty list for model functions to run
+tokenizer = [] # Define empty list for model functions to run
+
+## Highlight text constants
+hlt_chunk_size = 15
+hlt_strat = [" ", ".", "!", "?", ":", "\n\n", "\n", ","]
+hlt_overlap = 4
+
+## Initialise NER model ##
+ner_model = SpanMarkerModel.from_pretrained("tomaarsen/span-marker-mbert-base-multinerd")
+
+## Initialise keyword model ##
+# Used to pull out keywords from chat history to add to user queries behind the scenes
+kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
+
+
+if torch.cuda.is_available():
+    torch_device = "cuda"
+    gpu_layers = 5
+else:
+    torch_device = "cpu"
+    gpu_layers = 0

+print("Running on device:", torch_device)
+threads = 8 #torch.get_num_threads()
+print("CPU threads:", threads)
+
+# Flan Alpaca Model parameters
temperature: float = 0.1
top_k: int = 3
top_p: float = 1

@@ -68,24 +87,26 @@ repetition_penalty: float = 1.05
flan_alpaca_repetition_penalty: float = 1.3
last_n_tokens: int = 64
max_new_tokens: int = 125
-
+seed: int = 42
reset: bool = False
stream: bool = True
threads: int = threads
-batch_size:int =
+batch_size:int = 1024
context_length:int = 4096
-gpu_layers:int = 0#5#gpu_layers For serving on Huggingface set to 0 as using free CPU instance
sample = True

+# CtransGen model parameters
+gpu_layers:int = 6 #gpu_layers For serving on Huggingface set to 0 as using free CPU instance
+
@dataclass
-class
+class CtransInitConfig_gpu:
    temperature: float = temperature
    top_k: int = top_k
    top_p: float = top_p
    repetition_penalty: float = repetition_penalty
    last_n_tokens: int = last_n_tokens
    max_new_tokens: int = max_new_tokens
-
+    seed: int = seed
    reset: bool = reset
    stream: bool = stream
    threads: int = threads

@@ -94,60 +115,33 @@ class GenerationConfig:
    gpu_layers:int = gpu_layers
    #stop: list[str] = field(default_factory=lambda: [stop_string])

-
-
-
-
-
-
-
-
-
-
-
-
-## Set model type ##
-model_type = "ctrans"
-
-## Chat models ##
-
-if model_type == "ctrans":
-    ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
-    #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **asdict(GenerationConfig()))
-    #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q2_K.gguf', **asdict(GenerationConfig()))
-
-if model_type == "hf":
-    # Huggingface chat model
-    #hf_checkpoint = 'jphme/phi-1_5_Wizard_Vicuna_uncensored'
-    hf_checkpoint = 'declare-lab/flan-alpaca-large'
-
-    def create_hf_model(model_name):
-
-        from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM
-
-        # model_id = model_name
-
-        if torch_device == "cuda":
-            if "flan" in model_name:
-                model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
-            elif "mpt" in model_name:
-                model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto", trust_remote_code=True)
-            else:
-                model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
-        else:
-            if "flan" in model_name:
-                model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-            elif "mpt" in model_name:
-                model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
-            else:
-                model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
-
-        tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = 2048)
-
-        return model, tokenizer, torch_device
-
-    model, tokenizer, torch_device = create_hf_model(model_name = hf_checkpoint)
+class CtransInitConfig_cpu:
+    temperature: float = temperature
+    top_k: int = top_k
+    top_p: float = top_p
+    repetition_penalty: float = repetition_penalty
+    last_n_tokens: int = last_n_tokens
+    max_new_tokens: int = max_new_tokens
+    seed: int = seed
+    reset: bool = reset
+    stream: bool = stream
+    threads: int = threads
+    batch_size:int = batch_size
+    context_length:int = context_length
+    gpu_layers:int = 0
+    #stop: list[str] = field(default_factory=lambda: [stop_string])
+
+@dataclass
+class CtransGenGenerationConfig:
+    top_k: int = top_k
+    top_p: float = top_p
+    temperature: float = temperature
+    repetition_penalty: float = repetition_penalty
+    last_n_tokens: int = last_n_tokens
+    seed: int = seed
+    batch_size:int = batch_size
+    threads: int = threads
+    reset: bool = True

# Vectorstore funcs

@@ -179,9 +173,9 @@ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):

    return out_message

-#
+# Prompt functions

-def
+def base_prompt_templates(model_type = "Flan Alpaca"):

    #EXAMPLE_PROMPT = PromptTemplate(
    #    template="\nCONTENT:\n\n{page_content}\n\nSOURCE: {source}\n\n",

@@ -193,7 +187,6 @@ def create_prompt_templates():
        input_variables=["page_content"]
    )

-
    # The main prompt:

    instruction_prompt_template_alpaca_quote = """### Instruction:

@@ -205,31 +198,168 @@ def create_prompt_templates():

    Response:"""

+    instruction_prompt_template_alpaca = """### Instruction:
+    ### User:
+    Answer the QUESTION using information from the following CONTENT.
+    CONTENT: {summaries}
+    QUESTION: {question}
+
+    Response:"""
+
    instruction_prompt_template_orca = """
    ### System:
    You are an AI assistant that follows instruction extremely well. Help as much as you can.
    ### User:
-    Answer the QUESTION using information from the following CONTENT.
+    Answer the QUESTION with a short response using information from the following CONTENT.
    CONTENT: {summaries}
    QUESTION: {question}

    ### Response:"""

-
    instruction_prompt_mistral_orca = """<|im_start|>system\n
-    You are an AI assistant that follows instruction extremely well. Help as much as you can.
-    <|im_start|>user\n
-    Answer the QUESTION using information from the following CONTENT.
-    CONTENT: {summaries}
-    QUESTION: {question}\n
-    <|im_end|>"""
+    You are an AI assistant that follows instruction extremely well. Help as much as you can.
+    <|im_start|>user\n
+    Answer the QUESTION using information from the following CONTENT. Respond with short answers that directly answer the question.
+    CONTENT: {summaries}
+    QUESTION: {question}\n
+    <|im_end|>"""
+
+    if model_type == "Flan Alpaca":
+        INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_alpaca, input_variables=['question', 'summaries'])
+    elif model_type == "Orca Mini":
+        INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_orca, input_variables=['question', 'summaries'])
+
+    return INSTRUCTION_PROMPT, CONTENT_PROMPT
+
+def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings): # ,
+
+    question = inputs["question"]
+    chat_history = inputs["chat_history"]
+
+
+    new_question_kworded = adapt_q_from_chat_history(question, chat_history, extracted_memory) # new_question_keywords,
+
+
+    docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val = 5, out_passages = 2,
+                                                               vec_score_cut_off = 1, vec_weight = 1, bm25_weight = 1, svm_weight = 1)#,
+                                                               #vectorstore=globals()["vectorstore"], embeddings=globals()["embeddings"])
+
+    # Expand the found passages to the neighbouring context
+    docs_keep_as_doc, doc_df = get_expanded_passages(vectorstore, docs_keep_out, width=1)

+    if docs_keep_as_doc == []:
+        {"answer": "I'm sorry, I couldn't find a relevant answer to this question.", "sources":"I'm sorry, I couldn't find a relevant source for this question."}
+

+    # Build up sources content to add to user display

+    doc_df['meta_clean'] = [f"<b>{' '.join(f'{k}: {v}' for k, v in d.items() if k != 'page_section')}</b>" for d in doc_df['metadata']]
+    doc_df['content_meta'] = doc_df['meta_clean'].astype(str) + ".<br><br>" + doc_df['page_content'].astype(str)
+
+    modified_page_content = [f" SOURCE {i+1} - {word}" for i, word in enumerate(doc_df['page_content'])]
+    docs_content_string = ''.join(modified_page_content)
+
+    sources_docs_content_string = '<br><br>'.join(doc_df['content_meta'])#.replace(" "," ")#.strip()
+
+    instruction_prompt_out = instruction_prompt.format(question=new_question_kworded, summaries=docs_content_string)
+
+    print('Final prompt is: ')
+    print(instruction_prompt_out)
+
+    return instruction_prompt_out, sources_docs_content_string, new_question_kworded
+
-
-
-
+def create_full_prompt(user_input, history, extracted_memory, vectorstore, embeddings, model_type):

+    #if chain_agent is None:
+    #    history.append((user_input, "Please click the button to submit the Huggingface API key before using the chatbot (top right)"))
+    #    return history, history, "", ""
+    print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
+    print("User input: " + user_input)
+
+    history = history or []
+
+    # Create instruction prompt
+    instruction_prompt, content_prompt = base_prompt_templates(model_type=model_type)
+    instruction_prompt_out, docs_content_string, new_question_kworded =\
+        generate_expanded_prompt({"question": user_input, "chat_history": history}, #vectorstore,
+                                 instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings)
+
+
+    history.append(user_input)
+
+    print("Output history is:")
+    print(history)
+
+    return history, docs_content_string, instruction_prompt_out
+
+# Chat functions
+def produce_streaming_answer_chatbot(history, full_prompt, model_type):
+    #print("Model type is: ", model_type)
+
+    if model_type == "Flan Alpaca":
+        # Get the model and tokenizer, and tokenize the user text.
+        model_inputs = tokenizer(text=full_prompt, return_tensors="pt", return_attention_mask=False).to(torch_device) # return_attention_mask=False was added
+
+        # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
+        # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread.
+        streamer = TextIteratorStreamer(tokenizer, timeout=120., skip_prompt=True, skip_special_tokens=True)
+        generate_kwargs = dict(
+            model_inputs,
+            streamer=streamer,
+            max_new_tokens=max_new_tokens,
+            do_sample=sample,
+            repetition_penalty=flan_alpaca_repetition_penalty,
+            top_p=top_p,
+            temperature=temperature,
+            top_k=top_k
+        )
+        t = Thread(target=model.generate, kwargs=generate_kwargs)
+        t.start()
+
+        # Pull the generated text from the streamer, and update the model output.
+        start = time.time()
+        NUM_TOKENS=0
+        print('-'*4+'Start Generation'+'-'*4)
+
+        history[-1][1] = ""
+        for new_text in streamer:
+            if new_text == None: new_text = ""
+            history[-1][1] += new_text
+            NUM_TOKENS+=1
+            yield history
+
+        time_generate = time.time() - start
+        print('\n')
+        print('-'*4+'End Generation'+'-'*4)
+        print(f'Num of generated tokens: {NUM_TOKENS}')
+        print(f'Time for complete generation: {time_generate}s')
+        print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
+        print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
+
+    elif model_type == "Orca Mini":
+        tokens = model.tokenize(full_prompt)
+
+        # Pull the generated text from the streamer, and update the model output.
+        start = time.time()
+        NUM_TOKENS=0
+        print('-'*4+'Start Generation'+'-'*4)
+
+        history[-1][1] = ""
+        for new_text in model.generate(tokens, **asdict(CtransGenGenerationConfig())): #CtransGen_generate(prompt=full_prompt)#, config=CtransGenGenerationConfig()): # #top_k=top_k, temperature=temperature, repetition_penalty=repetition_penalty,
+            if new_text == None: new_text = ""
+            history[-1][1] += model.detokenize(new_text) #new_text
+            NUM_TOKENS+=1
+            yield history
+
+        time_generate = time.time() - start
+        print('\n')
+        print('-'*4+'End Generation'+'-'*4)
+        print(f'Num of generated tokens: {NUM_TOKENS}')
+        print(f'Time for complete generation: {time_generate}s')
+        print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
+        print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
+
+# Chat helper functions

def adapt_q_from_chat_history(question, chat_history, extracted_memory, keyword_model=""):#keyword_model): # new_question_keywords,

@@ -485,12 +615,6 @@ def get_expanded_passages(vectorstore, docs, width):

    # Step 1: Filter vstore_docs
    vstore_docs = get_docs_from_vstore(vectorstore)
-    print("Inside get_expanded_passages")
-    print("Docs:", docs)
-    print("Type of Docs:", type(docs))
-    print("Type of first element in Docs:", type(docs[0]))
-    print("Length of first tuple in Docs:", len(docs[0]))
-
    doc_sources = {doc.metadata['source'] for doc, _ in docs}
    vstore_docs = [(k, v) for k, v in vstore_docs if v.metadata.get('source') in doc_sources]

@@ -516,162 +640,6 @@ def get_expanded_passages(vectorstore, docs, width):

    return expanded_docs, doc_df

-def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings): # ,
-
-    question = inputs["question"]
-    chat_history = inputs["chat_history"]
-
-
-    new_question_kworded = adapt_q_from_chat_history(question, chat_history, extracted_memory) # new_question_keywords,
-
-
-    #print("The question passed to the vector search is:")
-    #print(new_question_kworded)
-
-    docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val = 5, out_passages = 2,
-                                                               vec_score_cut_off = 1, vec_weight = 1, bm25_weight = 1, svm_weight = 1)#,
-                                                               #vectorstore=globals()["vectorstore"], embeddings=globals()["embeddings"])
-
-    # Expand the found passages to the neighbouring context
-    docs_keep_as_doc, doc_df = get_expanded_passages(vectorstore, docs_keep_out, width=1)
-
-    if docs_keep_as_doc == []:
-        {"answer": "I'm sorry, I couldn't find a relevant answer to this question.", "sources":"I'm sorry, I couldn't find a relevant source for this question."}
-
-    #new_inputs = inputs.copy()
-    #new_inputs["question"] = new_question
-    #new_inputs["chat_history"] = chat_history_str
-
-    #print(docs_url)
-    #print(doc_df['metadata'])
-
-    # Build up sources content to add to user display
-
-    doc_df['meta_clean'] = [f"<b>{' '.join(f'{k}: {v}' for k, v in d.items() if k != 'page_section')}</b>" for d in doc_df['metadata']]
-    doc_df['content_meta'] = doc_df['meta_clean'].astype(str) + ".<br><br>" + doc_df['page_content'].astype(str)
-
-    modified_page_content = [f" SOURCE {i+1} - {word}" for i, word in enumerate(doc_df['page_content'])]
-    docs_content_string = ''.join(modified_page_content)
-
-    #docs_content_string = '<br><br>\n\n SOURCE '.join(doc_df['page_content'])#.replace(" "," ")#.strip()
-    sources_docs_content_string = '<br><br>'.join(doc_df['content_meta'])#.replace(" "," ")#.strip()
-    #sources_docs_content_tup = [(sources_docs_content,None)]
-    #print("The draft instruction prompt is:")
-    #print(instruction_prompt)
-
-    instruction_prompt_out = instruction_prompt.format(question=new_question_kworded, summaries=docs_content_string)
-    #print("The final instruction prompt:")
-    #print(instruction_prompt_out)
-
-    print('Final prompt is: ')
-    print(instruction_prompt_out)
-
-    return instruction_prompt_out, sources_docs_content_string, new_question_kworded
-
-def get_history_sources_final_input_prompt(user_input, history, extracted_memory, vectorstore, embeddings):#):
-
-    #if chain_agent is None:
-    #    history.append((user_input, "Please click the button to submit the Huggingface API key before using the chatbot (top right)"))
-    #    return history, history, "", ""
-    print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
-    print("User input: " + user_input)
-
-    history = history or []
-
-
-
-    # Create instruction prompt
-    instruction_prompt, content_prompt = create_prompt_templates()
-    instruction_prompt_out, docs_content_string, new_question_kworded =\
-        create_final_prompt({"question": user_input, "chat_history": history}, #vectorstore,
-                            instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings)
-
-
-    history.append(user_input)
-
-    print("Output history is:")
-    print(history)
-
-    #print("The output prompt is:")
-    #print(instruction_prompt_out)
-
-    return history, docs_content_string, instruction_prompt_out
-
-def highlight_found_text_single(search_text:str, full_text:str, hlt_chunk_size:int=hlt_chunk_size, hlt_strat:List=hlt_strat, hlt_overlap:int=hlt_overlap) -> str:
-    """
-    Highlights occurrences of search_text within full_text.
-
-    Parameters:
-    - search_text (str): The text to be searched for within full_text.
-    - full_text (str): The text within which search_text occurrences will be highlighted.
-
-    Returns:
-    - str: A string with occurrences of search_text highlighted.
-
-    Example:
-    >>> highlight_found_text("world", "Hello, world! This is a test. Another world awaits.")
-    'Hello, <mark style="color:black;">world</mark>! This is a test. Another world awaits.'
-    """
-
-    def extract_text_from_input(text,i=0):
-        if isinstance(text, str):
-            return text.replace(" ", " ").strip()#.replace("\r", " ").replace("\n", " ")
-        elif isinstance(text, list):
-            return text[i][0].replace(" ", " ").strip()#.replace("\r", " ").replace("\n", " ")
-        else:
-            return ""
-
-    def extract_search_text_from_input(text):
-        if isinstance(text, str):
-            return text.replace(" ", " ").strip()#.replace("\r", " ").replace("\n", " ").replace(" ", " ").strip()
-        elif isinstance(text, list):
-            return text[-1][1].replace(" ", " ").strip()#.replace("\r", " ").replace("\n", " ").replace(" ", " ").strip()
-        else:
-            return ""
-
-    full_text = extract_text_from_input(full_text)
-    search_text = extract_search_text_from_input(search_text)
-
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=hlt_chunk_size,
-        separators=hlt_strat,
-        chunk_overlap=hlt_overlap,
-    )
-    sections = text_splitter.split_text(search_text)
-
-    #print(sections)
-
-    found_positions = {}
-    for x in sections:
-        text_start_pos = full_text.find(x)
-
-        if text_start_pos != -1:
-            found_positions[text_start_pos] = text_start_pos + len(x)
-
-    # Combine overlapping or adjacent positions
-    sorted_starts = sorted(found_positions.keys())
-    combined_positions = []
-    if sorted_starts:
-        current_start, current_end = sorted_starts[0], found_positions[sorted_starts[0]]
-        for start in sorted_starts[1:]:
-            if start <= (current_end + 1):
-                current_end = max(current_end, found_positions[start])
-            else:
-                combined_positions.append((current_start, current_end))
-                current_start, current_end = start, found_positions[start]
-        combined_positions.append((current_start, current_end))
-
-    # Construct pos_tokens
-    pos_tokens = []
-    prev_end = 0
-    for start, end in combined_positions:
-        pos_tokens.append(full_text[prev_end:start]) # ((full_text[prev_end:start], None))
-        pos_tokens.append('<mark style="color:black;">' + full_text[start:end] + '</mark>')# ("<mark>" + full_text[start:end] + "</mark>",'found')
-        prev_end = end
-    pos_tokens.append(full_text[prev_end:])
-
-    return "".join(pos_tokens)
-
def highlight_found_text(search_text: str, full_text: str, hlt_chunk_size:int=hlt_chunk_size, hlt_strat:List=hlt_strat, hlt_overlap:int=hlt_overlap) -> str:
    """
    Highlights occurrences of search_text within full_text.

@@ -742,110 +710,14 @@ def highlight_found_text(search_text: str, full_text: str, hlt_chunk_size:int=hl
    pos_tokens = []
    prev_end = 0
    for start, end in combined_positions:
-
-
-
+        if end-start > 15: # Only combine if there is a significant amount of matched text. Avoids picking up single words like 'and' etc.
+            pos_tokens.append(full_text[prev_end:start])
+            pos_tokens.append('<mark style="color:black;">' + full_text[start:end] + '</mark>')
+            prev_end = end
    pos_tokens.append(full_text[prev_end:])

    return "".join(pos_tokens)

-# # Chat functions
-def produce_streaming_answer_chatbot_hf(history, full_prompt):
-
-    #print("The question is: ")
-    #print(full_prompt)
-
-    # Get the model and tokenizer, and tokenize the user text.
-    model_inputs = tokenizer(text=full_prompt, return_tensors="pt", return_attention_mask=False).to(torch_device) # return_attention_mask=False was added
-
-    # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
-    # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread.
-    streamer = TextIteratorStreamer(tokenizer, timeout=120., skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = dict(
-        model_inputs,
-        streamer=streamer,
-        max_new_tokens=max_new_tokens,
-        do_sample=sample,
-        repetition_penalty=flan_alpaca_repetition_penalty,
-        top_p=top_p,
-        temperature=temperature,
-        top_k=top_k
-    )
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
-    t.start()
-
-    # Pull the generated text from the streamer, and update the model output.
-    import time
-    start = time.time()
-    NUM_TOKENS=0
-    print('-'*4+'Start Generation'+'-'*4)
-
-    history[-1][1] = ""
-    for new_text in streamer:
-        if new_text == None: new_text = ""
-        history[-1][1] += new_text
-        NUM_TOKENS+=1
-        yield history
-
-    time_generate = time.time() - start
-    print('\n')
-    print('-'*4+'End Generation'+'-'*4)
-    print(f'Num of generated tokens: {NUM_TOKENS}')
-    print(f'Time for complete generation: {time_generate}s')
-    print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
-    print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
-
-def produce_streaming_answer_chatbot_ctrans(history, full_prompt):
-
-    print("The question is: ")
-    print(full_prompt)
-
-    tokens = ctrans_llm.tokenize(full_prompt)
-
-    #config = GenerationConfig(reset=True)
-
-    # Pull the generated text from the streamer, and update the model output.
-    import time
-    start = time.time()
-    NUM_TOKENS=0
-    print('-'*4+'Start Generation'+'-'*4)
-
-    history[-1][1] = ""
-    for new_text in ctrans_llm.generate(tokens, top_k=top_k, temperature=temperature, repetition_penalty=repetition_penalty): #ctrans_generate(prompt=tokens, config=config):
-        if new_text == None: new_text = ""
-        history[-1][1] += ctrans_llm.detokenize(new_text) #new_text
-        NUM_TOKENS+=1
-        yield history
-
-    time_generate = time.time() - start
-    print('\n')
-    print('-'*4+'End Generation'+'-'*4)
-    print(f'Num of generated tokens: {NUM_TOKENS}')
-    print(f'Time for complete generation: {time_generate}s')
-    print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
-    print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
-
-
-def ctrans_generate(
-    prompt: str,
-    llm=ctrans_llm,
-    config: GenerationConfig = GenerationConfig(),
-):
-    """Run model inference, will return a Generator if streaming is true."""
-
-    return llm(
-        prompt,
-        **asdict(config),
-    )
-
-def turn_off_interactivity(user_message, history):
-    return gr.update(value="", interactive=False), history + [[user_message, None]]
-
-def update_message(dropdown_value):
-    return gr.Textbox.update(value=dropdown_value)
-
-def hide_block():
-    return gr.Radio.update(visible=False)

# # Chat history functions

@@ -923,6 +795,8 @@ def add_inputs_answer_to_history(user_message, history, current_topic):

    return history, extracted_memory

+# Keyword functions
+
def remove_q_stopwords(question): # Remove stopwords from question. Not used at the moment
    # Prepare keywords from question by removing stopwords
    text = question.lower()

@@ -1003,4 +877,51 @@ def keybert_keywords(text, n, kw_model):
    keywords_list = [item[0] for item in keywords_text]

    return keywords_list
+
+# Gradio functions
+def turn_off_interactivity(user_message, history):
+    return gr.update(value="", interactive=False), history + [[user_message, None]]
+
+def restore_interactivity():
+    return gr.update(interactive=True)
+
+def update_message(dropdown_value):
+    return gr.Textbox.update(value=dropdown_value)
+
+def hide_block():
+    return gr.Radio.update(visible=False)
+
+# Vote function
+
+def vote(data: gr.LikeData, chat_history, instruction_prompt_out, model_type):
+    import os
+    import pandas as pd
+
+    chat_history_last = str(str(chat_history[-1][0]) + " - " + str(chat_history[-1][1]))
+
+    response_df = pd.DataFrame(data={"thumbs_up":data.liked,
+                                     "chosen_response":data.value,
+                                     "input_prompt":instruction_prompt_out,
+                                     "chat_history":chat_history_last,
+                                     "model_type": model_type,
+                                     "date_time": pd.Timestamp.now()}, index=[0])
+
+    if data.liked:
+        print("You upvoted this response: " + data.value)
+
+        if os.path.isfile("thumbs_up_data.csv"):
+            existing_thumbs_up_df = pd.read_csv("thumbs_up_data.csv")
+            thumbs_up_df_concat = pd.concat([existing_thumbs_up_df, response_df], ignore_index=True).drop("Unnamed: 0",axis=1, errors="ignore")
+            thumbs_up_df_concat.to_csv("thumbs_up_data.csv")
+        else:
+            response_df.to_csv("thumbs_up_data.csv")
+
+    else:
+        print("You downvoted this response: " + data.value)
+
+        if os.path.isfile("thumbs_down_data.csv"):
+            existing_thumbs_down_df = pd.read_csv("thumbs_down_data.csv")
+            thumbs_down_df_concat = pd.concat([existing_thumbs_down_df, response_df], ignore_index=True).drop("Unnamed: 0",axis=1, errors="ignore")
+            thumbs_down_df_concat.to_csv("thumbs_down_data.csv")
+        else:
+            response_df.to_csv("thumbs_down_data.csv")
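The thumbs_up_data.csv file added below is produced by the vote handler above: each rating appends one row to a CSV on disk. A condensed sketch of that append-or-create pattern (hypothetical standalone helper with the same column layout as the handler in this commit):

import os
import pandas as pd

def append_feedback_row(row: dict, path: str = "thumbs_up_data.csv"):
    # Build a one-row frame and append it to the CSV, creating the file if needed.
    response_df = pd.DataFrame(data=row, index=[0])
    if os.path.isfile(path):
        existing = pd.read_csv(path)
        combined = pd.concat([existing, response_df], ignore_index=True).drop("Unnamed: 0", axis=1, errors="ignore")
        combined.to_csv(path)
    else:
        response_df.to_csv(path)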
requirements.txt CHANGED
@@ -1,23 +1,17 @@
 langchain
 beautifulsoup4
 pandas
-black
-isort
-Flask
 transformers
 --extra-index-url https://download.pytorch.org/whl/cu113
 torch
 sentence_transformers
 faiss-cpu
 bitsandbytes
-accelerate
-optimum
 pypdf
-gradio==3.47.1
-gradio_client==0.6.0
 python-docx
-gpt4all
 ctransformers[cuda]
 keybert
 span_marker
-gensim
+gensim
+gradio==3.42.0
+gradio_client
thumbs_up_data.csv
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,thumbs_up,chosen_response,input_prompt,chat_history,model_type,date_time
|
2 |
+
0,True,"The vision statement for Lambeth is ""a borough with social and climate justice at its heart"".","### Instruction:
|
3 |
+
### User:
|
4 |
+
Answer the QUESTION using information from the following CONTENT.
|
5 |
+
CONTENT: SOURCE 1 - - attended by over 150 people7 focussed workshops with local Lambeth organisations and their services-users - attended by over 80 people 2 weeks of market research across public spaces in Lambeth, asking people their vision for Lambeth in 20301 Lambeth 2030 consultation survey open for 6 weeks to the public with over 600 responses 14 | Lambeth 2030 Our Future, Our Lambeth Lambeth 2030 Our Future, Our Lambeth | 15The vision Through listening and building on what we already know, we’ve created a vision for the future of Lambeth that’s rooted in what people want. This is a vision that belongs to everyone.Achieving this future vision of Lambeth comes down to all of us. We are all connected, and we all have a stake in Lambeth to make it the best place to live, work and visit in the UK. From our conversations we know people agree with a group of core priorities and ambitions for the future of Lambeth. They are ready to come together and bring this vision to life, and there is also strong support in the shift towards taking a longer-term view, so that we are ready for the unforeseen challenges of t SOURCE 2 - e of Lambeth. This Borough Plan will not have all the answers to the challenges we face but it is our commitment to everyone in Lambeth that we will strive to get the basics right, and that we will harness the abundance of local expertise, energy and passion in our design and decision-making so that everybody in the borough is empowered to create Lambeth 2030. This is Our Future; This is Our Lambeth. 06 | Lambeth 2030 Our Future, Our Lambeth 07 Lambeth 2030 Our Future, Our Lambeth |Lambeth 2030 Vision Statement Lambeth – a borough with social and climate justice at its heart. By harnessing the power and pride of our people and partnerships, we will proactively tackle inequalities so that children and young people can have the best start in life and so everyone can feel safe and thrive in a place of opportunity. SUSTAINABLE OPPORTUNITY HEALTHY COMMUNITY SAFER 08 | Lambeth 2030 Our Future, Our Lambeth 09 Lambeth 2030 Our Future, Our Lambeth |State of the Borough At 22,200 Lambeth has the largest LGBTQ+ population in London The (mean) average house price in Lambeth is £689,009 12th highest in London 17.3% of Lambeth is green space 5th lowest in London 317,600 Lambeth is an inner south London borough with 317,600 residents 9th largest population in London Lambeth’s population is diverse and multicultural Asian, Asian British – 7.3% Black British, African or Caribbean – 24.0% Mixed or Multiple Ethnic groups – 8.1% White British – 4
QUESTION: What is the vision statement for Lambeth?
Response:","What is the vision statement for Lambeth? - The vision statement for Lambeth is ""a borough with social and climate justice at its heart"".",Flan Alpaca,2023-10-09 23:10:04.119596
1,True,"The commitments for Lambeth are: 1. We will take a one borough approach to deliver our services consistently and well 2. People have a say and stake in the decisions that matter 3. We will collaborate with our people and partners to innovate and implement together 4. We will focus on what our residents want and be honest about what we can and can’t do, whilst being courageous to take bold action.","### Instruction:
### User:
Answer the QUESTION using information from the following CONTENT.
CONTENT: SOURCE 1 - at the changes we make, and the partnerships we form, are all contributing to improving the lives of those who live, work, learn and visit our borough. -Our Future, Our Lambeth Our Vision Statement Lambeth – a borough with social and climate justice at its heart. By harnessing the power and pride of our people and partnerships, we will proactively tackle inequalities so that children and young people can have the best start in life and so everyone can feel safe and thrive in a place of opportunity. Our Commitments for Lambeth 1. We get the basics right and deliver great public services that fit what people need -We will take a one borough approach to deliver our services consistently and well 2. People have a say and stake in the decisions that matter -We will be a listening and open borough that recognises and values our community voices 3. We work together in partnership, to harness what makes Lambeth special - We will collaborate with our people and partners to innovate and implement together 4. We are accessible, transparent and we stand up to challenges -We will focus on what our residents want and be honest about what we can and can’t do, whilst being courageous to take bold action. SOURCE 2 - Achieving this future vision of Lambeth comes down to all of us. We are all connected, and we all have a stake in Lambeth to make it the best place to live, work and visit in the UK. From our conversations we know people agree with a group of core priorities and ambitions for the future of Lambeth. They are ready to come together and bring this vision to life, and there is also strong support in the shift towards taking a longer-term view, so that we are ready for the unforeseen challenges of the future. Our Commitments for Lambeth To deliver this vision requires individual and collective commitment and action. This means the Council and Lambeth’s communities and organisations coming together and standing as one to transform the ways we work. By listening to our communities, understanding their experiences, and aligning our priorities with theirs, we can build confidence between Lambeth’s institutions, businesses, community groups and organisations, and residents, and we can ensure that the changes we make, and the partnerships we form, are all contributing to improving the lives of those who live, work, learn and visit our borough. -Our Future, Our Lambeth Our Vision Statement Lambeth – a borough with social and climate justice at its heart. By harnessing the power and pride of our people and partnerships, we will proactively tackle inequalities so that children and young people can have the best start in life and so everyone can feel safe and thrive in a place of opportuni
QUESTION: What are the commitments for Lambeth?
Response:","What are the commitments for Lambeth? - The commitments for Lambeth are: 1. We will take a one borough approach to deliver our services consistently and well 2. People have a say and stake in the decisions that matter 3. We will collaborate with our people and partners to innovate and implement together 4. We will focus on what our residents want and be honest about what we can and can’t do, whilst being courageous to take bold action.",Flan Alpaca,2023-10-09 23:10:43.075937
2,True,"The 2030 outcomes for Lambeth are:
1. Lambeth will have lower levels of deprivation, with fewer children growing up in poverty.
2. Lambeth will tackle the structural inequalities adversely impacting Black, Asian and Multi-Ethnic residents by being a borough of antiracism.
3. Lambeth will be a borough of progress, working with LGBTQ+ communities and disabled residents to tackle the biggest challenges they face.
4. By 2030, Lambeth will be a Net Zero Borough.
5. Lambeth residents will experience good health and wellbeing, with an improved healthy life expectancy for those with the poorest outcomes.
6. Lambeth will be a borough of equity and justice, with a focus on making neighbourhoods fit for the future.","
### System:
You are an AI assistant that follows instruction extremely well. Help as much as you can.
### User:
Answer the QUESTION with a short response using information from the following CONTENT.
CONTENT: SOURCE 1 - eekers and raise the voices of people with lived-experience. We will be led by five core values: Inclusivity, Openness, Participation, Inspiration and Integrity. To drive this forward, we have created the Lambeth Sanctuary Forum, a multi-agency group working with the voluntary and community sector, structured to deliver the priorities of our sanctuary-seekers, with humanity and compassion. 36 | Lambeth 2030 Our Future, Our Lambeth Lambeth 2030 Our Future, Our Lambeth | 37Our Lambeth 2030 Outcomes Our ambitions are bold – it is going to take everyone in the borough to play their part in delivering for Lambeth, ensuring that we are all accountable and committed to a better future for everyone. Our Lambeth Outcomes have been shaped to unite us in that effort. A Borough of Equity and Justice • By 2030, Lambeth will have lower levels of deprivation, with fewer children growing up in poverty. • By 2030, Lambeth will tackle the structural inequalities adversely impacting Black, Asian and Multi-Ethnic residents by being a borough of antiracism. • By 2030, Lambeth will be a borough of progress, working with LGBTQ+ communities and disabled residents to tackle the biggest challenges they face. Our 2030 Ambition: Making Lambeth Neighbourhoods Fit for the Future • By 2030, Lambeth will be a Net Zero Borough. • By 2030, Lambeth residents will experience good health and wellbeing, with an improved healthy life expectancy for those with the poorest outcomes. • By 203 SOURCE 2 - Lambeth 2030 Contents Forewords 04 Introduction 06 State of the Borough 10 Our Previous Borough Plan 12 Our Shared Vision for Lambeth 2030 14 Our Ambitions for Lambeth 2030 18 The Lambeth Golden Thread – A Borough of Equity and Justice 20 Ambition 1 – Making Lambeth Neighbourhoods Fit for the Future 22 Ambition 2 – Making Lambeth One of the Safest Boroughs in London 28 Ambition 3 – Making Lambeth A Place We Can All Call Home 32 Our Lambeth 2030 Outcomes 38 03 Lambeth 2030 Our Future, Our Lambeth| 02 | Lambeth 2030 Our Future, Our Lambeth
QUESTION: What are the 2030 outcomes for Lambeth?
### Response:","What are the 2030 outcomes for Lambeth? - The 2030 outcomes for Lambeth are:
1. Lambeth will have lower levels of deprivation, with fewer children growing up in poverty.
2. Lambeth will tackle the structural inequalities adversely impacting Black, Asian and Multi-Ethnic residents by being a borough of antiracism.
3. Lambeth will be a borough of progress, working with LGBTQ+ communities and disabled residents to tackle the biggest challenges they face.
4. By 2030, Lambeth will be a Net Zero Borough.
5. Lambeth residents will experience good health and wellbeing, with an improved healthy life expectancy for those with the poorest outcomes.
6. Lambeth will be a borough of equity and justice, with a focus on making neighbourhoods fit for the future.",Orca Mini,2023-10-09 23:13:38.912570
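The rows above keep full multi-line prompts inside quoted fields, and the input_prompt column shows two different prompt scaffolds depending on the chosen model: an Alpaca-style "### Instruction / ### User / Response" layout for the Flan Alpaca rows and a "### System / ### User / ### Response" layout for the Orca Mini row. As a rough, non-authoritative illustration of reading the log back (not part of this commit), pandas handles the embedded newlines inside the quoted fields:

# Hypothetical sketch: summarise the thumbs-up log (not part of this commit)
import pandas as pd

feedback = pd.read_csv("thumbs_up_data.csv", index_col=0, parse_dates=["date_time"])

# Share of liked responses per model choice, e.g. Flan Alpaca vs Orca Mini
print(feedback.groupby("model_type")["thumbs_up"].mean())

# Most recently logged response
print(feedback.sort_values("date_time").iloc[-1]["chosen_response"])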