Grant Kim committed
Commit 56c1e47 · 1 Parent(s): 1bac333

Add model and req

Files changed (2)
  1. app.py +179 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,179 @@
import streamlit as st
import io
import fitz
import requests
from langchain.llms import LlamaCpp
from langchain.callbacks.base import BaseCallbackHandler
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from huggingface_hub import hf_hub_download

import pandas as pd


# StreamHandler to intercept streaming output from the LLM.
# This makes it appear that the Language Model is "typing"
# in realtime.
class StreamHandler(BaseCallbackHandler):
    def __init__(self, container, initial_text=""):
        self.container = container
        self.text = initial_text

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.text += token
        self.container.markdown(self.text)


@st.cache_data
def load_reviews(url):
    # url = "https://raw.githubusercontent.com/grantjw/aesop-review/main/output_transcripts.csv"
    df = pd.read_csv(url)
    # remove non-scraped transcripts
    df = df[(df['Transcript'] != ' ') & (df['Transcript'] != '')]
    # Assuming df is a DataFrame containing 'Transcript' and 'Video URL' columns
    review = df['Transcript'].str.cat(sep='\n')
    return review

@st.cache_resource
def get_retriever(url):
    reviews = load_reviews(url)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=40, separators=['\n', "',", ' ', ''])
    chunk_list = []
    chunks = text_splitter.split_text(reviews)
    chunk_list.extend(chunks)
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    db = FAISS.from_texts(chunk_list, embeddings)
    # db.similarity_search("customer service", k=5)
    retriever = db.as_retriever()
    return retriever


@st.cache_resource
def create_chain(_retriever):
    # A stream handler to direct streaming output on the chat screen.
    # This will need to be handled somewhat differently.
    # But it demonstrates what potential it carries.
    # stream_handler = StreamHandler(st.empty())

    # Callback manager is a way to intercept streaming output from the
    # LLM and take some action on it. Here we are giving it our custom
    # stream handler to make it appear as if the LLM is typing the
    # responses in real time.
    # callback_manager = CallbackManager([stream_handler])
    (repo_id, model_file_name) = ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
                                  "mistral-7b-instruct-v0.1.Q5_0.gguf")

    model_path = hf_hub_download(repo_id=repo_id,
                                 filename=model_file_name,
                                 repo_type="model")

    n_gpu_layers = 1  # Change this value based on your model and your GPU VRAM pool.
    n_batch = 1024  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

    llm = LlamaCpp(
        model_path=model_path,
        n_batch=n_batch,
        n_ctx=2048,
        max_tokens=2048,
        temperature=0,
        # callback_manager=callback_manager,
        verbose=False,
        streaming=True,
    )

    # Template for the prompt.
    # template = "{question}"

    # We create a prompt from the template so we can use it with langchain
    # prompt = PromptTemplate(template=template, input_variables=["question"])

    # Setup memory for contextual conversation
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # We create a qa chain with our llm, retriever, and memory
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=_retriever, memory=memory, verbose=False
    )

    return qa_chain


# Set the webpage title
st.set_page_config(
    page_title="Youtube Aesop Product Reviewer"
)

# Create a header element
st.header("Youtube Aesop Product Reviewer")

system_prompt = st.text_area(
    label="System Prompt",
    value="You will answer questions based on the context. You are a customer review analyzer. Answer in detail",
    key="system_prompt")


default_url = "https://raw.githubusercontent.com/grantjw/product_chatbot_rag/main/data/output_transcripts.csv"

if "base_url" not in st.session_state:
    st.session_state.base_url = default_url

# Optionally, you can also display the URL in a non-editable text input as an FYI to the user
base_url = st.text_input("Site URL", value=st.session_state.base_url, key="base_url", disabled=True)


if st.session_state.base_url != "":

    retriever = get_retriever(base_url)

    # We store the conversation in the session state.
    # This will be used to render the chat conversation.
    # We initialize it with the first message we want to be greeted with.
    if "messages" not in st.session_state:
        st.session_state.messages = [
            {"role": "assistant", "content": "How may I help you today?"}
        ]

    if "current_response" not in st.session_state:
        st.session_state.current_response = ""

    # We loop through each message in the session state and render it as
    # a chat message.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # We initialize the quantized LLM from a local path.
    # Currently most parameters are fixed but we can make them
    # configurable.
    llm_chain = create_chain(retriever)

    # We take questions/instructions from the chat input to pass to the LLM
    if user_prompt := st.chat_input("Your message here", key="user_input"):

        # Add our input to the session state
        st.session_state.messages.append(
            {"role": "user", "content": user_prompt}
        )

        # Add our input to the chat window
        with st.chat_message("user"):
            st.markdown(user_prompt)

        # Pass our input to the llm chain and capture the final responses.
        # It is worth noting that the Stream Handler is already receiving the
        # streaming response as the llm is generating. We get our response
        # here once the llm has finished generating the complete response.
        response = llm_chain.run(user_prompt)

        # Add the response to the session state
        st.session_state.messages.append(
            {"role": "assistant", "content": response}
        )

        # Add the response to the chat window
        with st.chat_message("assistant"):
            st.markdown(response)
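
The retrieval path in get_retriever() (split the transcripts, embed them with all-MiniLM-L6-v2, index in FAISS, expose a retriever) can be sanity-checked on its own before wiring it into the chat UI. The sketch below is illustrative only, not part of this commit: the sample review strings and the query are placeholders, and it assumes the pinned langchain==0.0.321 / sentence-transformers==2.2.2 from requirements.txt.

# Illustrative sketch (not in the commit): the same split -> embed -> FAISS
# index -> retrieve steps that get_retriever() performs on the real CSV.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Placeholder text standing in for the scraped YouTube review transcripts.
sample_reviews = (
    "The hand balm smells great but the pump broke after a week.\n"
    "Customer service replaced my order within two days, very responsive."
)

splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=40)
chunks = splitter.split_text(sample_reviews)

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = FAISS.from_texts(chunks, embeddings)

# Same call that appears commented out in get_retriever(), run on the toy index.
print(db.similarity_search("customer service", k=1))

# as_retriever() yields the object app.py hands to ConversationalRetrievalChain.
retriever = db.as_retriever()
print(retriever.get_relevant_documents("customer service"))
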
requirements.txt ADDED
@@ -0,0 +1,10 @@
pandas
beautifulsoup4==4.12.2
docarray==0.39.1
langchain==0.0.321
llama_cpp_python==0.2.11
pydantic==1.10.8
PyMuPDF==1.23.5
sentence-transformers==2.2.2
streamlit==1.27.2
faiss-cpu==1.7.4
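
Note: langchain is pinned to 0.0.321, which still exposes the pre-split import paths (langchain.embeddings, langchain.vectorstores) used in app.py. With these pinned versions installed (pip install -r requirements.txt), the app should start locally via streamlit run app.py; the first run downloads the multi-gigabyte Mistral GGUF file through hf_hub_download, so initial startup can take a while.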