sanjeevl10 committed
Commit 2519a3a
1 Parent(s): bbf6efd

Add Qdrant Changes

Files changed (2):
  1. app.py +255 -93
  2. requirements.txt +2 -1

app.py CHANGED
@@ -32,6 +32,11 @@ from tools import data_analyst, forecasting_expert_arima, forecasting_expert_rf,
 from chainlit.input_widget import Select
 import matplotlib.pyplot as plt
 from langgraph.checkpoint.memory import MemorySaver
+from langgraph.checkpoint.memory import MemorySaver
+from langchain_openai.embeddings import OpenAIEmbeddings
+from operator import itemgetter
+from langchain.schema.output_parser import StrOutputParser
+from langchain.schema.runnable import RunnablePassthrough

 load_dotenv()
 HF_ACCESS_TOKEN = os.environ["HF_ACCESS_TOKEN"]
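For context on the MemorySaver import carried through this hunk: it is LangGraph's in-memory checkpointer, attached when a graph is compiled. A minimal, hypothetical sketch of that wiring (the counter graph below is illustrative, not code from app.py):

    # Hypothetical sketch: wiring MemorySaver into a LangGraph state graph so
    # state is checkpointed per thread between invocations.
    from typing import TypedDict

    from langgraph.checkpoint.memory import MemorySaver
    from langgraph.graph import END, StateGraph

    class State(TypedDict):
        count: int

    def bump(state: State) -> State:
        # Trivial node: increment the checkpointed counter.
        return {"count": state["count"] + 1}

    builder = StateGraph(State)
    builder.add_node("bump", bump)
    builder.set_entry_point("bump")
    builder.add_edge("bump", END)

    # compile(checkpointer=...) is where the MemorySaver import is used.
    graph = builder.compile(checkpointer=MemorySaver())

    # The checkpointer keys saved state by thread_id.
    print(graph.invoke({"count": 0}, config={"configurable": {"thread_id": "demo"}}))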
@@ -214,108 +219,265 @@ async def main(message: cl.Message):
     await cl.Message(content=f"You sent {counter} message(s)!").send()
     #if counter==1:
     inputs = {"messages": [HumanMessage(content=message.content)]}
-    res_data = graph_data.invoke(inputs, config=RunnableConfig(callbacks=[
-        cl.LangchainCallbackHandler(
-            to_ignore=["ChannelRead", "RunnableLambda", "ChannelWrite", "__start__", "_execute"]
-            # can add more into the to_ignore: "agent:edges", "call_model"
-            # to_keep=
-
-        )]))
-    #print(res_data)
-    await cl.Message(content=res_data["messages"][-1].content).send()
-    #print('ticker',str(res_data).split(">>"))
-    if len(str(res_data).split(">>")[1])<10:
-        stockticker=(str(res_data).split(">>")[1])
-    else:
-        stockticker=(str(res_data).split(">>")[0])
-    #print('ticker1',stockticker)
-    print('here')
-    df=u.get_stock_price(stockticker)
-    df_history=u.historical_stock_prices(stockticker,90)
-    df_history_to_msg1=eval(str(list((pd.DataFrame(df_history['Close'].values.reshape(1, -1)[0]).T).iloc[0,:])))
-
-    inputs_all = {"messages": [HumanMessage(content=(f"Predict {stockticker}, historical prices are: {df_history_to_msg1}."))]}
-    df_history=pd.DataFrame(df_history)
-    df_history['stockticker']=np.repeat(stockticker,len(df_history))
-    df_history.to_csv('df_history.csv')
-    #df_history.to_csv('./tools/df_history.csv')
-
-    print ("Running forecasting models on historical prices")
-    res = graph.invoke(inputs_all, config=RunnableConfig(callbacks=[
-        cl.LangchainCallbackHandler(
-            to_ignore=["ChannelRead", "RunnableLambda", "ChannelWrite", "__start__", "_execute"]
-            # can add more into the to_ignore: "agent:edges", "call_model"
-            # to_keep=
-
-        )]))
-    await cl.Message(content= res["messages"][-2].content + '\n\n' + res["messages"][-1].content).send()
-
-    #Plotting the graph
-    df=u.historical_stock_prices(stockticker,90)
-    df=u.calculate_MACD(df, fast_period=12, slow_period=26, signal_period=9)
-    #df values
-    #Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits','EMA_fast', 'EMA_slow', 'MACD', 'Signal_Line', 'MACD_Histogram']
-    fig = u.plot_macd2(df)
-
-    if fig:
-        elements = [cl.Pyplot(name="plot", figure=fig, display="inline",size="large"),
-        ]
-        await cl.Message(
-            content="Here is the MACD plot",
-            elements=elements,
-        ).send()
-    else:
-        await cl.Message(
-            content="Failed to generate the MACD plot."
-        ).send()
-
-    #Perform sentiment analysis on the stock news & predict dominant sentiment along with plotting the sentiment breakdown chart
-    news_articles = stock_sentiment_analysis_util.fetch_news(stockticker)
-
-    analysis_results = []
-
-    #Perform sentiment analysis for each product review
-    for article in news_articles:
-        sentiment_analysis_result = stock_sentiment_analysis_util.analyze_sentiment(article['News_Article'])
-
-        # Display sentiment analysis results
-        #print(f'News Article: {sentiment_analysis_result["News_Article"]} : Sentiment: {sentiment_analysis_result["Sentiment"]}', '\n')
-
-        result = {
-            'News_Article': sentiment_analysis_result["News_Article"],
-            'Sentiment': sentiment_analysis_result["Sentiment"][0]['label']
-        }
-
-        analysis_results.append(result)
-
-    #Retrieve dominant sentiment based on sentiment analysis data of reviews
-    dominant_sentiment = stock_sentiment_analysis_util.get_dominant_sentiment(analysis_results)
-    await cl.Message(
-        content="Dominant sentiment of the stock based on last 7 days of news is : " + dominant_sentiment
-    ).send()
-
-    #Plot sentiment breakdown chart
-
-    fig = stock_sentiment_analysis_util.plot_sentiment_graph(analysis_results)
-    if fig:
-        elements = [cl.Pyplot(name="plot", figure=fig, display="inline",size="large"),
-        ]
-        await cl.Message(
-            content="Sentiment breakdown plot",
-            elements=elements,
-        ).send()
+    #Checking if the input message is a stock search; the assumption here is that if the user types a stock ticker explicitly or
+    #inputs the name of the company for the app to find the ticker, the length of the input won't be greater than 15
+    if len(str(message.content)) <= 15 and counter==1:
+
+        res_data = graph_data.invoke(inputs, config=RunnableConfig(callbacks=[
+            cl.LangchainCallbackHandler(
+                to_ignore=["ChannelRead", "RunnableLambda", "ChannelWrite", "__start__", "_execute"]
+                # can add more into the to_ignore: "agent:edges", "call_model"
+                # to_keep=
+
+            )]))
+        #print(res_data)
+        await cl.Message(content=res_data["messages"][-1].content).send()
+        print('ticker',str(res_data).split(">>")[0])
+        if len(str(res_data).split(">>")[1])<10:
+            stockticker=(str(res_data).split(">>")[1])
+        else:
+            stockticker=(str(res_data).split(">>")[0])
+
+        #print('ticker1',stockticker)
+        print('here')
+        df=u.get_stock_price(stockticker)
+        df_history=u.historical_stock_prices(stockticker,90)
+        df_history_to_msg1=eval(str(list((pd.DataFrame(df_history['Close'].values.reshape(1, -1)[0]).T).iloc[0,:])))
+
+        inputs_all = {"messages": [HumanMessage(content=(f"Predict {stockticker}, historical prices are: {df_history_to_msg1}."))]}
+        df_history=pd.DataFrame(df_history)
+        df_history['stockticker']=np.repeat(stockticker,len(df_history))
+        df_history.to_csv('df_history.csv')
+        #df_history.to_csv('./tools/df_history.csv')
+
+        print ("Running forecasting models on historical prices")
+        res = graph.invoke(inputs_all, config=RunnableConfig(callbacks=[
+            cl.LangchainCallbackHandler(
+                to_ignore=["ChannelRead", "RunnableLambda", "ChannelWrite", "__start__", "_execute"]
+                # can add more into the to_ignore: "agent:edges", "call_model"
+                # to_keep=
+
+            )]))
+        await cl.Message(content= res["messages"][-2].content + '\n\n' + res["messages"][-1].content).send()
+
+        #Storing recommendation
+        recommendation = "Recommendation for " + stockticker + '\n' + res["messages"][-2].content + '\n\n' + res["messages"][-1].content
+
+        #Plotting the graph
+        df=u.historical_stock_prices(stockticker,90)
+        df=u.calculate_MACD(df, fast_period=12, slow_period=26, signal_period=9)
+        #df values
+        #Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits','EMA_fast', 'EMA_slow', 'MACD', 'Signal_Line', 'MACD_Histogram']
+        fig = u.plot_macd2(df)
+
+        if fig:
+            elements = [cl.Pyplot(name="plot", figure=fig, display="inline",size="large"),
+            ]
+            await cl.Message(
+                content="Here is the MACD plot",
+                elements=elements,
+            ).send()
+        else:
+            await cl.Message(
+                content="Failed to generate the MACD plot."
+            ).send()
+
+        #Perform sentiment analysis on the stock news & predict dominant sentiment along with plotting the sentiment breakdown chart
+        news_articles = stock_sentiment_analysis_util.fetch_news(stockticker)
+
+        analysis_results = []
+
+        #Perform sentiment analysis for each news article
+        for article in news_articles:
+            sentiment_analysis_result = stock_sentiment_analysis_util.analyze_sentiment(article['News_Article'])
+
+            # Display sentiment analysis results
+            #print(f'News Article: {sentiment_analysis_result["News_Article"]} : Sentiment: {sentiment_analysis_result["Sentiment"]}', '\n')
+
+            result = {
+                'News_Article': sentiment_analysis_result["News_Article"],
+                'Sentiment': sentiment_analysis_result["Sentiment"][0]['label']
+            }
+
+            analysis_results.append(result)
+
+        #Retrieve dominant sentiment based on sentiment analysis data of the news articles
+        dominant_sentiment = stock_sentiment_analysis_util.get_dominant_sentiment(analysis_results)
+        await cl.Message(
+            content="Dominant sentiment of the stock based on last 7 days of news is : " + dominant_sentiment
+        ).send()
+
+        #Plot sentiment breakdown chart
+        fig = stock_sentiment_analysis_util.plot_sentiment_graph(analysis_results)
+        if fig:
+            elements = [cl.Pyplot(name="plot", figure=fig, display="inline",size="large"),
+            ]
+            await cl.Message(
+                content="Sentiment breakdown plot",
+                elements=elements,
+            ).send()
+        else:
+            await cl.Message(
+                content="Failed to generate the sentiment breakdown plot."
+            ).send()
+
+        #Generate summarized message to rationalize dominant sentiment
+        summary = stock_sentiment_analysis_util.generate_summary_of_sentiment(analysis_results, dominant_sentiment)
+        await cl.Message(
+            content= summary
+        ).send()
+
+        #Storing sentiment summary
+        recommendation = recommendation + '\n' + "Stock sentiment summary for " + stockticker + ' is, \n' + summary + '\n and dominant sentiment for stock is ' + dominant_sentiment
+        print("******************************************************")
+        print(recommendation)
+        print("******************************************************")
+        answers=np.append(res["messages"][-1].content,summary)
+        with open('answers.txt', 'w') as a:
+            a.write(str(answers))
+
+        #Adding messages to the Qdrant in-memory store, to give users the ability to ask questions based on the recommendation and sentiment summarization
+        from langchain.text_splitter import RecursiveCharacterTextSplitter
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size = 250,
+            chunk_overlap = 20
+        )
+        recommendation_chunks = text_splitter.split_text(recommendation)
+        # Convert the chunks into Document objects
+        from langchain.schema import Document
+        documents = [Document(page_content=chunk) for chunk in recommendation_chunks]
+
+        #Store embeddings in the Qdrant vector store in memory
+        from langchain_community.vectorstores import Qdrant
+        qdrant_vector_store = Qdrant.from_documents(
+            documents,
+            OpenAIEmbeddings(model="text-embedding-3-small"),
+            location=":memory:",
+            collection_name="Stock Analysis",
+        )
+        qdrant_retriever = qdrant_vector_store.as_retriever()
+
+        #Setting up RAG Prompt Template
+        from langchain_core.prompts import PromptTemplate
+        RAG_PROMPT_TEMPLATE = """\
+<|start_header_id|>system<|end_header_id|>
+You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>
+
+<|start_header_id|>user<|end_header_id|>
+User Query:
+{question}
+
+Context:
+{context}<|eot_id|>
+
+<|start_header_id|>assistant<|end_header_id|>
+"""
+        rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
+
+        from langchain.memory import ConversationBufferMemory
+        from langchain_core.runnables import RunnableLambda, RunnablePassthrough
+        # Instantiate ConversationBufferMemory
+        memory = ConversationBufferMemory(
+            return_messages=True, output_key="answer", input_key="question"
+        )
+        llm = ChatOpenAI(model="gpt-3.5-turbo")
+        # First, load the memory to access chat history
+        loaded_memory = RunnablePassthrough.assign(
+            chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
+        )
+        retrieval_augmented_qa_chain = (loaded_memory
+            | {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
+            | RunnablePassthrough.assign(context=itemgetter("context"))
+            | {"response": rag_prompt | llm, "context": itemgetter("context")}
+        )
+        cl.user_session.set("lcel_rag_chain", retrieval_augmented_qa_chain)
+
     else:
+        #question_array=question_array+message.content
+        print('questions', question_array)
+        file = open("answers.txt", "r")
+        answers = file.read()
+
+        from langchain.text_splitter import RecursiveCharacterTextSplitter
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size = 250,
+            chunk_overlap = 20
+        )
+        recommendation_chunks = text_splitter.split_text(answers)
+        question_chunks = text_splitter.split_text(question_array)
+        all_chunks=recommendation_chunks+question_chunks
+        # Convert the chunks into Document objects
+        from langchain.schema import Document
+        documents = [Document(page_content=chunk) for chunk in all_chunks] #recommendation_chunks]
+
+        #Store embeddings in the Qdrant vector store in memory
+        from langchain_community.vectorstores import Qdrant
+        qdrant_vector_store = Qdrant.from_documents(
+            documents,
+            OpenAIEmbeddings(model="text-embedding-3-small"),
+            location=":memory:",
+            collection_name="Stock Analysis",
+        )
+        qdrant_retriever = qdrant_vector_store.as_retriever()
+
+        #Setting up RAG Prompt Template
+        from langchain_core.prompts import PromptTemplate
+        RAG_PROMPT_TEMPLATE = """\
+<|start_header_id|>system<|end_header_id|>
+You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>
+
+<|start_header_id|>user<|end_header_id|>
+User Query:
+{question}
+
+Context:
+{context}<|eot_id|>
+
+<|start_header_id|>assistant<|end_header_id|>
+"""
+        rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
+
+        from langchain.memory import ConversationBufferMemory
+        from langchain_core.runnables import RunnableLambda, RunnablePassthrough
+        # Instantiate ConversationBufferMemory
+        memory = ConversationBufferMemory(
+            return_messages=True, output_key="answer", input_key="question"
+        )
+        llm = ChatOpenAI(model="gpt-3.5-turbo")
+        # First, load the memory to access chat history
+        loaded_memory = RunnablePassthrough.assign(
+            chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
+        )
+        retrieval_augmented_qa_chain = (loaded_memory
+            | {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
+            | RunnablePassthrough.assign(context=itemgetter("context"))
+            | {"response": rag_prompt | llm, "context": itemgetter("context")}
+        )
+        #store lcel chain
+        cl.user_session.set("lcel_rag_chain", retrieval_augmented_qa_chain)
+
+        #retrieve lcel chain
+        lcel_rag_chain = cl.user_session.get("lcel_rag_chain")
+
+        question = message.content
+        print("Query : " + question)
+
+        result = lcel_rag_chain.invoke({"question" : question})
         await cl.Message(
-            content="Failed to generate the MACD plot."
-        ).send()
-
-    #Generate summarized message rationalize dominant sentiment
-    summary = stock_sentiment_analysis_util.generate_summary_of_sentiment(analysis_results, dominant_sentiment)
-    await cl.Message(
-        content= summary
-    ).send()
+            content= result["response"].content
+        ).send()
+        response=result["response"].content
+        question_array += (f"Answer: {response}")
+        print(response)
+        print(question_array)
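The retrieval flow this commit adds boils down to one pattern: chunk the generated text, embed it into an in-memory Qdrant collection, and answer follow-up questions with an LCEL chain. A minimal, self-contained sketch of that pattern (the sample text, ticker, and question are illustrative; assumes OPENAI_API_KEY is set and the same LangChain packages as the app):

    from operator import itemgetter

    from langchain.schema import Document
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.vectorstores import Qdrant
    from langchain_core.prompts import PromptTemplate
    from langchain_core.runnables import RunnableParallel
    from langchain_openai import ChatOpenAI
    from langchain_openai.embeddings import OpenAIEmbeddings

    # Stand-in for the `recommendation` text the app builds at runtime.
    sample_text = "Example recommendation: the outlook for TICKR appears positive."

    # 1. Chunk the text and wrap each chunk as a Document.
    splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=20)
    docs = [Document(page_content=c) for c in splitter.split_text(sample_text)]

    # 2. Embed the chunks into an in-memory Qdrant collection.
    retriever = Qdrant.from_documents(
        docs,
        OpenAIEmbeddings(model="text-embedding-3-small"),
        location=":memory:",
        collection_name="Stock Analysis",
    ).as_retriever()

    # 3. Compose retrieval and generation with LCEL.
    prompt = PromptTemplate.from_template(
        "Answer only from the context.\n\nQuestion: {question}\n\nContext: {context}"
    )
    chain = (
        RunnableParallel(
            context=itemgetter("question") | retriever,
            question=itemgetter("question"),
        )
        | {"response": prompt | ChatOpenAI(model="gpt-3.5-turbo"), "context": itemgetter("context")}
    )

    result = chain.invoke({"question": "What is the outlook?"})
    print(result["response"].content)

Rebuilding the collection on every message, as the handler above does, is fine for a demo; the pattern is unchanged if the store is instead created once per session.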
 
 
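The dominant-sentiment step above delegates to stock_sentiment_analysis_util.get_dominant_sentiment, whose source is not part of this diff. A plausible, hypothetical sketch of that aggregation over the analysis_results records built in the loop (the real util may differ):

    # Hypothetical sketch of dominant-sentiment aggregation; mirrors the
    # {'News_Article': ..., 'Sentiment': ...} records assembled above.
    from collections import Counter

    def get_dominant_sentiment(analysis_results: list[dict]) -> str:
        # Count label frequencies and return the most common label.
        counts = Counter(r["Sentiment"] for r in analysis_results)
        return counts.most_common(1)[0][0]

    sample = [
        {"News_Article": "a", "Sentiment": "positive"},
        {"News_Article": "b", "Sentiment": "negative"},
        {"News_Article": "c", "Sentiment": "positive"},
    ]
    print(get_dominant_sentiment(sample))  # -> positive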
 
requirements.txt CHANGED
@@ -24,4 +24,5 @@ GoogleNews
 streamlit
 googlenews
 scikit-learn==1.5.1
-torch
+torch
+qdrant-client
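requirements.txt now pulls in qdrant-client, which Qdrant.from_documents(..., location=":memory:") uses under the hood. A quick smoke test for the new dependency (the collection name and vector size are arbitrary; run after pip install -r requirements.txt):

    # Verify qdrant-client works in the same in-memory mode app.py relies on.
    from qdrant_client import QdrantClient
    from qdrant_client.models import Distance, VectorParams

    client = QdrantClient(location=":memory:")
    client.create_collection(
        collection_name="smoke_test",
        vectors_config=VectorParams(size=4, distance=Distance.COSINE),
    )
    print(client.get_collections())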