Spaces:

KatBet
/

TrafficLens

Sleeping

App Files Files Community

TrafficLens / pages /chat.py

KatGaw

Update pages/chat.py

c40d6e0 verified 28 days ago

raw

history blame

9.3 kB

	from openai import OpenAI
	import streamlit as st
	from langchain_openai import ChatOpenAI
	from langchain_openai.embeddings import OpenAIEmbeddings
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	import markdown
	from operator import itemgetter
	from langchain.schema.runnable import RunnablePassthrough
	from langchain_core.prompts import ChatPromptTemplate
	from langchain.schema import Document
	from dotenv import load_dotenv
	from langchain_community.vectorstores import Qdrant
	# from langchain_qdrant import Qdrant
	import os
	import pandas as pd
	import numpy as np

	# Browser-tab title/icon and overall layout for this page.
	_page_settings = dict(
	    page_title="Narrativ 🧠",
	    page_icon="🧠",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)
	st.set_page_config(**_page_settings)

	# Stylesheet for the page: a circular image class plus the search container
	# and combined search row. It is injected as raw HTML, hence
	# unsafe_allow_html below.
	_CUSTOM_CSS = """
	<style>
	.circular-image {
	width: 200px;
	height: 200px;
	border-radius: 50%;
	object-fit: cover;
	display: block;
	margin: 20px auto;
	box-shadow: 0 4px 8px rgba(0,0,0,0.1);
	}

	/* Container for search section */
	.search-container {
	background: white;
	padding: 20px;
	border-radius: 10px;
	box-shadow: 0 2px 4px rgba(0,0,0,0.1);
	margin: 20px 0;
	}

	/* Combined search input styling */
	.combined-search {
	display: flex;
	gap: 10px;
	align-items: center;
	margin-bottom: 20px;
	}
	</style>
	"""
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

	# Pull variables from a local .env file (if present) into the environment.
	load_dotenv()

	# Show a readable message and halt the script when the key is absent,
	# instead of surfacing a raw KeyError traceback to the user.
	OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
	if not OPENAI_API_KEY:
	    st.error("OPENAI_API_KEY is not set. Add it to the environment or to a .env file.")
	    st.stop()

	# Chat model used for the summary, and the embedding model handle.
	base_llm = ChatOpenAI(model="gpt-4o")
	embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

	#========== APP

	from PIL import Image, ImageEnhance

	# Header image. The context manager closes the underlying file once the
	# image has been handed to Streamlit, rather than leaving a handle open on
	# every script rerun.
	with Image.open('./data/Sentiment_index_traffic.png') as image:
	    # To darken the image: ImageEnhance.Brightness(image).enhance(0.5)
	    st.image(image, output_format="PNG", clamp=True)

	st.title("Narrativ 📰")

	# Topic and date for this page come from session state (they are not set
	# anywhere in this file). Fall back to the default topic and no date filter
	# when the keys are missing, instead of failing with an AttributeError.
	prompt = st.session_state.get("prompt", "traffic")
	date = st.session_state.get("date")

	if 'messages' not in st.session_state:
	    st.session_state.messages = []

	# Record the active query in the chat history.
	# NOTE(review): this runs on every script rerun, so the same entry is
	# appended repeatedly within one session - confirm whether that is intended.
	st.session_state.messages.append({"role": "assistant", "content": f'{date} {prompt}'})


	ARTICLES_CSV = './data/sentiment_index_traffic_index_final1.csv'
	SCORE_COLUMNS = ['llm_index', 'sentiment_index_hf', 'confidence_hf']


	def _load_articles():
	    """Read the article CSV, indexed by its 'index' column, without the score columns."""
	    # infer_datetime_format is intentionally not passed: it is deprecated
	    # since pandas 2.0, where format inference is already the default.
	    frame = pd.read_csv(ARTICLES_CSV, index_col='index', parse_dates=True)
	    return frame.drop(columns=SCORE_COLUMNS)


	def _rows_mentioning(frame, topic):
	    """Return the rows of `frame` in which at least one cell contains `topic`.

	    `topic` is matched with pandas' default `str.contains` semantics
	    (case-sensitive, interpreted as a regular expression).
	    """
	    if frame.empty:
	        # Nothing to scan; also avoids a row-wise apply on an empty frame.
	        return frame
	    mask = frame.apply(
	        lambda row: row.astype(str).str.contains(topic, na=False).any(),
	        axis=1,
	    )
	    return frame[mask]


	# Defaults keep the rest of the page working when loading fails below;
	# without them a caught error left these names undefined and the next
	# statement that touched them raised NameError.
	data = pd.DataFrame()
	filtered_data = data
	urls = np.array([])
	data_all = np.array([])
	docs = data_all

	# The date filter is only applied together with a topic.
	_filter_by_date = bool(prompt and date)
	try:
	    data = _load_articles()
	    if _filter_by_date:
	        data = data.loc[data.index == date]
	    # Without a topic every row counts as a match.
	    filtered_data = _rows_mentioning(data, prompt) if prompt else data
	    # URLs come from all rows in `data`, not only the topic matches.
	    urls = data['url'].values.flatten()
	    data_all = filtered_data.values.flatten()
	    docs = data_all
	    if len(docs) == 0:
	        st.warning("No articles found that contain the topic on the given day.")
	except Exception as e:
	    # Page-level boundary: report the failure and continue with the empty defaults.
	    if _filter_by_date:
	        st.error(f"Error processing date: {e}")
	    else:
	        st.error(f"Error loading data: {e}")

	import sys

	if len(docs) > 0:
	    # Bullet list of every non-missing cell from the matched rows.
	    docs_text = "\n".join([f"- {value}" for value in data_all if not pd.isna(value)])
	    docs = [Document(page_content=docs_text)]
	    st.write(data)

	    try:
	        no_rows = len(data)
	        # One row per distinct title (first occurrence of each column value).
	        data_summary = data.groupby('title').first()
	        if prompt:
	            topic_mask = data_summary.apply(
	                lambda row: row.astype(str).str.contains(prompt, na=False).any(),
	                axis=1,
	            )
	            filtered_data = data_summary[topic_mask]
	        else:
	            # No topic: keep every title instead of passing None to str.contains.
	            filtered_data = data_summary
	        # The index already holds unique titles, so no second groupby is needed.
	        data_all_summary = filtered_data['summary_date'].values.flatten()
	        summary_data = "\n".join([f"- {value}" for value in data_all_summary if not pd.isna(value)])
	        # NOTE(review): `urls` is a NumPy array, so its text form is elided
	        # with "..." beyond 1000 entries, and it covers all rows in `data`
	        # rather than only the topic matches - confirm what the prompt should list.
	        summary_request = f"""format nicely the summary into paragraphs for Streamlit. Say how many news articles are available for the given data, the number is: {no_rows}.
	## Output Format:
	- Summary of Opinions: [Concise summary of key opinions]
	- Sentiment Analysis:
	- Sentiment: [Positive/Negative/Neutral]
	- Reasoning: [Detailed explanation here]
	- Chain-of-Thought Reasoning: [Step-by-step explanation]
	- Sources: [URLs for 5 most critical and recent articles on this topic]
	## Guidelines:
	- Maintain objectivity and precision in your analysis.
	- Focus on the context specific to the Greater Washington Area.
	- Use professional and analytical language suitable for client reports.
	- Respond in the language of the article (mostly English).
	- From the provided context, add the URL sources, you find them here, URLs: {urls} - make sure they are clicable! related to the topic.
	Context: {summary_data}"""
	        summary = base_llm.invoke(summary_request).content
	        st.chat_message("assistant").write(summary)
	        st.session_state.messages.append({"role": "assistant", "content": summary})
	    except Exception as e:
	        st.error(f"Error generating summary: {e}")

	# Persist the matched cells as the context file that the follow-up chat
	# reads back from the same path.
	if date:
	    context_path = './data/sentiment_index_traffic_index_final_date.md'
	else:
	    context_path = './data/sentiment_index_traffic_index_final1.md'

	# - encoding: the file is read back as UTF-8, so it is written as UTF-8
	#   instead of the platform default.
	# - threshold: str() on an array with more than 1000 elements elides the
	#   middle with "...", which silently dropped most of the context.
	# NOTE(review): the full dump can be large; trim it if it exceeds the
	# chat model's context window.
	with open(context_path, 'w', encoding='utf-8') as file:
	    file.write(np.array2string(data_all, threshold=sys.maxsize))


	client = OpenAI(api_key=OPENAI_API_KEY)

	if "openai_model" not in st.session_state:
	    st.session_state["openai_model"] = "gpt-4o"

	prompt1 = st.chat_input("Type your additional questions here...")

	# Suggested keywords: clicking a button submits it as the question.
	suggested_keywords = ["Summarize results", "Explain the traffic drop", "Explain the traffic growth"]
	st.markdown("Suggested Keywords:")
	cols = st.columns(len(suggested_keywords))
	for col, keyword in zip(cols, suggested_keywords):
	    if col.button(keyword, key=keyword):
	        prompt1 = keyword

	if prompt1:
	    # Context file written earlier in this script for the active query.
	    if date:
	        file_path = './data/sentiment_index_traffic_index_final_date.md'
	    else:
	        file_path = './data/sentiment_index_traffic_index_final1.md'

	    try:
	        with open(file_path, "r", encoding="utf-8") as file_content:
	            docs = file_content.read()
	    except (OSError, UnicodeError) as e:
	        # Best effort: answer without context rather than abort the chat.
	        st.error(f"Error loading context: {e}")
	        docs = ""

	    # Keep only the question in the history. Embedding the whole context in
	    # every stored user turn made each later request resend all earlier
	    # copies of it, so token use grew with every question.
	    st.session_state.messages.append({"role": "user", "content": prompt1})
	    # Display user message in chat message container
	    with st.chat_message("user"):
	        st.markdown(prompt1)

	    # Persona and context are sent once per request and never stored.
	    system_message = {
	        "role": "system",
	        "content": f'You are a Transurban traffic analyst, that focuses on the Express lanes I-495 and I-95 in the Greater Washington Area. Having this knowledge answer questions using context from {docs}',
	    }
	    history = [
	        {"role": m["role"], "content": m["content"]}
	        for m in st.session_state.messages
	    ]

	    # Display assistant response in chat message container
	    with st.chat_message("assistant"):
	        try:
	            stream = client.chat.completions.create(
	                model=st.session_state["openai_model"],
	                messages=[system_message] + history,
	                stream=True,
	            )
	            response = st.write_stream(stream)
	            st.session_state.messages.append({"role": "assistant", "content": response})
	        except Exception as e:
	            st.error(f"Error generating response: {e}")