Spaces:

mdredze1
/

tobacco-watcher-chat-with-citations

Sleeping

App Files Files Community

tobacco-watcher-chat-with-citations / app.py

vtiyyal1

Upload 2 files

a084a92 verified about 2 months ago

raw

history blame

8.74 kB

	import gradio as gr
	from full_chain import get_response
	import os
	import json
	from openai import OpenAI
	import json

	# from langchain_openai import ChatOpenAI
	# from langchain.schema import HumanMessage, SystemMessage
	import urllib3

	# Silence urllib3's InsecureRequestWarning for the whole process.
	# NOTE(review): presumably a downstream HTTP call (e.g. inside full_chain)
	# skips TLS certificate verification -- confirm this is still required.
	urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

	# Initialize the shared OpenAI client used for the chat-completion calls in this module.
	client = OpenAI() # It will automatically use OPENAI_API_KEY from environment

	def load_content(filename):
	    """Return the full text of ``prompts/<filename>``, decoded as UTF-8.

	    Args:
	        filename: Name of a file inside the ``prompts`` directory
	            (resolved relative to the current working directory).

	    Returns:
	        The file's entire contents as a single string.
	    """
	    prompt_path = os.path.join("prompts", filename)
	    with open(prompt_path, mode="r", encoding="utf-8") as prompt_file:
	        text = prompt_file.read()
	    return text

	def load_filter_options():
	    """Load the available article filters from ``prompts/filter_options.json``.

	    The file is read explicitly as UTF-8 (the JSON interchange encoding and
	    the same encoding ``load_content`` uses) instead of the platform default,
	    so non-ASCII filter names decode identically on every host.

	    Returns:
	        The parsed JSON document.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If the file is not valid JSON.
	    """
	    with open(os.path.join("prompts", "filter_options.json"), "r", encoding="utf-8") as f:
	        return json.load(f)

	def load_example_shots():
	    """Load the few-shot query/URL examples from ``prompts/shots.json``.

	    The file is read explicitly as UTF-8 (the JSON interchange encoding and
	    the same encoding ``load_content`` uses) instead of the platform default,
	    so non-ASCII example queries decode identically on every host.

	    Returns:
	        The parsed JSON document.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If the file is not valid JSON.
	    """
	    with open(os.path.join("prompts", "shots.json"), "r", encoding="utf-8") as f:
	        return json.load(f)

	def _chat_completion(messages):
	    """Run one gpt-4o-mini chat completion at temperature 0 and return its text.

	    Args:
	        messages: Chat messages (dicts with "role" and "content") to send.

	    Returns:
	        The content of the first choice, or "" when the API returns no
	        content, so callers can always treat the result as a string.
	    """
	    completion = client.chat.completions.create(
	        model="gpt-4o-mini",
	        messages=messages,
	        temperature=0
	    )
	    return completion.choices[0].message.content or ""

	def _format_references(links, titles, domains, published_dates):
	    """Build the numbered HTML reference lines for a QUERY answer.

	    Entries missing a link, title or domain are skipped; numbering still
	    follows each entry's original position, so skipped entries leave gaps
	    in the numbers. All interpolated values are HTML-escaped because they
	    come from retrieved article metadata and are placed inside markup.

	    Returns:
	        A list of strings, one per complete citation.
	    """
	    import html  # local import keeps this block self-contained

	    references = []
	    for i, (link, title, domain, date) in enumerate(
	        zip(links, titles, domains, published_dates), 1
	    ):
	        if not (link and title and domain):
	            continue  # Only create links for complete data
	        # A missing date is omitted instead of being rendered as "None".
	        date_suffix = f" {html.escape(str(date))}" if date else ""
	        references.append(
	            f"[{i}] <a href='{html.escape(str(link), quote=True)}' target='_blank'>"
	            f"{html.escape(str(title))}</a> ({html.escape(str(domain))}){date_suffix}"
	        )
	    return references

	def predict(message, history):
	    """Process user message and return appropriate response.

	    The message is first classified by the model as HELP, ABOUT, FILTER or
	    QUERY. HELP and ABOUT are answered from the matching prompt text file,
	    FILTER asks the model for a Tobacco Watcher article URL, and anything
	    else is treated as QUERY and answered through ``get_response`` with
	    citations appended.

	    Args:
	        message: The user's latest chat message.
	        history: Prior chat turns passed by the chat interface; not used.

	    Returns:
	        The reply text to display. Errors are printed and replaced by an
	        apology message rather than raised.
	    """
	    try:
	        # Query classification prompt
	        classifier_messages = [
	            {"role": "system", "content": """You are the Tobacco Watcher Assistant. Analyze the user's query and categorize it into exactly ONE of these types:

	1. HELP - Questions about using the website, its features, or navigation
	Example: "How do I use filters?", "How to search for articles?"

	2. ABOUT - Questions about Tobacco Watcher's purpose, mission, or organization
	Example: "What is Tobacco Watcher?", "Who runs this website?"

	3. FILTER - Requests for specific articles using filters
	Example: "Show articles about smoking in India from 2023", "Find French articles about e-cigarettes"

	4. QUERY - Questions seeking tobacco-related information
	Example: "How many people smoke in Asia?", "What are the effects of secondhand smoke?"

	Respond with ONLY the category name (HELP, ABOUT, FILTER, or QUERY)."""},
	            {"role": "user", "content": message}
	        ]

	        raw_label = _chat_completion(classifier_messages)
	        # The model occasionally wraps the label in quotes or adds a trailing
	        # period; strip that so e.g. "HELP." is still routed to HELP.
	        query_type = raw_label.strip().strip("\"'`.*:").strip().upper()
	        print(f"Query type: {query_type}")

	        if query_type == "HELP":
	            help_content = load_content("help.txt")
	            help_messages = [
	                {"role": "system", "content": """You are the Tobacco Watcher Help Assistant.
	Use the provided help content to guide users on how to use the platform's features.
	Be clear and specific in your instructions. If a feature isn't mentioned in the content, acknowledge that and suggest contacting support."""},
	                {"role": "user", "content": f"Using this help content:\n\n{help_content}\n\nAnswer this question: {message}"}
	            ]
	            return _chat_completion(help_messages)

	        elif query_type == "ABOUT":
	            about_content = load_content("about.txt")
	            about_messages = [
	                {"role": "system", "content": """You are the Tobacco Watcher Assistant specializing in explaining the platform.
	Use the provided content to answer questions about Tobacco Watcher's purpose, mission, features, and organization.
	Be concise but informative. If a specific detail isn't in the content, say so rather than making assumptions."""},
	                {"role": "user", "content": f"Using this content:\n\n{about_content}\n\nAnswer this question: {message}"}
	            ]
	            return _chat_completion(about_messages)

	        elif query_type == "FILTER":
	            filter_options = load_filter_options()
	            example_shots = load_example_shots()

	            url_prompt = """Generate a Tobacco Watcher article URL based on the query. Follow these rules:

	1. Base URL: https://tobaccowatcher.globaltobaccocontrol.org/articles/
	2. Parameters:
	- Subject (c=): Can have multiple
	- Product (pro=): Can have multiple
	- Region (r=): Can have multiple
	- Language (lang=)
	- Always add: st=&e=&section=keywords&dups=0&sort=-timestamp

	Available filters:
	""" + json.dumps(filter_options, indent=2) + """

	Example queries and URLs:
	"""

	            # Append every few-shot example in one pass.
	            url_prompt += "".join(
	                f"\nQuery: {shot['query']}\nURL: {shot['url']}\n"
	                for shot in example_shots
	            )

	            url_prompt += "\nGenerate a valid URL for this query. Return ONLY the complete URL."

	            url_messages = [
	                {"role": "system", "content": url_prompt},
	                {"role": "user", "content": message}
	            ]

	            try:
	                url_response = _chat_completion(url_messages).strip()
	                print(f"Generated URL: {url_response}")

	                # Require an actual http(s) scheme, not just an "http" prefix.
	                if url_response.startswith(("http://", "https://")):
	                    return f"Here are the filtered articles you requested:\n{url_response}"
	                return "I couldn't create a proper filter URL. Please try rephrasing your request."
	            except Exception as e:
	                print(f"Error creating filter URL: {str(e)}")
	                return "I couldn't create a proper filter URL. Please try rephrasing your request."

	        else: # QUERY
	            try:
	                response = get_response(message, rerank_type="crossencoder")
	                if not response or len(response) != 5:
	                    print(f"Invalid response format: {response}")
	                    return "I apologize, but I couldn't find relevant information. Please try rephrasing your question."

	                responder, links, titles, domains, published_dates = response

	                if not responder:
	                    print("Empty response content")
	                    return "I apologize, but I couldn't generate a meaningful response. Please try rephrasing your question."

	                # Drop any "References:" section already in the answer text;
	                # the reference list is rebuilt below from the citation data.
	                main_response = responder.split("References:")[0].strip()

	                if not any([links, titles, domains, published_dates]):
	                    print("Missing citation data")
	                    return main_response # Return just the response without citations

	                hyperlinks = _format_references(links, titles, domains, published_dates)

	                if hyperlinks:
	                    references = "\n".join(hyperlinks)
	                    return f"{main_response}\n\nReferences:\n{references}"
	                return main_response

	            except Exception as e:
	                print(f"Error in QUERY handling: {str(e)}")
	                return "I apologize, but I encountered an error processing your request. Please try again."

	    except Exception as e:
	        print(f"Error in predict: {str(e)}")
	        return "I apologize, but I encountered an error processing your request. Please try again."

	# Build the chat UI around predict() and start serving it.
	gr.ChatInterface(
	    fn=predict,
	    title="Tobacco Watcher Chatbot",
	    description="Ask questions about tobacco-related topics, get help with navigation, or learn about Tobacco Watcher.",
	    # Sample prompts offered to the user, covering ABOUT, HELP, FILTER and QUERY style questions.
	    examples=[
	        "What is Tobacco Watcher?",
	        "How do I use the search filters?",
	        "Show me articles about smoking in India from 2023",
	        "Find French articles about e-cigarettes",
	        "What are the health effects of secondhand smoke?",
	        "Show me articles about tobacco industry in Eastern Europe",
	    ],
	).launch()