import json
import os

import gradio as gr
import urllib3
from openai import OpenAI

from full_chain import get_response

# Suppress TLS certificate warnings (presumably because a downstream
# dependency makes HTTPS requests with verification disabled).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Initialize the OpenAI client; it reads OPENAI_API_KEY from the environment.
client = OpenAI()

def load_content(filename):
    """Load prompt content from a text file in the prompts/ directory."""
    with open(os.path.join("prompts", filename), "r", encoding="utf-8") as f:
        return f.read()
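
# load_content("help.txt") and load_content("about.txt") return the raw text
# that backs the HELP and ABOUT branches in predict() below.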

def load_filter_options():
    """Load the available filter options from prompts/filter_options.json."""
    with open(os.path.join("prompts", "filter_options.json"), "r", encoding="utf-8") as f:
        return json.load(f)

def load_example_shots():
    """Load few-shot query/URL examples from prompts/shots.json."""
    with open(os.path.join("prompts", "shots.json"), "r", encoding="utf-8") as f:
        return json.load(f)
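
# Assumed (illustrative) shape of prompts/shots.json, inferred from how the
# shots are interpolated into the few-shot prompt below:
#   [
#     {"query": "Show articles about smoking in India from 2023",
#      "url": "https://tobaccowatcher.globaltobaccocontrol.org/articles/?..."},
#     ...
#   ]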

def predict(message, history):
    """Process a user message and return the appropriate response."""
    try:
        # Query classification prompt
        classifier_messages = [
            {"role": "system", "content": """You are the Tobacco Watcher Assistant. Analyze the user's query and categorize it into exactly ONE of these types:

1. HELP - Questions about using the website, its features, or navigation
   Example: "How do I use filters?", "How to search for articles?"

2. ABOUT - Questions about Tobacco Watcher's purpose, mission, or organization
   Example: "What is Tobacco Watcher?", "Who runs this website?"

3. FILTER - Requests for specific articles using filters
   Example: "Show articles about smoking in India from 2023", "Find French articles about e-cigarettes"

4. QUERY - Questions seeking tobacco-related information
   Example: "How many people smoke in Asia?", "What are the effects of secondhand smoke?"

Respond with ONLY the category name (HELP, ABOUT, FILTER, or QUERY)."""},
            {"role": "user", "content": message}
        ]

        # temperature=0 keeps the single-word classification deterministic.
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=classifier_messages,
            temperature=0
        )
        query_type = completion.choices[0].message.content.strip().upper()
        print(f"Query type: {query_type}")
        if query_type == "HELP":
            help_content = load_content("help.txt")
            help_messages = [
                {"role": "system", "content": """You are the Tobacco Watcher Help Assistant.
Use the provided help content to guide users on how to use the platform's features.
Be clear and specific in your instructions. If a feature isn't mentioned in the content, acknowledge that and suggest contacting support."""},
                {"role": "user", "content": f"Using this help content:\n\n{help_content}\n\nAnswer this question: {message}"}
            ]
            completion = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=help_messages,
                temperature=0
            )
            return completion.choices[0].message.content
        elif query_type == "ABOUT":
            about_content = load_content("about.txt")
            about_messages = [
                {"role": "system", "content": """You are the Tobacco Watcher Assistant specializing in explaining the platform.
Use the provided content to answer questions about Tobacco Watcher's purpose, mission, features, and organization.
Be concise but informative. If a specific detail isn't in the content, say so rather than making assumptions."""},
                {"role": "user", "content": f"Using this content:\n\n{about_content}\n\nAnswer this question: {message}"}
            ]
            completion = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=about_messages,
                temperature=0
            )
            return completion.choices[0].message.content
        elif query_type == "FILTER":
            filter_options = load_filter_options()
            example_shots = load_example_shots()

            url_prompt = """Generate a Tobacco Watcher article URL based on the query. Follow these rules:

1. Base URL: https://tobaccowatcher.globaltobaccocontrol.org/articles/
2. Parameters:
   - Subject (c=): Can have multiple
   - Product (pro=): Can have multiple
   - Region (r=): Can have multiple
   - Language (lang=)
   - Always add: st=&e=&section=keywords&dups=0&sort=-timestamp

Available filters:
""" + json.dumps(filter_options, indent=2) + """

Example queries and URLs:
"""
            for shot in example_shots:
                url_prompt += f"\nQuery: {shot['query']}\nURL: {shot['url']}\n"
            url_prompt += "\nGenerate a valid URL for this query. Return ONLY the complete URL."
            url_messages = [
                {"role": "system", "content": url_prompt},
                {"role": "user", "content": message}
            ]
            try:
                completion = client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=url_messages,
                    temperature=0
                )
                url_response = completion.choices[0].message.content.strip()
                print(f"Generated URL: {url_response}")
                if url_response.startswith("http"):
                    return f"Here are the filtered articles you requested:\n{url_response}"
                return "I couldn't create a proper filter URL. Please try rephrasing your request."
            except Exception as e:
                print(f"Error creating filter URL: {str(e)}")
                return "I couldn't create a proper filter URL. Please try rephrasing your request."
        else:  # QUERY, and the fallback for any unrecognized label
            try:
                # get_response is expected to return a 5-tuple:
                # (answer text, links, titles, domains, published dates).
                response = get_response(message, rerank_type="crossencoder")
                if not response or len(response) != 5:
                    print(f"Invalid response format: {response}")
                    return "I apologize, but I couldn't find relevant information. Please try rephrasing your question."

                responder, links, titles, domains, published_dates = response
                if not responder:
                    print("Empty response content")
                    return "I apologize, but I couldn't generate a meaningful response. Please try rephrasing your question."

                # Keep only the answer text before any inline "References:" section.
                main_response = responder.split("References:")[0].strip()

                if not any([links, titles, domains, published_dates]):
                    print("Missing citation data")
                    return main_response  # Return the answer without citations

                hyperlinks = [
                    f"[{i}] <a href='{link}' target='_blank'>{title}</a> ({domain}) {date}"
                    for i, (link, title, domain, date) in
                    enumerate(zip(links, titles, domains, published_dates), 1)
                    if link and title and domain  # Only cite complete entries
                ]
                if hyperlinks:
                    references = "\n".join(hyperlinks)
                    return f"{main_response}\n\nReferences:\n{references}"
                return main_response
            except Exception as e:
                print(f"Error in QUERY handling: {str(e)}")
                return "I apologize, but I encountered an error processing your request. Please try again."
    except Exception as e:
        print(f"Error in predict: {str(e)}")
        return "I apologize, but I encountered an error processing your request. Please try again."

# Initialize and launch the Gradio chat interface.
gr.ChatInterface(
    predict,
    examples=[
        "What is Tobacco Watcher?",
        "How do I use the search filters?",
        "Show me articles about smoking in India from 2023",
        "Find French articles about e-cigarettes",
        "What are the health effects of secondhand smoke?",
        "Show me articles about the tobacco industry in Eastern Europe",
    ],
    title="Tobacco Watcher Chatbot",
    description="Ask questions about tobacco-related topics, get help with navigation, or learn about Tobacco Watcher."
).launch()
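
# launch() serves the app locally by default; passing share=True to launch()
# creates a temporary public Gradio link if the app must be reachable remotely.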