import json
import os

import gradio as gr
import urllib3
from openai import OpenAI

from full_chain import get_response

# from langchain_openai import ChatOpenAI
# from langchain.schema import HumanMessage, SystemMessage

# Suppress InsecureRequestWarning output; downstream requests may skip TLS verification.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Initialize the OpenAI client; it reads OPENAI_API_KEY from the environment automatically.
client = OpenAI()

def load_content(filename):
    """Load prompt text from a file in the prompts/ directory."""
    with open(os.path.join("prompts", filename), "r", encoding="utf-8") as f:
        return f.read()

def load_filter_options():
    """Load the available filter values from prompts/filter_options.json."""
    with open(os.path.join("prompts", "filter_options.json"), "r", encoding="utf-8") as f:
        return json.load(f)

def load_example_shots():
    """Load few-shot query/URL examples from prompts/shots.json."""
    with open(os.path.join("prompts", "shots.json"), "r", encoding="utf-8") as f:
        return json.load(f)
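
# Note: shots.json is assumed to be a list of {"query": ..., "url": ...} dicts (see the
# FILTER branch below); filter_options.json can be any JSON structure, since it is
# dumped verbatim into the URL-generation prompt.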

def predict(message, history):
    """Classify the user's message and route it to the matching handler.

    `history` is supplied by gr.ChatInterface but is unused here.
    """
    try:
        # Query classification prompt
        classifier_messages = [
            {"role": "system", "content": """You are the Tobacco Watcher Assistant. Analyze the user's query and categorize it into exactly ONE of these types:

            1. HELP - Questions about using the website, its features, or navigation
               Example: "How do I use filters?", "How to search for articles?"

            2. ABOUT - Questions about Tobacco Watcher's purpose, mission, or organization
               Example: "What is Tobacco Watcher?", "Who runs this website?"

            3. FILTER - Requests for specific articles using filters
               Example: "Show articles about smoking in India from 2023", "Find French articles about e-cigarettes"

            4. QUERY - Questions seeking tobacco-related information
               Example: "How many people smoke in Asia?", "What are the effects of secondhand smoke?"

            Respond with ONLY the category name (HELP, ABOUT, FILTER, or QUERY)."""},
            {"role": "user", "content": message}
        ]
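        # temperature=0 below keeps the one-word classification as deterministic as the API allows.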
        
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=classifier_messages,
            temperature=0
        )
        query_type = completion.choices[0].message.content.strip().upper()
        print(f"Query type: {query_type}")
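        # Route on the predicted label; anything not HELP/ABOUT/FILTER falls through to QUERY.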

        if query_type == "HELP":
            help_content = load_content("help.txt")
            help_messages = [
                {"role": "system", "content": """You are the Tobacco Watcher Help Assistant.
Use the provided help content to guide users on how to use the platform's features.
Be clear and specific in your instructions. If a feature isn't mentioned in the content, acknowledge that and suggest contacting support."""},
                {"role": "user", "content": f"Using this help content:\n\n{help_content}\n\nAnswer this question: {message}"}
            ]
            completion = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=help_messages,
                temperature=0
            )
            return completion.choices[0].message.content

        elif query_type == "ABOUT":
            about_content = load_content("about.txt")
            about_messages = [
                {"role": "system", "content": """You are the Tobacco Watcher Assistant specializing in explaining the platform.
Use the provided content to answer questions about Tobacco Watcher's purpose, mission, features, and organization.
Be concise but informative. If a specific detail isn't in the content, say so rather than making assumptions."""},
                {"role": "user", "content": f"Using this content:\n\n{about_content}\n\nAnswer this question: {message}"}
            ]
            completion = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=about_messages,
                temperature=0
            )
            return completion.choices[0].message.content

        elif query_type == "FILTER":
            filter_options = load_filter_options()
            example_shots = load_example_shots()
            
            url_prompt = """Generate a Tobacco Watcher article URL based on the query. Follow these rules:

            1. Base URL: https://tobaccowatcher.globaltobaccocontrol.org/articles/
            2. Parameters:
               - Subject (c=): Can have multiple
               - Product (pro=): Can have multiple
               - Region (r=): Can have multiple
               - Language (lang=)
               - Always add: st=&e=&section=keywords&dups=0&sort=-timestamp

            Available filters:
            """ + json.dumps(filter_options, indent=2) + """

            Example queries and URLs:
            """
            
            for shot in example_shots:
                url_prompt += f"\nQuery: {shot['query']}\nURL: {shot['url']}\n"
            
            url_prompt += "\nGenerate a valid URL for this query. Return ONLY the complete URL."
            
            url_messages = [
                {"role": "system", "content": url_prompt},
                {"role": "user", "content": message}
            ]
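            # Few-shot prompting: the model imitates the query -> URL examples appended above.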
            
            try:
                completion = client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=url_messages,
                    temperature=0
                )
                url_response = completion.choices[0].message.content.strip()
                print(f"Generated URL: {url_response}")
                
                if url_response.startswith("http"):
                    return f"Here are the filtered articles you requested:\n{url_response}"
                else:
                    return "I couldn't create a proper filter URL. Please try rephrasing your request."
            except Exception as e:
                print(f"Error creating filter URL: {str(e)}")
                return "I couldn't create a proper filter URL. Please try rephrasing your request."

        else:  # QUERY
            try:
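                # get_response (from full_chain) is expected to return a 5-tuple:
                # (answer_text, links, titles, domains, published_dates).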
                response = get_response(message, rerank_type="crossencoder")
                if not response or len(response) != 5:
                    print(f"Invalid response format: {response}")
                    return "I apologize, but I couldn't find relevant information. Please try rephrasing your question."

                responder, links, titles, domains, published_dates = response
                
                if not responder:
                    print("Empty response content")
                    return "I apologize, but I couldn't generate a meaningful response. Please try rephrasing your question."
                    
                response_parts = responder.split("References:")
                main_response = response_parts[0].strip()
                
                if not any([links, titles, domains, published_dates]):
                    print("Missing citation data")
                    return main_response  # Return just the response without citations
                    
                hyperlinks = [
                    f"[{i}] <a href='{link}' target='_blank'>{title}</a> ({domain}) {date}"
                    for i, (link, title, domain, date) in 
                    enumerate(zip(links, titles, domains, published_dates), 1)
                    if link and title and domain  # Only create links for complete data
                ]
                
                if hyperlinks:
                    return f"{main_response}\n\nReferences:\n{chr(10).join(hyperlinks)}"
                return main_response
                
            except Exception as e:
                print(f"Error in QUERY handling: {str(e)}")
                return "I apologize, but I encountered an error processing your request. Please try again."

    except Exception as e:
        print(f"Error in predict: {str(e)}")
        return "I apologize, but I encountered an error processing your request. Please try again."

# Initialize and launch Gradio interface
gr.ChatInterface(
    predict,
    examples=[
        "What is Tobacco Watcher?",
        "How do I use the search filters?",
        "Show me articles about smoking in India from 2023",
        "Find French articles about e-cigarettes",
        "What are the health effects of secondhand smoke?",
        "Show me articles about tobacco industry in Eastern Europe",
    ],
    title="Tobacco Watcher Chatbot",
    description="Ask questions about tobacco-related topics, get help with navigation, or learn about Tobacco Watcher."
).launch()
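
# To run locally (assuming this file is saved as app.py and OPENAI_API_KEY is set):
#   $ python app.py
# Gradio serves the chat UI at http://127.0.0.1:7860 by default.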