|
from haystack.components.generators import OpenAIGenerator |
|
from haystack.utils import Secret |
|
from haystack.components.builders.prompt_builder import PromptBuilder |
|
from haystack.components.routers import ConditionalRouter |
|
from haystack import Pipeline |
|
|
|
from haystack.components.embedders import SentenceTransformersTextEmbedder |
|
|
|
|
|
|
|
from haystack_integrations.document_stores.chroma import ChromaDocumentStore |
|
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever |
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
|
|
# Sentence-transformers model used to embed queries at retrieval time.
# NOTE(review): assumed to match the model the Chroma store's documents were
# embedded with — confirm against the indexing script.
embedding_model = "Alibaba-NLP/gte-multilingual-base"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Persistent Chroma vector store. "vstore_4012" is presumably pre-populated
# with embedded documents — no indexing happens in this script; verify.
document_store = ChromaDocumentStore(

    persist_path="vstore_4012"

)
|
|
|
|
|
|
|
|
|
# Canned refusal emitted by the router's "no_answer" branch when the
# relevance classifier labels the query off-topic. Grammar fixed: the original
# read "APIs access in accordance DSA's ..." (missing "with"/"the") and had
# stray trailing spaces.
no_answer_message = (
    "I'm not allowed to answer this question. Please ask something related to "
    "API access in accordance with the DSA's transparency and data-sharing "
    "provisions. Is there anything else I can do for you?"
)
|
|
|
# Jinja prompt for the relevance classifier: the LLM must reply YES or NO on
# whether the query is on-topic (social media APIs / DSA / platform rules).
# Fix: "if the above message refers previous ones" was missing "to".
relevance_prompt_template = """
Classify whether this user is asking for something related to social media APIs,
the Digital Services Act (DSA), or any topic related to online platforms’ compliance
with legal and data-sharing frameworks.

Relevant topics include:
- Social media API access
- Data transparency
- Compliance with DSA provisions
- Online platform regulations

Here is their message:
{{query}}

Here are the two previous messages. ONLY refer to these if the above message refers to previous ones.

{% for message in user_history[-2:] %}
* {{message["content"]}}

{% endfor %}

Instructions:
- Respond with “YES” if the query pertains to any of the relevant topics listed above and not mixed with off-topic content.
- Respond with “NO” if the query is off-topic and does not relate to the topics listed above.

Examples:
- Query: "How does the DSA affect API usage?"
- Response: "YES"

- Query: "How to make a pancake with APIs?"
- Response: "NO"

"""
|
|
|
# Routes for ConditionalRouter: forward the user's query downstream when the
# relevance LLM answered YES, otherwise emit the canned refusal.
routes = [
    {
        # On-topic: pass the original query on to the retrieval branch.
        "condition": "{{'YES' in replies[0]}}",
        "output": "{{query}}",
        "output_name": "query",
        "output_type": str,
    },
    {
        # Fallback: anything that is not an explicit YES is refused. Negating
        # the first condition (instead of testing for 'NO') guarantees exactly
        # one route matches even when the classifier returns unexpected text,
        # which would otherwise make the router raise at runtime.
        "condition": "{{'YES' not in replies[0]}}",
        "output": no_answer_message,
        "output_name": "no_answer",
        "output_type": str,
    }
]
|
|
|
# Jinja prompt for the answering LLM: recent conversation context, the user's
# query, style instructions, then the retrieved document contents.
query_prompt_template = """

Conversation history:

{{conv_history}}



Here is what the user has requested:

{{query}}



Instructions:

- Craft a concise, short informative answer to the user's request using the information provided below.

- Synthesize the key points into a seamless response that appears as your own expert knowledge.

- Avoid direct quotes or explicit references to the documents.

- You are directly answering the user's query.



Relevant Information:

{% for document in documents %}

- {{ document.content }}

{% endfor %}



"""
|
|
|
def setup_generator(model_name, api_key_env_var="OPENAI_API_KEY", max_tokens=8192):
    """Build an OpenAIGenerator for *model_name*.

    The API key is resolved lazily from the environment variable named by
    *api_key_env_var*; *max_tokens* caps the length of each completion.
    """
    api_secret = Secret.from_env_var(api_key_env_var)
    gen_kwargs = {"max_tokens": max_tokens}
    return OpenAIGenerator(
        api_key=api_secret,
        model=model_name,
        generation_kwargs=gen_kwargs,
    )
|
|
|
|
|
# Relevance classifier LLM: tiny token budget — it only needs to emit YES/NO.
llm = setup_generator("gpt-4o-mini", max_tokens=30)

# Answering LLM: default (large) token budget for the full response.
llm2 = setup_generator("gpt-4o-mini")
|
|
|
|
|
# Query-time embedder; trust_remote_code presumably required by the gte
# model's custom code on the Hugging Face Hub — confirm.
embedder = SentenceTransformersTextEmbedder(model=embedding_model, trust_remote_code=True)

retriever = ChromaEmbeddingRetriever(document_store)



# Router dispatches on the relevance LLM's YES/NO reply (see `routes`).
router = ConditionalRouter(routes=routes)

prompt_builder = PromptBuilder(template=relevance_prompt_template)  # relevance check

prompt_builder2 = PromptBuilder(template=query_prompt_template)  # final answer
|
|
|
|
|
# Pipeline graph:
#   prompt_builder -> llm -> router
#   router.query -> embedder -> retriever -> prompt_builder2 -> llm2
# The router's "no_answer" output has no downstream consumer, so it surfaces
# in Pipeline.run()'s results and is read by chat().
answer_query = Pipeline()



answer_query.add_component("prompt_builder", prompt_builder)

answer_query.add_component("llm", llm)

answer_query.add_component("router", router)

answer_query.add_component("embedder", embedder)

answer_query.add_component("retriever", retriever)

answer_query.add_component("prompt_builder2", prompt_builder2)

answer_query.add_component("llm2", llm2)


# Relevance check runs first; only the router's "query" branch reaches
# embedding/retrieval and the answering LLM.
answer_query.connect("prompt_builder", "llm")

answer_query.connect("llm", "router")

answer_query.connect("router.query", "embedder")

answer_query.connect("embedder", "retriever")

answer_query.connect("retriever", "prompt_builder2")

answer_query.connect("prompt_builder2", "llm2")


# Load models (e.g. the sentence-transformers embedder) before serving.
answer_query.warm_up()
|
|
|
|
|
|
|
|
|
|
|
|
|
def chat(message, history):
    """Gradio chat callback: run the RAG pipeline and return the next answer.

    Args:
        message: The user's latest message (str).
        history: Prior turns as a list of {"role": ..., "content": ...} dicts
            (Gradio ``type="messages"`` format).

    Returns:
        The assistant's reply as a string.
    """
    # Use a distinct loop name (`turn`) — the original shadowed the `message`
    # parameter inside these comprehensions, which was confusing to read.
    conv_history = "\n\n".join(
        f'{turn["role"]}: {turn["content"]}' for turn in history[-2:]
    )
    user_history = [turn for turn in history if turn["role"] == "user"]

    # Flat input dict: Haystack routes each key to the component(s) that
    # declare a matching free input (e.g. top_k -> retriever).
    results = answer_query.run({
        "user_history": user_history,
        "query": message,
        "conv_history": conv_history,
        "top_k": 3,
    })

    if "llm2" in results:
        # On-topic path: the answering LLM produced a reply.
        return results["llm2"]["replies"][0]
    if "router" in results and "no_answer" in results["router"]:
        # Off-topic path: the router short-circuited with the canned refusal.
        return results["router"]["no_answer"]
    # Typo fixed ("occured" -> "occurred").
    return "Sorry, a mistake occurred"
|
|
|
if __name__ == "__main__":
    # type="messages" makes Gradio pass history as a list of
    # {"role", "content"} dicts, matching what chat() expects.
    interface = gr.ChatInterface(

        fn=chat,

        type="messages",

        title="40.12 Chatbot",

        description="Ask me anything about social media APIs, the Digital Services Act (DSA), or online platform regulations.")



    interface.launch()