txtai Reflection Chatbot — a HuggingFace Space that answers questions with
Chain of Thought (CoT) prompting plus self-reflection over a Wikipedia
embeddings index.
import re

import gradio as gr
from txtai import Embeddings, LLM
def cot(system, user):
    """Answer a query using Chain of Thought (CoT) prompting with self-reflection.

    The caller's base system instructions are extended with a protocol that asks
    the model to reason inside <thinking>/<reflection> tags and place the final
    answer inside <output> tags. Only the <output> section is returned; when the
    model omits the tag entirely, the raw response is returned unchanged.

    Args:
        system: base system instructions to prepend to the CoT protocol
        user: user message to answer

    Returns:
        The final answer text extracted from the <output> tags, stripped of
        surrounding whitespace, or the full model response as a fallback.
    """
    system = f"""
{system}

You are an AI assistant that uses a Chain of Thought (CoT) approach with reflection to answer queries. Follow these steps:

1. Think through the problem step by step within the <thinking> tags.
2. Reflect on your thinking to check for any errors or improvements within the <reflection> tags.
3. Make any necessary adjustments based on your reflection.
4. Provide your final, concise answer within the <output> tags.

Important: The <thinking> and <reflection> sections are for your internal reasoning process only.
Do not include any part of the final answer in these sections.
The actual response to the query must be entirely contained within the <output> tags.

Use the following format for your response:
<thinking>
[Your step-by-step reasoning goes here. This is your internal thought process, not the final answer.]
<reflection>
[Your reflection on your reasoning, checking for errors or improvements]
</reflection>
[Any adjustments to your thinking based on your reflection]
</thinking>
<output>
[Your final, concise answer to the query. This is the only part that will be shown to the user.]
</output>
"""

    result = llm(
        [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        maxlength=4096,
    )

    # Tolerate a missing closing tag (truncated generations) via the `|$` branch
    if extracted := re.search(r"<output>(.*?)(?:</output>|$)", result, re.DOTALL):
        return extracted.group(1).strip()

    return result
def rag(question):
    """Retrieval augmented generation (RAG) pipeline for a single question.

    Searches the embeddings index for context relevant to the question, then
    asks the LLM — via the CoT-with-reflection prompt — to answer using only
    that retrieved context.

    Args:
        question: natural-language question to answer

    Returns:
        The model's final answer text.
    """
    template = """
Answer the following question using only the context below. Only include information
specifically discussed.

question: {question}
context: {context}
"""

    system = "You are a friendly assistant. You answer questions from users."

    # Join the text of each search hit into a single context string
    context = "\n".join(hit["text"] for hit in embeddings.search(question))

    return cot(system, template.format(question=question, context=context))
# Load a prebuilt Wikipedia embeddings index from the HuggingFace Hub
embeddings = Embeddings()
embeddings.load(provider="huggingface-hub", container="neuml/txtai-wikipedia")

# Quantized Llama 3.1 8B Instruct model; requires a GPU
llm = LLM("hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4", gpu=True)
def predict(message, history):
    """Gradio chat handler.

    Each turn is answered independently through the RAG pipeline; the
    conversation `history` supplied by Gradio is intentionally unused.
    """
    return rag(message)
# Build the chat UI around the RAG handler and start the app
chatbot = gr.ChatInterface(
    predict,
    title="txtai Reflection Chatbot",
    description="A chatbot that uses Chain of Thought (CoT) with self-reflection to answer queries.",
)
chatbot.launch()