|
import gradio as gr |
|
|
|
import torch |
|
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, pipeline |
|
from threading import Thread |
|
|
|
|
|
# Hugging Face Hub identifier shared by the tokenizer and the model below.
checkpoint = "microsoft/phi-2"

# Tokenizer and causal-LM weights come from the same checkpoint. The model is
# placed on the CPU in float32.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    trust_remote_code=True,
    device_map="cpu",
    torch_dtype=torch.float32,
)

# Text-generation pipeline wrapping the objects loaded above. The tokenizer's
# EOS token id is used both as the stop token and as the padding token.
phi2 = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    device_map="cpu",
)
|
|
|
from dspy.agents import Agent |
|
from dspy import spawn_processes |
|
from dspy.utils import SentenceSplitter, SentimentAnalyzer, NamedEntityRecognizer |
|
|
|
def dspy_generate_agent_prompts(prompt):
    """
    Build the prompts handed to the sentiment, topic and recommendation agents.

    The text is split into sentences, each sentence is given a sentiment
    label, and named entities labelled FOOD, ORG or LOCATION are collected.
    Those intermediate results are folded into the recommendation prompt.

    Args:
        prompt (str): The user-provided text (e.g., customer reviews).

    Returns:
        list: Three prompt strings, in this order: sentiment analysis,
            topic extraction, recommendation.
    """
    sentences = SentenceSplitter().process(prompt)

    # One sentiment label per sentence; the labels are tallied further down.
    analyzer = SentimentAnalyzer()
    labels = [analyzer.analyze(sentence) for sentence in sentences]

    # Group entity texts under their label, keeping only the labels of interest.
    recognizer = NamedEntityRecognizer(model_name="en_core_web_sm")
    wanted_labels = ("FOOD", "ORG", "LOCATION")
    entities_by_label = {}
    for sentence in sentences:
        for found in recognizer.process(sentence):
            if found.label_ in wanted_labels:
                entities_by_label.setdefault(found.label_, []).append(found.text)

    sentiment_prompt = (
        "Analyze the sentiment of the following sentences:\n"
        + "\n".join(sentences)
    )

    topic_prompt = (
        "Extract the main topics discussed in the following text, "
        f"focusing on food, service, and ambiance:\n{prompt}"
    )

    def tally(tag):
        # Number of sentences whose sentiment label equals ``tag``.
        return sum(label == tag for label in labels)

    n_positive = tally("POSITIVE")
    n_negative = tally("NEGATIVE")
    n_neutral = tally("NEUTRAL")

    # One "LABEL: text,text,..." line per entity label.
    topic_mentions = "\n".join(
        f"{label}: {','.join(texts)}"
        for label, texts in entities_by_label.items()
    )

    recommendation_prompt = (
        f"Based on the sentiment analysis (positive: {n_positive}, "
        f"negative: {n_negative}, neutral: {n_neutral}) "
        f"and extracted topics ({topic_mentions}), "
        "suggest recommendations for organic farming methods "
        "to address user's concerns in their location."
    )

    return [sentiment_prompt, topic_prompt, recommendation_prompt]
|
|
|
|
|
def generate(message, chat_history, max_new_tokens):
    """Stream a phi-2 reply to ``message`` for the chat interface.

    A single prompt is assembled from a fixed instruction, the agent prompts
    that ``dspy_generate_agent_prompts`` derives from ``message``, the earlier
    chat turns and the new message. The ``phi2`` pipeline runs on a worker
    thread while this generator yields the reply accumulated so far each time
    the streamer hands over another chunk of text.

    Args:
        message (str): The newest user message.
        chat_history (list): Earlier turns as ``(sent, received)`` pairs.
        max_new_tokens (int): Upper bound on the number of generated tokens.

    Yields:
        str: The reply generated so far, cut at any hallucinated "User:"
            turn and stripped of a leading "Assistant:" tag.
    """
    # Fix: this used to be `dspy_generate_agent_prompts(message) == synth_message`,
    # a comparison against an undefined name that raised NameError on every
    # call. The helper returns a list, so it is joined into plain text rather
    # than being interpolated as a Python list repr.
    synth_message = "\n".join(dspy_generate_agent_prompts(message))

    instruction = "You are a helpful organic farming assistant to 'User'. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'. You are an agricultural assistant committed to regenerative practices. You are being supplied with a list of tasks which you will need to walk the user through with the compassionate heart of a teacher and using easily understandable language."

    prompt_parts = [f"Instruction: {instruction} {synth_message}\n"]
    for sent, received in chat_history:
        prompt_parts.append(f"User: {sent}\n")
        prompt_parts.append(f"Assistant: {received}\n")
    prompt_parts.append(f"User: {message}\n")
    prompt_parts.append("Output:")
    final_prompt = "".join(prompt_parts)

    # If the prompt leaves no room for `max_new_tokens` within the tokenizer's
    # maximum length, swap it for a short prompt that asks the model to tell
    # the user to clear the history.
    if len(tokenizer.tokenize(final_prompt)) >= tokenizer.model_max_length - max_new_tokens:
        final_prompt = "Instruction: Say 'Input exceeded context size, please clear the chat history and retry!' Output:"

    # The pipeline call blocks until generation finishes, so it runs on its
    # own thread and this generator drains the streamer concurrently.
    streamer = TextIteratorStreamer(
        tokenizer=tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
        timeout=300.0,
    )
    thread = Thread(
        target=phi2,
        kwargs={
            "text_inputs": final_prompt,
            "max_new_tokens": max_new_tokens,
            "streamer": streamer,
        },
    )
    thread.start()

    generated_text = ""
    for word in streamer:
        generated_text += word
        response = generated_text.strip()

        # Drop anything after the model starts writing the next user turn.
        if "User:" in response:
            response = response.split("User:")[0].strip()

        # Keep only what follows the first "Assistant:" tag, if one is present.
        if "Assistant:" in response:
            response = response.split("Assistant:")[1].strip()

        # Yield on every chunk so the interface updates while text arrives.
        yield response
|
|
|
|
|
# Gradio UI: an introductory Markdown panel, a slider controlling
# `max_new_tokens`, and a chat interface that streams replies from `generate`.
with gr.Blocks() as demo:
    # Fix: corrected the user-facing typo "interfce" and the missing articles
    # in the first sentence of the description.
    gr.Markdown("""
    # LEAP Phi-2 Agentic Chatbot Demo
    This multi-agent chatbot was created for the LEAP hackathon to offer an interface to a team of experts for organic farming advice, using Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model and DSPy synthetic prompt agentics.

    In order to reduce the response time on this hardware, `max_new_tokens` has been set to `21` in the text generation pipeline. With this default configuration, it takes approximately `60 seconds` for the response to start being generated, and streamed one word at a time. Use the slider below to increase or decrease the length of the generated text.
    """)

    # The slider value is passed to `generate` as its `max_new_tokens`
    # argument; the default of 21 matches the description above.
    tokens_slider = gr.Slider(
        8,
        128,
        value=21,
        label="Maximum new tokens",
        info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.",
    )

    chatbot = gr.ChatInterface(
        fn=generate,
        additional_inputs=[tokens_slider],
        stop_btn=None,
        examples=[["Who is Leonhard Euler?"]],
    )

demo.queue().launch()