Spaces:

PabloVD
/

MedievalChatbot

Sleeping

App Files Files Community

MedievalChatbot / app.py

PabloVD

Add description to chatbot

16f3588 3 months ago

raw

history blame contribute delete

2.64 kB

	# Following https://python.langchain.com/docs/tutorials/chatbot/
	# Missing: trimming, streaming with memory, use multiple threads

	from langchain_mistralai import ChatMistralAI
	from langchain_core.rate_limiters import InMemoryRateLimiter
	from langgraph.checkpoint.memory import MemorySaver
	from langgraph.graph import START, MessagesState, StateGraph
	from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
	from langchain_core.messages import HumanMessage, AIMessage
	import gradio as gr

	# Prompt template: a fixed system instruction, then the conversation messages.
	prompt = ChatPromptTemplate.from_messages([
	    ("system", "You talk like a person of the Middle Ages. Answer all questions to the best of your ability."),
	    # Slot filled from the "messages" input variable when the prompt is invoked.
	    MessagesPlaceholder(variable_name="messages"),
	])

	# Rate limiter handed to the chat model below.
	# NOTE(review): the earlier inline comments said "once every second" and
	# "every 100 ms", which do not match the values actually passed here.
	# The comments now describe the values as written; confirm the intended
	# rate for the MistralAI free tier.
	rate_limiter = InMemoryRateLimiter(
	requests_per_second=0.1, # 0.1 requests/s, i.e. on average one request every 10 seconds
	check_every_n_seconds=0.01, # Wake up every 10 ms to check whether allowed to make a request
	max_bucket_size=10, # Controls the maximum burst size.
	)

	# Mistral chat model; it is given the rate limiter defined above.
	model = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)

	# Define a new graph whose state schema is MessagesState
	workflow = StateGraph(state_schema=MessagesState)

	# Define the function that calls the model
	def call_model(state: MessagesState):
	    """Run the prompt-plus-model chain on the current graph state.

	    Args:
	        state: The graph state. It is passed unchanged to the chain, so
	            its "messages" entry feeds the prompt's ``messages``
	            placeholder.

	    Returns:
	        A dict holding the model's response under the "messages" key.
	    """
	    # Pipe the prompt template into the chat model (LangChain `|` composition).
	    chain = prompt | model
	    response = chain.invoke(state)
	    return {"messages": response}

	# Define the (single) node in the graph: START leads straight to "model",
	# and the "model" node runs call_model.
	workflow.add_edge(START, "model")
	workflow.add_node("model", call_model)

	# Add memory: compile the graph with an in-memory checkpointer
	memory = MemorySaver()
	app = workflow.compile(checkpointer=memory)

	# Config with thread. handle_prompt passes this same config on every call,
	# so every invocation uses the thread_id "abc345".
	# NOTE(review): presumably the checkpointer keys saved state by thread_id,
	# which would make all visitors share one conversation - confirm.
	config = {"configurable": {"thread_id": "abc345"}}


	def handle_prompt(query, history):
	    """Answer one chat turn for the Gradio ``ChatInterface``.

	    Args:
	        query: The text the user just submitted.
	        history: Earlier messages supplied by Gradio. It is not read here;
	            only ``query`` is sent to the graph, which was compiled with
	            a checkpointer and is invoked with the module-level ``config``.

	    Returns:
	        The ``content`` of the last message in the graph's output state.

	    Raises:
	        gr.Error: If invoking the graph fails. The original exception is
	            chained as the cause so the real failure stays in the traceback.
	    """
	    input_messages = [HumanMessage(query)]
	    # TODO: stream the reply instead of waiting for the full answer, e.g.
	    # iterate app.stream(..., stream_mode="messages") and yield the
	    # accumulated content of the AIMessage chunks.
	    try:
	        output = app.invoke({"messages": input_messages}, config)
	    except Exception as err:
	        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
	        # SystemExit propagate; `from err` keeps the underlying cause visible.
	        raise gr.Error("Requests rate limit exceeded") from err
	    return output["messages"][-1].content

	# Text passed to the chat interface as its description.
	description = "A MistralAI powered chatbot which talks in the way of ancient times, using Langchain and deployed with Gradio."

	# Chat UI driven by handle_prompt, using the "messages" history format.
	demo = gr.ChatInterface(
	    handle_prompt,
	    type="messages",
	    title="Medieval ChatBot",
	    theme=gr.themes.Citrus(),
	    description=description,
	)

	# Start the Gradio app.
	demo.launch()