# Author: MJannik
# Commit message: "Update app.py"
# Commit: 9fabe04 (verified)
import gradio as gr
import json
import uuid
from langfuse import Langfuse
from langfuse.decorators import observe, langfuse_context
from langfuse.openai import openai, OpenAI
import os
# Langfuse credentials and host, read from the environment. Langfuse() below is
# constructed without arguments, so these names are not passed on explicitly
# (presumably the SDK picks up the same environment variables itself).
LANGFUSE_PUBLIC_KEY = os.environ.get("LANGFUSE_PUBLIC_KEY")
LANGFUSE_SECRET_KEY = os.environ.get("LANGFUSE_SECRET_KEY")
LANGFUSE_HOST = os.environ.get("LANGFUSE_HOST")

# OpenAI-compatible client (imported from langfuse.openai) pointed at a
# Hugging Face Inference API model endpoint instead of the default OpenAI host.
_MODEL_ENDPOINT = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"  # replace with your endpoint url
_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")  # replace with your token
client = OpenAI(base_url=_MODEL_ENDPOINT + "/v1/", api_key=_ACCESS_TOKEN)

# Langfuse SDK client; handle_like uses it to attach scores to traces.
langfuse = Langfuse()

# Session identifier attached to every trace; assigned by set_new_session_id().
session_id = None
def set_new_session_id():
    """Replace the module-level ``session_id`` with a fresh random UUID4 string."""
    global session_id
    fresh_uuid = uuid.uuid4()
    session_id = str(fresh_uuid)
# Initialize the session id once at import time so the first chat already has one
set_new_session_id()
# Global reference for the current trace_id, which is used later to add user feedback.
# create_response overwrites it on every call, so it always refers to the most
# recently started trace.
current_trace_id = None
# The decorator captures overall timings and input/output, and lets the body
# adjust trace metadata via `langfuse_context`.
@observe()
async def create_response(
    prompt: str,
    history,
):
    """Stream chat-history updates for a single user turn.

    Yields twice: first the history with the user's prompt appended (so the UI
    can display it before the model answers), then that same history plus the
    assistant's reply.

    Args:
        prompt: The user's message.
        history: Prior messages as ``{"role": ..., "content": ...}`` dicts. A
            falsy value starts a new conversation seeded with a system message.
            The object passed in is not modified.

    Yields:
        The updated list of message dicts.
    """
    # Function-scope import so this block does not depend on a file-level one.
    import asyncio

    # Save trace id in global var so feedback can be attached to it later
    global current_trace_id
    current_trace_id = langfuse_context.get_current_trace_id()

    # Add session_id to the Langfuse trace to enable session tracking
    langfuse_context.update_current_trace(
        name="gradio_demo_chat",
        session_id=session_id,
        input=prompt,
    )

    # Add prompt to history. A new list is built instead of appending in place
    # so the caller's list is left untouched.
    if not history:
        history = [{"role": "system", "content": "You are a friendly chatbot"}]
    history = [*history, {"role": "user", "content": prompt}]
    yield history

    # Get completion via the OpenAI SDK (auto-instrumented by Langfuse via the
    # import). The call is synchronous, so it is run in a worker thread to keep
    # the event loop free while waiting for the model. asyncio.to_thread
    # propagates the current contextvars context into that thread, so
    # context-based state (which the Langfuse decorators presumably rely on)
    # stays visible to the instrumented call.
    oai_response = await asyncio.to_thread(
        client.chat.completions.create,
        messages=history,
        model="tgi",
    )
    # The SDK may return `None` content; normalize it to an empty string.
    response = {
        "role": "assistant",
        "content": oai_response.choices[0].message.content or "",
    }

    # Customize trace output for better readability in Langfuse Sessions
    langfuse_context.update_current_trace(
        output=response["content"],
    )
    yield history + [response]
async def respond(prompt: str, history):
    """Relay every history update produced by ``create_response`` unchanged."""
    updates = create_response(prompt, history)
    async for chat_state in updates:
        yield chat_state
def handle_like(data: gr.LikeData):
    """Record like/dislike feedback as a Langfuse score.

    A truthy ``data.liked`` is stored as 1, anything else as 0, under the score
    name "user-feedback". The score is attached to the trace id currently held
    in the module-level ``current_trace_id``.
    """
    feedback_value = 1 if data.liked else 0
    langfuse.score(
        value=feedback_value,
        name="user-feedback",
        trace_id=current_trace_id,
    )
async def handle_retry(history, retry_data: gr.RetryData):
    """Re-run the message at ``retry_data.index``.

    Everything before that index is kept as the conversation so far, and the
    content of the message at the index is sent again as the prompt. Updates
    from ``respond`` are relayed unchanged.
    """
    retry_at = retry_data.index
    kept_history = history[:retry_at]
    prompt_to_resend = history[retry_at]["content"]
    async for chat_state in respond(prompt_to_resend, kept_history):
        yield chat_state
# UI definition: a heading, the chat window and a single-line input box,
# followed by the event wiring.
with gr.Blocks() as demo:
    gr.Markdown("# Chatbot using 🤗 Gradio + 🪢 Langfuse")

    # First entry is left unset; the second is the logo image URL.
    avatar_pair = (
        None,
        "https://static.langfuse.com/cookbooks/gradio/hf-logo.png",
    )
    chatbot = gr.Chatbot(
        label="Chat",
        type="messages",
        show_copy_button=True,
        avatar_images=avatar_pair,
    )
    prompt = gr.Textbox(max_lines=1, label="Chat Message")

    # Submitting the textbox streams history updates into the chatbot.
    prompt.submit(fn=respond, inputs=[prompt, chatbot], outputs=[chatbot])
    # Retrying a message re-sends it through handle_retry.
    chatbot.retry(fn=handle_retry, inputs=chatbot, outputs=[chatbot])
    # Like/dislike clicks are recorded as Langfuse scores.
    chatbot.like(fn=handle_like, inputs=None, outputs=None)
    # Clearing the chat assigns a new session id.
    chatbot.clear(fn=set_new_session_id)
if __name__ == "__main__":
    # Launch the app only when this file is run as a script.
    launch_options = {"share": True, "debug": True}
    demo.launch(**launch_options)