import gradio as gr from openai import OpenAI import threading pause_event = threading.Event() resume_event = threading.Event() def predict(message, history, character, api_key, progress=gr.Progress()): client = OpenAI(api_key=api_key) history_openai_format = [] for human, assistant in history: history_openai_format.append({"role": "user", "content": human}) history_openai_format.append({"role": "assistant", "content": assistant}) history_openai_format.append({"role": "user", "content": message}) response = client.chat.completions.create( model='gpt-4o', messages=history_openai_format, temperature=1.0, stream=True ) partial_message = "" for chunk in progress.tqdm(response, desc="Generating"): while pause_event.is_set(): resume_event.wait() if chunk.choices[0].delta.content: partial_message += chunk.choices[0].delta.content yield partial_message def pause(): pause_event.set() resume_event.clear() def resume(): pause_event.clear() resume_event.set() def reset(character): return [], [] # Gradio app with gr.Blocks() as demo: gr.Markdown(f"