import os

import gradio as gr
from groq import Groq
from transformers import AutoConfig, AutoModel

# Requires two environment variables: HF_TOKEN (Hugging Face access token)
# and GROQ_API_KEY (Groq API key).
hf_token = os.getenv("HF_TOKEN")  # Make sure you set this environment variable

# Load the fine-tuned model locally. Note that the Groq API call below does
# not consume this object; it only accepts a hosted model ID string.
try:
    config = AutoConfig.from_pretrained(
        "HusseinEid/llama-3-chatbot",
        config_file_name="config.json",
        use_auth_token=hf_token,
    )
    model = AutoModel.from_pretrained(
        "HusseinEid/lora_model",
        config=config,
        use_auth_token=hf_token,
    )
except OSError as e:
    print(f"Error: {e}")

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. You reply with detailed answers.",
}


async def chat_groq(message, history):
    # Rebuild the full conversation: system prompt, prior turns, new message.
    messages = [system_prompt]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": str(user_msg)})
        messages.append({"role": "assistant", "content": str(assistant_msg)})
    messages.append({"role": "user", "content": str(message)})

    response_content = ""
    stream = client.chat.completions.create(
        # Groq expects a hosted model ID string here, not the transformers
        # model object loaded above; "llama3-8b-8192" is one such ID.
        model="llama3-8b-8192",
        messages=messages,
        max_tokens=1024,
        temperature=1.2,
        stream=True,
    )
    # Stream partial responses back to the UI as tokens arrive.
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            response_content += content
            yield response_content


with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.ChatInterface(
        chat_groq,
        clear_btn=None,
        undo_btn=None,
        retry_btn=None,
    )

demo.queue()
demo.launch()