import gradio as gr
from transformers import pipeline, set_seed
import torch

# Cache loaded pipelines so each model is loaded once, not on every request
_pipelines = {}

def get_generator(model_name):
    if model_name not in _pipelines:
        _pipelines[model_name] = pipeline(
            "text-generation", model=model_name, torch_dtype=torch.float16
        )
    return _pipelines[model_name]

# Function to generate a response using the entire conversation history
def generate_response(message, history, model_name, sampling_temperature, max_tokens, top_p):
    generator = get_generator(model_name)
    set_seed(42)  # Fixed seed for reproducibility; change or remove for varied replies

    # Serialize the history plus the new user message into ChatML format
    messages = history + [{"role": "user", "content": message}]
    conversation = ""
    for turn in messages:
        conversation += f"<|im_start|>{turn['role']}\n{turn['content']}<|im_end|>\n"
    conversation += "<|im_start|>assistant\n"  # generation prompt for the assistant turn

    # Generate the response; max_new_tokens bounds only the newly generated text
    response = generator(
        conversation,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=sampling_temperature,
        top_p=top_p,
        top_k=12,
        repetition_penalty=1.1,
        return_full_text=False,  # return only the assistant's reply, not the prompt
    )
    return response[0]["generated_text"]

# Gradio chatbot interface with conversation history and sampling controls
iface = gr.ChatInterface(
    fn=generate_response,
    type="messages",
    additional_inputs=[
        gr.Dropdown(
            choices=[
                "Locutusque/TinyMistral-248M-v2.5-Instruct",
                "Locutusque/Hercules-1.0-Mistral-7B",
                "Locutusque/UltraQwen-1_8B",
            ],
            value="Locutusque/TinyMistral-248M-v2.5-Instruct",
            label="Select Model",
        ),
        gr.Slider(0.1, 2.0, value=1.0, step=0.1, label="Sampling Temperature"),
        gr.Slider(5, 200, value=50, step=5, label="Max Tokens"),
        gr.Slider(0.1, 1.0, value=0.75, step=0.1, label="Top P"),
    ],
)

# Launch Gradio chatbot interface
iface.launch()
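
# A minimal alternative sketch (not wired into the app above): if the selected
# checkpoint ships a chat template, the tokenizer can build the prompt instead
# of hand-rolling the ChatML tags. The model name below is just one dropdown
# option reused for illustration; whether it defines a chat_template is an
# assumption, so keep the manual ChatML path as a fallback.
#
# from transformers import AutoTokenizer
#
# tokenizer = AutoTokenizer.from_pretrained("Locutusque/TinyMistral-248M-v2.5-Instruct")
# prompt = tokenizer.apply_chat_template(
#     [{"role": "user", "content": "Hello!"}],
#     tokenize=False,              # return the formatted prompt string
#     add_generation_prompt=True,  # append the assistant turn header
# )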