import subprocess
import warnings

warnings.filterwarnings("ignore", message="The installed version of bitsandbytes was compiled without GPU support")
warnings.filterwarnings("ignore", message="Setting share=True is not supported on Hugging Face Spaces")

# Install required packages at startup (useful on Hugging Face Spaces)
subprocess.run(["pip", "install", "transformers", "torch"], check=True)

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import List, Tuple, Dict

# Default system prompt and type aliases for the chat history/message structures
default_system = 'You are Andrej Karpathy, the most helpful coding assistant, and you solve everything with working code.'
History = List[Tuple[str, str]]
Messages = List[Dict[str, str]]

# Clear the chat session: empty the textbox and reset the history
def clear_session() -> Tuple[str, History]:
    return '', []

# Update the system prompt and clear the history
def modify_system_session(system: str) -> Tuple[str, str, History]:
    if not system:
        system = default_system
    return system, system, []

# Convert the (user, assistant) history into the message format expected by the model
def history_to_messages(history: History, system: str) -> Messages:
    messages = [{'role': 'system', 'content': system}]
    for user_msg, assistant_msg in history:
        messages.append({'role': 'user', 'content': user_msg})
        messages.append({'role': 'assistant', 'content': assistant_msg})
    return messages

# Convert messages back into (user, assistant) history pairs after model processing
def messages_to_history(messages: Messages) -> Tuple[str, History]:
    system = messages[0]['content']
    # Pair each user message with the assistant message that follows it
    history = [(q['content'], r['content']) for q, r in zip(messages[1::2], messages[2::2])]
    return system, history

# Handle one chat turn: build the prompt, generate a response, and update the history
def model_chat(query: str, history: History, system: str) -> Tuple[str, History, str]:
    messages = history_to_messages(history, system)
    messages.append({'role': 'user', 'content': query})
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Note: reloading the model on every call is slow; in practice it should be loaded once at startup
    model_name = "Qwen/CodeQwen1.5-7B-Chat"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    # Apply the chat template so the system prompt and prior turns are part of the prompt
    input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(device)
    outputs = model.generate(input_ids, max_new_tokens=512, pad_token_id=tokenizer.eos_token_id)
    # Decode only the newly generated tokens, not the echoed prompt
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    system, history = messages_to_history(messages + [{'role': 'assistant', 'content': response}])
    return '', history, system

# Set up the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""
CodeQwen1.5-7B-Chat Bot👾
""") with gr.Row(): with gr.Column(scale=3): system_input = gr.Textbox(value=default_system, lines=1, label='System') with gr.Column(scale=1): modify_system = gr.Button("๐Ÿ› ๏ธ Set system prompt and clear history", scale=2) system_state = gr.State(value=default_system) chatbot = gr.Chatbot(label='codeqwen1.5-7b-chat') textbox = gr.Textbox(lines=2, label='Input') with gr.Row(): clear_history = gr.Button("๐Ÿงน Clear History") submit = gr.Button("๐Ÿš€ Send") submit.click(model_chat, inputs=[textbox, chatbot, system_state], outputs=[textbox, chatbot, system_state]) clear_history.click(fn=clear_session, inputs=[], outputs=[textbox, chatbot]) modify_system.click(fn=modify_system_session, inputs=[system_input], outputs=[system_state, system_input, chatbot]) demo.queue(api_open=False) demo.launch(max_threads=30)