"""Gradio chat demo for a Llama-2 chat model with a mental-health adapter.

On import this script logs in to the Hugging Face Hub with the ``HF_TOKEN``
environment variable, loads the base model and the adapter, and launches a
Gradio interface whose callback is :func:`predict`.
"""

import os

import gradio as gr
import torch
from adapters import AutoAdapterModel
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

HF_TOKEN = os.getenv("HF_TOKEN")
login(token=HF_TOKEN)

title = "Mental Health Chatbot"
description = "This bot is using a fine-tuned version of meta-llama/Llama-2-7b-chat-hf"

model_id = "meta-llama/Llama-2-7b-chat-hf"
adapter_model_id = "vojay/Llama-2-7b-chat-hf-mental-health"

# Upper bound passed to ``generate`` as ``max_length``.
MAX_SEQUENCE_LENGTH = 2000

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
model.load_adapter(adapter_model_id)

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


def get_base_prompt() -> str:
    """Return the system prompt that is prepended to every user message."""
    # NOTE(review): "emphatic" is probably meant to be "empathetic"; the text
    # is left unchanged here because it is sent to the model verbatim.
    return (
        " You are a knowledgeable and supportive psychologist. "
        "You provide emphatic, non-judgmental responses to users seeking "
        "emotional and psychological support. "
        "Provide a safe space for users to share and reflect, focus on "
        "empathy, active listening and understanding. "
    )


def format_prompt(base: str, user_message: str) -> str:
    """Wrap a system prompt and a user message in Llama-2 chat markup.

    Args:
        base: System prompt placed between the ``<<SYS>>`` delimiters.
        user_message: The user's message for this turn.

    Returns:
        The prompt string ``[INST] <<SYS>>...<</SYS>>... [/INST]``.
    """
    # The delimiters previously read "<>", which is what remains of
    # "<<SYS>>" / "<</SYS>>" after HTML-tag stripping; restored here.
    return f"[INST] <<SYS>>{base}<</SYS>>{user_message} [/INST]"


def _pair_turns(transcript: str, separator: str):
    """Split *transcript* on *separator* and pair up consecutive segments.

    Segments at even positions become the first item of each pair and the
    following segment the second; a trailing unpaired segment is dropped.
    """
    segments = transcript.split(separator)
    return list(zip(segments[::2], segments[1::2]))


def predict(input, history=None):  # noqa: A002 - name kept for callers
    """Generate a reply to *input* and return the updated conversation.

    Args:
        input: The user's message for this turn.
        history: Token ids returned by the previous call (a list containing
            one list of ints), or ``None`` / empty on the first turn.

    Returns:
        A ``(response, history)`` tuple: ``response`` is a list of
        two-string tuples built from the decoded conversation, and
        ``history`` is the full generated token-id sequence as nested lists,
        to be passed back in on the next call.
    """
    prompt = format_prompt(get_base_prompt(), input)
    new_user_input_ids = tokenizer.encode(
        f"{prompt}{tokenizer.eos_token}", return_tensors="pt"
    )

    # On the first turn there is nothing to prepend; this also avoids
    # building a tensor from ``None`` when no state has been stored yet.
    if history:
        bot_input_ids = torch.cat(
            [torch.LongTensor(history), new_user_input_ids], dim=-1
        )
    else:
        bot_input_ids = new_user_input_ids

    history = model.generate(
        bot_input_ids,
        max_length=MAX_SEQUENCE_LENGTH,
        pad_token_id=tokenizer.eos_token_id,
    ).tolist()

    # Turns are delimited by the tokenizer's own end-of-sequence token (the
    # same one appended to the prompt above), not a hard-coded literal.
    transcript = tokenizer.decode(history[0])
    response = _pair_turns(transcript, tokenizer.eos_token)
    return response, history


gr.Interface(
    fn=predict,
    title=title,
    description=description,
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
).launch()