|
# Install PyTorch 2.3.0 built for CUDA 12.1 from the official cu121 wheel index.
!pip install torch==2.3.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
|
|
|
import sys |
|
print("Python version:", sys.version) |
|
|
|
|
|
import torch |
|
print("PyTorch version:", torch.__version__) |
|
|
|
!pip install "unsloth[cu121-torch230] @ git+https://github.com/unslothai/unsloth.git" |
|
|
|
# Triton backs Unsloth's fused GPU kernels.
!pip install triton
|
|
|
from unsloth import FastLanguageModel |
|
|
|
import torch |
|
max_seq_length = 2048  # context window shared by the prompt and the generated reply
dtype = None           # None lets Unsloth auto-detect: bfloat16 on Ampere+ GPUs, float16 otherwise
load_in_4bit = True    # load the weights 4-bit quantized to cut VRAM usage
|
|
|
|
|
|
|
|
# Load the fine-tuned counseling model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="DipeshChaudhary/ShareGPTChatBot-Counselchat1",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
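
# Optional: report how much GPU memory the 4-bit weights actually occupy.
print(f"GPU memory allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")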
|
|
|
|
|
from unsloth.chat_templates import get_chat_template |
|
|
|
|
|
# Attach the Llama-3 chat template and map the ShareGPT-style message keys
# ({"from": ..., "value": ...} with roles "human"/"gpt") onto it.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3",
    mapping = {"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
)
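
# To see exactly what the template produces, render a message without
# tokenizing (tokenize=False returns the raw prompt string):
example = [{"from": "human", "value": "hello"}]
print(tokenizer.apply_chat_template(example, tokenize=False, add_generation_prompt=True))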
|
|
|
|
|
|
# Switch the model into Unsloth's optimized inference mode.
FastLanguageModel.for_inference(model)
|
|
|
messages = [ |
|
{"from": "human", "value": "hlo"}, |
|
] |
|
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,  # append the assistant header so the model starts a reply
    return_tensors = "pt",
).to("cuda")
|
|
|
|
|
from transformers import TextStreamer

# TextStreamer prints tokens to stdout as they are generated.
text_streamer = TextStreamer(tokenizer)
|
|
|
|
|
output_ids = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)
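
# The streamer already printed the text live; the call also returns the full
# token ids (prompt + reply), so the text can be recovered without streaming:
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))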
|
|
|
|
|
|
|
def generate_response(conversation_history):
    """Format the running conversation, generate a reply, and return it as text."""
    inputs = tokenizer.apply_chat_template(
        conversation_history,
        tokenize = True,
        add_generation_prompt = True,
        return_tensors = "pt",
    ).to("cuda")

    # The Llama-3 tokenizer ships without a pad token; fall back to EOS.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    output = model.generate(
        inputs,
        max_new_tokens=1024,  # keep well below max_seq_length (2048) so prompt + reply fit
        use_cache=True,
        pad_token_id=tokenizer.pad_token_id,
        # A single unpadded prompt attends to every position, so the mask is all
        # ones. (Deriving it with inputs.ne(pad_token_id) would wrongly mask real
        # EOS tokens once pad is aliased to EOS above.)
        attention_mask=torch.ones_like(inputs),
    )

    # Decode only the newly generated tokens, i.e. everything past the prompt.
    # (Splitting the full decode on the literal word "assistant" breaks whenever
    # the reply itself contains that word.)
    return tokenizer.decode(output[0][inputs.shape[1]:], skip_special_tokens=True).strip()
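
# Minimal single-turn usage sketch; role names follow the ShareGPT mapping
# registered earlier ("human" for the user, "gpt" for the assistant).
history = [{"from": "human", "value": "I've been feeling anxious lately."}]
print(generate_response(history))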
|
|
|
|
|
conversation_history = []
while True:
    user_input = input("User: ")
    if user_input.lower() == "exit":
        print("Exiting...")
        break

    conversation_history.append({"from": "human", "value": user_input})

    response = generate_response(conversation_history)

    # Record the reply under the "gpt" role: the chat-template mapping above only
    # knows "human" and "gpt", so an unmapped role such as "bot" would break
    # apply_chat_template on the next turn.
    conversation_history.append({"from": "gpt", "value": response})

    print("Bot:", response)