import torch
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

# Falcon-7B resharded in bfloat16 (small shards ease low-memory loading)
model_name = "ybelkada/falcon-7b-sharded-bf16"
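# Launch with the Streamlit CLI, e.g.: streamlit run chatbot.py  (file name is illustrative)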

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # Falcon has no pad token; reuse EOS

device = "cpu"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # float16 kernels are poorly supported on CPU; bfloat16 matches the checkpoint
    device_map=device,
    trust_remote_code=True,  # this checkpoint uses Falcon's custom modeling code, like the tokenizer
)
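# CPU inference with a 7B model is slow; on a CUDA machine, device_map="auto"
# lets Accelerate place the weights on the GPU automatically.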

st.title("🦜 Falcon-7B Chatbot")
st.write("Ask me anything!")

# Keep the conversation across Streamlit reruns
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

user_input = st.text_input("You:", "")
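# Note: st.text_input keeps its value across reruns, so the block below runs
# again on every rerun while the box is non-empty; st.chat_input avoids this.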

if user_input:
    inputs = tokenizer(user_input, return_tensors="pt")
    inputs.pop("token_type_ids", None)  # Falcon's forward() does not accept token_type_ids
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=200,  # cap new tokens only; max_length would count the prompt too
            do_sample=True,
            top_k=50,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad-token warning
        )

    # Decode only the newly generated tokens (slice off the echoed prompt)
    response = tokenizer.decode(output[0, inputs["input_ids"].shape[-1]:], skip_special_tokens=True)

    st.session_state.chat_history.append(("You", user_input))
    st.session_state.chat_history.append(("Bot", response))

# Render the full transcript on every rerun
for sender, message in st.session_state.chat_history:
    st.write(f"**{sender}:** {message}")
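# A richer transcript could use Streamlit's chat primitives instead, e.g.:
#   for sender, message in st.session_state.chat_history:
#       with st.chat_message("user" if sender == "You" else "assistant"):
#           st.write(message)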