import gradio as gr
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import AutoPeftModelForCausalLM
from datasets import load_dataset
from huggingface_hub import login
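# Authenticate with the Hugging Face Hub using the HF_TOKEN environment variable / Space secret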
login(token=os.environ.get('HF_TOKEN', None))
model_name = "skaltenp/Meta-Llama-3-8B-sepsis_cases-199900595"
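# 4-bit NF4 quantization so the 8B model fits into limited GPU memory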
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
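# Load the fine-tuned Llama-3-8B sepsis model with the quantization config directly onto the GPU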
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="cuda",  # bitsandbytes places the quantized weights on the GPU; .to() is not supported for 4-bit models
    trust_remote_code=True,
)
model.eval()
#model = AutoPeftModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
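# Tokenizer for the fine-tuned model; reuse the EOS token as the padding token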
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token_id = tokenizer.eos_token_id
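# Sepsis case event logs hosted on the Hub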
train = load_dataset("skaltenp/sepsis_cases")["train"]
def prepare_sample_text(example, tokenizer, remove_indent=False, start=None, end=None):
    """Prepare the text from a sample of the dataset."""
    thread = example["event_list"]
    # Explicit None checks so that start=0 still triggers slicing
    if start is not None and end is not None:
        thread = thread[start:end]
    text = ""
    for message in thread:
        text += f"{message}{tokenizer.eos_token}\n"
    return text
dataset = load_dataset(
    "skaltenp/sepsis_cases",
    token=True,
    download_mode="force_redownload",
)
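# Split into train/validation/test: 80/20 first, then 80/20 of the train portion (64/16/20 overall)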
train_data = dataset["train"].train_test_split(train_size=0.8, shuffle=True, seed=199900595)
test_data = train_data["test"]
train_data = train_data["train"].train_test_split(train_size=0.8, shuffle=True, seed=199900595)
valid_data = train_data["test"]
train_data = train_data["train"]
def generate_answer(question):
    """Generate a continuation for the given sepsis case prefix."""
    #inputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
    inputs = tokenizer(question, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=2048, num_return_sequences=1, do_sample=True)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer
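# Text-in/text-out Gradio UI seeded with example case prefixes from the held-out test split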
iface = gr.Interface(
    fn=generate_answer,
    inputs="text",
    outputs="text",
    title="Straight Outta Logs",
    examples=[
        prepare_sample_text(test_data[0], tokenizer, start=0, end=3),
        prepare_sample_text(test_data[4], tokenizer, start=0, end=5),
        prepare_sample_text(test_data[50], tokenizer, start=0, end=1),
    ],
    description="Use the examples or paste in your own sepsis case.",
)
iface.launch(share=True)  # Launch the Gradio interface