# Solshine's picture
# Update app.py
# 70ba04d
# raw
# history blame
# 2.45 kB
import streamlit as st
import pandas as pd
from transformers import pipeline, AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
from peft import PeftModel, PeftConfig
# NOTE: a `gr.Blocks(theme='pseudolab/huggingface-korea-theme')` call used to
# sit here. It was removed because `gr` (Gradio) is never imported, so the
# call raised NameError before the app could start, and this is a Streamlit
# app in which a Gradio theme would have no effect.

# Note: this app must always be used in compliance with applicable laws and
# regulations if it is used with real patient data.

# Hub id of the fine-tuned PEFT adapter.
ADAPTER_ID = "pseudolab/K23_MiniMed"

# Instantiate the tokenizer. Left padding is requested once here; the EOS
# token is reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    trust_remote_code=True,
    padding_side="left",
)
tokenizer.pad_token = tokenizer.eos_token

# Load the PEFT model: first the base checkpoint that the adapter config
# points to, then the adapter weights on top of it.
# NOTE(review): this assumes ADAPTER_ID is an adapter-only repo whose config
# names the right base model - confirm against the Hub repo.
peft_config = PeftConfig.from_pretrained(ADAPTER_ID)
base_model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    trust_remote_code=True,
)
peft_model = PeftModel.from_pretrained(base_model, ADAPTER_ID)

# Upload patient data (CSV only).
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
# Prepare the context
def prepare_context(data):
    """Turn a DataFrame into token ids, capped at the tokenizer's max length.

    The frame is rendered with ``to_string`` (no index, no header), encoded
    with the module-level ``tokenizer`` as a PyTorch tensor, and cut along
    the second dimension when it is longer than
    ``tokenizer.model_max_length``.

    Args:
        data: Object exposing ``to_string(index=..., header=...)``; the
            caller passes the DataFrame read from the uploaded CSV.

    Returns:
        The encoded token ids, truncated if they exceeded the limit.
    """
    table_text = data.to_string(index=False, header=False)
    token_ids = tokenizer.encode(table_text, return_tensors="pt")

    limit = tokenizer.model_max_length
    # Short enough already: hand the ids back untouched.
    if token_ids.shape[1] <= limit:
        return token_ids

    # Too long for the model: keep only the leading `limit` tokens.
    return token_ids[:, :limit]
# Main flow: show the uploaded CSV and ask the model to analyze it.
if uploaded_file is not None:
    # A zero-byte upload makes read_csv raise; treat it like a CSV without
    # rows so the user gets a message instead of a traceback.
    try:
        data = pd.read_csv(uploaded_file)
    except pd.errors.EmptyDataError:
        data = pd.DataFrame()

    if data.empty:
        st.write("Please enter patient data")
    else:
        st.write(data)

        # Internally prompt the model to analyze the EHR patient data.
        prompt = (
            "You are an Electronic Health Records analyst with nursing school "
            "training. Please analyze patient data that you are provided here. "
            "Give an organized, step-by-step, formatted health records analysis. "
            "You will always be truthful and if you do nont know the answer say "
            "you do not know."
        )

        # prepare_context() returns (possibly truncated) token ids, but the
        # text-generation pipeline takes text, so decode the ids back.
        context_ids = prepare_context(data)
        context_text = tokenizer.decode(context_ids[0], skip_special_tokens=True)

        # Build the pipeline once from the loaded PEFT model. The tokenizer
        # is passed explicitly because the model is given as an object
        # rather than as a Hub id.
        generator = pipeline(
            "text-generation",
            model=peft_model,
            tokenizer=tokenizer,
        )

        # Send the instructions and the patient data in a single request so
        # the analysis is actually about the uploaded records. Only the newly
        # generated text is shown; the data itself is already displayed above.
        outputs = generator(
            f"{prompt}\n\n{context_text}",
            max_new_tokens=256,
            return_full_text=False,
        )
        st.write(outputs[0]["generated_text"])
else:
    st.write("No file uploaded")