Spaces:

pseudolab
/

MiniMed_EHR_Analyst

Sleeping

File size: 2,498 Bytes

fc5ee8e
 
130318f
512eba0
 
b8dfcd5
034300b
fc5ee8e
 
 
130318f
ba57ab6
130318f
 
bcb7f51
130318f
ba57ab6
 
 
fc5ee8e

import pandas as pd
import streamlit as st
from peft import PeftConfig, PeftModel
# NOTE(review): confirm the installed streamlit_theme package really exports `theme`.
from streamlit_theme import theme
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    MistralForCausalLM,
    pipeline,
)

# NOTE(review): the public Streamlit API does not document a `set_theme`
# function, so an unconditional call would abort the app with AttributeError.
# Apply the theme only if the running build actually provides the hook, and
# confirm how 'pseudolab/huggingface-korea-theme' is really meant to be applied.
if hasattr(st, "set_theme"):
    st.set_theme('pseudolab/huggingface-korea-theme')

# Note: this app must always be used in compliance with applicable laws and
# regulations if it is used with real patient data.

# Hub repository that holds the fine-tuned PEFT adapter.
ADAPTER_REPO = "pseudolab/K23_MiniMed"


@st.cache_resource
def _load_model_assets():
    """Load the tokenizer, the adapter config and the adapted model once.

    Streamlit re-executes the whole script on every user interaction;
    caching keeps the weights in memory across those reruns instead of
    reloading them each time.

    Returns:
        A ``(tokenizer, peft_config, peft_model)`` tuple.
    """
    # Instantiate the tokenizer. The tokenizer is given no dedicated pad
    # token here, so the EOS token is reused for (left) padding.
    tok = AutoTokenizer.from_pretrained(
        "mistralai/Mistral-7B-v0.1",
        trust_remote_code=True,
        padding_side="left",
    )
    tok.pad_token = tok.eos_token

    # Load the PEFT model: the adapter config names the base checkpoint the
    # adapter was trained on, so load that checkpoint first and then attach
    # the adapter weights on top of it.
    config = PeftConfig.from_pretrained(ADAPTER_REPO)
    base_model = MistralForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        trust_remote_code=True,
    )
    adapted_model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
    return tok, config, adapted_model


tokenizer, peft_config, peft_model = _load_model_assets()

# Upload patient data
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

# Prepare the context
def prepare_context(data):
    """Turn a table of patient data into token IDs for the model.

    The table is rendered to plain text without its index and without its
    column headers, encoded with the module-level ``tokenizer`` as PyTorch
    tensors, and clipped to ``tokenizer.model_max_length`` tokens when it
    is longer than that.

    Args:
        data: Object exposing ``to_string(index=..., header=...)``; the
            caller passes a pandas ``DataFrame``.

    Returns:
        The encoded token IDs, indexed here as a 2-D tensor whose second
        axis is the token position.
    """
    rendered = data.to_string(index=False, header=False)
    token_ids = tokenizer.encode(rendered, return_tensors="pt")

    limit = tokenizer.model_max_length
    if token_ids.shape[1] <= limit:
        # Already fits: hand the encoding back untouched.
        return token_ids

    # Too long for the model: keep only the leading `limit` tokens.
    return token_ids[:, :limit]

# Upper bound on freshly generated tokens per request. Without an explicit
# budget the generation length falls back to the library default, which can be
# shorter than the prompt itself.
MAX_NEW_TOKENS = 256

if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file)
    except pd.errors.EmptyDataError:
        # A completely empty upload has no columns to parse.
        data = pd.DataFrame()

    if data.empty:
        st.write("Please enter patient data")
    else:
        st.write(data)

        # Build the generation pipeline once and reuse it for both requests.
        # The tokenizer must be passed explicitly because the model is given
        # as an object rather than as a hub name.
        generator = pipeline('text-generation', model=peft_model, tokenizer=tokenizer)

        # prepare_context returns (possibly truncated) token IDs, but the
        # text-generation pipeline takes text, so decode the IDs back.
        context = prepare_context(data)
        context_text = tokenizer.decode(context[0], skip_special_tokens=True)

        # Generate text based on the context
        generated_text = generator(context_text, max_new_tokens=MAX_NEW_TOKENS)[0]['generated_text']
        st.write(generated_text)

        # Internally prompt the model to analyze the EHR patient data
        prompt = "You are an Electronic Health Records analyst with nursing school training. Please analyze patient data that you are provided here. Give an organized, step-by-step, formatted health records analysis. You will always be truthful and if you do nont know the answer say you do not know."

        # The instruction refers to data "provided here", so the patient data
        # has to be part of the same model input.
        analysis_input = f"{prompt}\n\n{context_text}"

        # Generate text based on the prompt
        generated_text = generator(analysis_input, max_new_tokens=MAX_NEW_TOKENS)[0]['generated_text']
        st.write(generated_text)

else:
    st.write("No file uploaded")