from transformers import AutoTokenizer, T5ForConditionalGeneration
import streamlit as st

st.title('How does the LLM choose its words?')

# Load the tokenizer and the seq2seq model; device_map="auto" places the
# weights on a GPU if one is available, otherwise on the CPU.
model_checkpoint = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = T5ForConditionalGeneration.from_pretrained(
    model_checkpoint, load_in_8bit=False, device_map="auto"
)

instruction = st.text_area('Write an instruction:')

# Wrap the user's instruction in an Alpaca-style prompt template.
prompts = [
    f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:"""
]

inputs = tokenizer(
    prompts[0],
    return_tensors="pt",
)
# Move the token ids to wherever device_map placed the model, instead of
# hard-coding "cuda" (which crashes on CPU-only machines).
input_ids = inputs["input_ids"].to(model.device)

if instruction:
    # Round-trip the prompt through the tokenizer to show what the model sees.
    st.write(tokenizer.decode(input_ids[0]))
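
# The script above stops at echoing the tokenized prompt, but the title asks
# how the model chooses its words. A minimal sketch of one way to surface
# that, reusing the model loaded above: generate a few tokens with
# output_scores=True and display the top candidates for the first generated
# token. The max_new_tokens and top-k values are illustrative choices, not
# from the original.
import torch

if instruction:
    outputs = model.generate(
        input_ids,
        max_new_tokens=20,
        output_scores=True,           # keep the logits for each decoding step
        return_dict_in_generate=True,
    )
    st.write(tokenizer.decode(outputs.sequences[0], skip_special_tokens=True))

    # outputs.scores[0] holds the logits over the vocabulary for the first
    # generated token; softmax turns them into the distribution the decoder
    # chose from.
    probs = torch.softmax(outputs.scores[0][0], dim=-1)
    top_probs, top_ids = probs.topk(5)
    for p, i in zip(top_probs.tolist(), top_ids.tolist()):
        st.write(f"{tokenizer.decode([i])!r}: {p:.3f}")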