from transformers import AutoTokenizer, T5ForConditionalGeneration
import streamlit as st

st.title('How does the LLM choose its words?')

# Load the tokenizer and the seq2seq model; device_map="auto" places the
# weights on a GPU if one is available, otherwise on the CPU.
model_checkpoint = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = T5ForConditionalGeneration.from_pretrained(
    model_checkpoint, load_in_8bit=False, device_map="auto"
)

instruction = st.text_area('Write an instruction:')

# Wrap the user's instruction in an Alpaca-style prompt template.
prompts = [
    f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:"""
]

inputs = tokenizer(
    prompts[0],
    return_tensors="pt",
)
# Move the token ids to wherever device_map placed the model, instead of
# hard-coding "cuda" (which crashes on CPU-only machines).
input_ids = inputs["input_ids"].to(model.device)

if instruction:
    # Round-trip the prompt through the tokenizer to show what the model sees.
    st.write(tokenizer.decode(input_ids[0]))
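
# The script above stops at echoing the tokenized prompt, but the title asks
# how the model chooses its words. A minimal sketch of one way to surface
# that, reusing the model loaded above: generate a few tokens with
# output_scores=True and display the top candidates for the first generated
# token. The max_new_tokens and top-k values are illustrative choices, not
# from the original.
import torch

if instruction:
    outputs = model.generate(
        input_ids,
        max_new_tokens=20,
        output_scores=True,           # keep the logits for each decoding step
        return_dict_in_generate=True,
    )
    st.write(tokenizer.decode(outputs.sequences[0], skip_special_tokens=True))

    # outputs.scores[0] holds the logits over the vocabulary for the first
    # generated token; softmax turns them into the distribution the decoder
    # chose from.
    probs = torch.softmax(outputs.scores[0][0], dim=-1)
    top_probs, top_ids = probs.topk(5)
    for p, i in zip(top_probs.tolist(), top_ids.tolist()):
        st.write(f"{tokenizer.decode([i])!r}: {p:.3f}")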