"""Streamlit app that describes an uploaded image and writes a story from it.

Flow: the user uploads an image, a vision-language model produces a short
description of it, and a text-generation model is prompted with that
description to produce a story. Both results are shown on the page.
"""

import einops  # noqa: F401  # not referenced directly; presumably needed by the remote model code - confirm
import streamlit as st
import torch
import transformers
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): DEVICE is defined but never referenced in this script; kept so
# the module-level name still exists. Confirm whether the models should be
# moved to it explicitly.
DEVICE = "cuda:0"

# Image-description model, pinned to a revision so the remote code is stable.
VISION_MODEL_ID = "vikhyatk/moondream2"
VISION_MODEL_REVISION = "2024-05-08"

# Text-generation model used to turn the description into a story.
STORY_MODEL_ID = "meta-llama/Meta-Llama-3-8B"

DESCRIPTION_PROMPT = "Describe this image in a few sentences."
STORY_PROMPT_PREFIX = "Generate a short story based on image description: "


@st.cache_resource
def _load_vision_model():
    """Load the image-description model and its tokenizer once per process.

    Streamlit re-executes the script on every interaction; caching avoids
    re-instantiating the model on each rerun.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model = AutoModelForCausalLM.from_pretrained(
        VISION_MODEL_ID,
        trust_remote_code=True,
        revision=VISION_MODEL_REVISION,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        VISION_MODEL_ID,
        revision=VISION_MODEL_REVISION,
    )
    return model, tokenizer


@st.cache_resource
def _load_story_pipeline():
    """Build the text-generation pipeline once per process.

    Returns:
        A ``transformers`` text-generation pipeline for ``STORY_MODEL_ID``.
    """
    return transformers.pipeline(
        "text-generation",
        model=STORY_MODEL_ID,
        model_kwargs={"torch_dtype": torch.bfloat16},
        device_map="auto",
    )


def main():
    """Render the page and run the describe-then-narrate flow."""
    # Add a header
    st.title("BEST Story Teller...Ever!")

    uploaded_file = st.file_uploader(
        "Choose an image...", type=["jpg", "png", "jpeg"]
    )
    # Nothing to do until the user has provided an image.
    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image.', use_column_width=True)

    # Step 1: describe the image.
    model, tokenizer = _load_vision_model()
    enc_image = model.encode_image(image)
    answer = model.answer_question(enc_image, DESCRIPTION_PROMPT, tokenizer)

    st.header("Image Description Generated.")
    st.success(answer)

    # Step 2: generate a story from the description.
    generator = _load_story_pipeline()
    outputs = generator(STORY_PROMPT_PREFIX + answer)
    # The pipeline returns a list with one dict per generated sequence; show
    # the text itself rather than the raw list-of-dicts structure.
    story = outputs[0]["generated_text"]

    st.header("Story Generated.")
    st.success(story)


# Streamlit executes this file top to bottom on every rerun.
main()