import os

import streamlit as st
from PIL import Image
import numpy as np

# Designing the interface
st.title("French Image Caption App")
# For newline
st.write('\n')

st.markdown(
    """
    An image captioning model built by combining the ViT model and a French GPT2 model.\n
    [Part of the [Huggingface JAX/Flax event](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/).]\n
    The pretrained weights of both models are loaded, and a set of randomly initialized cross-attention weights is added.\n
    The model is trained on 65000 images from the COCO dataset for about 1500 steps.\n
    The original English captions are translated to French for training purposes.\n
    """
)

# image = Image.open('samples/val_000000039769.jpg')
# show = st.image(image, use_column_width=True)
# show.image(image, 'Preloaded Image', use_column_width=True)

with st.spinner('Loading and compiling ViT-GPT2 model ...'):
    from model import *

st.sidebar.write('ViT-GPT2 model loaded :)')

st.sidebar.title("Select a sample image")
sample_name = st.sidebar.selectbox(
    "Please choose a sample image",
    sample_fns
)

sample_name = f"COCO_val2014_{sample_name.replace('.jpg', '').zfill(12)}.jpg"
sample_path = os.path.join(sample_dir, sample_name)

image = Image.open(sample_path)
show = st.image(image, use_column_width=True)
show.image(image, '\n\nSelected Image', use_column_width=True)

# For newline
st.sidebar.write('\n')

with st.spinner('Generating image caption ...'):
    caption = predict(image)

image.close()

# st.success(f'{caption}')
st.header(f'{caption}')

st.sidebar.header("ViT-GPT2 predicts:")
st.sidebar.write(f"{caption}", '\n')
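

# ---------------------------------------------------------------------------
# Illustrative sketch (not called by the app): one way to assemble a ViT
# encoder with a French GPT2 decoder, assuming the transformers
# FlaxVisionEncoderDecoderModel API. The actual `model.py` imported above may
# be implemented differently, and the French GPT2 checkpoint named below is
# only an example, not necessarily the one used here.
# ---------------------------------------------------------------------------
def build_vit_gpt2_sketch():
    from transformers import (
        AutoTokenizer,
        FlaxVisionEncoderDecoderModel,
        ViTFeatureExtractor,
    )

    vit_checkpoint = "google/vit-base-patch16-224-in21k"
    gpt2_fr_checkpoint = "asi/gpt-fr-cased-small"  # assumed French GPT2 checkpoint

    # Pretrained encoder and decoder weights are loaded; the cross-attention
    # weights connecting them are randomly initialized.
    model = FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
        vit_checkpoint, gpt2_fr_checkpoint
    )
    model.config.decoder_start_token_id = model.config.decoder.bos_token_id
    model.config.pad_token_id = model.config.decoder.eos_token_id

    feature_extractor = ViTFeatureExtractor.from_pretrained(vit_checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(gpt2_fr_checkpoint)

    def predict_sketch(image):
        # Preprocess the image, generate token ids, and decode them to French text.
        pixel_values = feature_extractor(images=image, return_tensors="np").pixel_values
        output_ids = model.generate(pixel_values, max_length=16, num_beams=4).sequences
        return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]

    return model, feature_extractor, tokenizer, predict_sketch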