"""Streamlit app: suggest a companion product and a short story for an item.

The item is given either as free text or as an image URL (which is captioned
first).  A fill-mask model proposes a second item, and a text-generation
model extends a prompt built from both items.
"""

import io
from io import BytesIO

import requests
import streamlit as st
from PIL import Image
from transformers import pipeline
#from diffusers import DiffusionPipeline

# Upper bound, in seconds, on how long an image download may take.
_REQUEST_TIMEOUT_SECONDS = 10


@st.cache_resource
def _load_pipeline(task, model):
    """Build a Hugging Face pipeline once and reuse it across script reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, each click would construct all the models again.
    """
    return pipeline(task, model=model)


# Load the image-to-text pipeline
image_to_text = _load_pipeline(
    "image-to-text", "nlpconnect/vit-gpt2-image-captioning"
)

# Load the text mask pipeline
generate_mask = _load_pipeline("fill-mask", "google-bert/bert-base-uncased")

# Load the text generation pipeline
extend_text = _load_pipeline(
    "text-generation", "pranavpsv/genre-story-generator-v2"
)

# Load the text-to-image model (currently disabled; see generate_image)
#text_to_image = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")


def _show_results(caption):
    """Generate the story for ``caption`` and display caption and story."""
    # Generate first so that nothing is displayed if generation fails.
    generated_text = generate_mask_from_result(caption)
    st.success(f'Generated Caption: {caption}')
    st.success(f'Generated Text: {generated_text}')


def main():
    """Render the page and run the pipelines when the button is pressed."""
    st.title("SmartCart (Product Recommender)")

    # User input for text or URL
    input_option = st.radio("Select input option:", ("Text", "URL"))

    # Only one of the two branches is rendered per run, so the identical
    # button labels never coexist on the page.
    if input_option == "Text":
        text_input = st.text_input("Enter the text:")
        if st.button("Generate Story and Image") and text_input:
            #generate_image(text_input)
            _show_results(text_input)

    elif input_option == "URL":
        image_url = st.text_input("Enter the image URL:")
        if st.button("Generate Story and Image") and image_url:
            try:
                image_text = image_to_text_from_url(image_url)
            except (requests.RequestException, OSError) as err:
                # RequestException: bad URL, timeout, HTTP error status.
                # OSError: PIL could not decode the payload as an image.
                st.error(f"Could not load an image from that URL: {err}")
                return
            #generate_image(image_text)
            _show_results(image_text)


def image_to_text_from_file(uploaded_file):
    """Return a caption for an uploaded image file.

    Args:
        uploaded_file: A binary file-like object exposing ``read()``.

    Returns:
        The ``generated_text`` of the first caption produced by the
        image-to-text pipeline.

    Raises:
        OSError: If the bytes cannot be decoded as an image.
    """
    # Decode to a PIL image, as image_to_text_from_url does, instead of
    # passing the raw byte buffer to the pipeline.
    image = Image.open(io.BytesIO(uploaded_file.read()))
    return image_to_text(image)[0]['generated_text']


def image_to_text_from_url(image_url):
    """Download the image at ``image_url`` and return a caption for it.

    Args:
        image_url: HTTP(S) URL of the image.

    Returns:
        The ``generated_text`` of the first caption produced by the
        image-to-text pipeline.

    Raises:
        requests.RequestException: If the URL is invalid, the download times
            out, or the server answers with an error status.
        OSError: If the response body cannot be decoded as an image.
    """
    response = requests.get(image_url, timeout=_REQUEST_TIMEOUT_SECONDS)
    # Fail here rather than feeding an HTML error page to the image decoder.
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))
    return image_to_text(image)[0]['generated_text']


def generate_image(text):
    """Generate an image for ``text`` and display it on the page.

    The text-to-image pipeline is commented out at module level; when no
    ``text_to_image`` is defined, a warning is shown and nothing is drawn
    (previously any call raised ``NameError``).
    """
    text_to_image_fn = globals().get("text_to_image")
    if text_to_image_fn is None:
        st.warning("Image generation is disabled: no text-to-image model is loaded.")
        return

    rephrased_text = "I want to buy " + text + " and [MASK] for my children"
    output = text_to_image_fn(rephrased_text)
    # NOTE(review): diffusers pipelines are expected to return an object with
    # an ``images`` list; fall back to the raw output otherwise -- confirm
    # when the model is re-enabled.
    images = getattr(output, "images", None)
    generated_image = images[0] if images else output
    st.image(generated_image, caption="Generated Image", use_column_width=True)


def generate_mask_from_result(text):
    """Suggest a companion item for ``text`` and extend both into a story.

    Args:
        text: The item description (typed text or an image caption).

    Returns:
        The ``generated_text`` of the first text-generation result for a
        prompt that names ``text`` and, when available, the suggested item.
    """
    candidates = generate_mask(
        f"I want to buy 2 toys for my children. I will buy {text} and [MASK]."
    ) or []
    # If ``text`` itself contains "[MASK]", the pipeline returns one candidate
    # list per mask; the mask appended by the prompt above is the last one.
    if candidates and isinstance(candidates[0], list):
        candidates = candidates[-1]

    # Skip a suggestion that merely repeats the input.  Compare without
    # regard to case or padding: the fill-mask model is uncased, so a
    # capitalised input would otherwise never match its own token.
    wanted = text.strip().lower()
    result = next(
        (
            candidate['token_str']
            for candidate in candidates
            if candidate['token_str'].strip().lower() != wanted
        ),
        None,
    )

    if result is None:
        # No usable suggestion: leave the clause out instead of writing the
        # literal text "None" into the prompt.
        prompt = f"A child with {text} "
    else:
        prompt = f"A child with {text} and {result} "

    extended_text = extend_text(prompt)
    return extended_text[0]['generated_text']


if __name__ == "__main__":
    main()