File size: 2,908 Bytes
b5ee4bd
635f88c
d0229ea
c5f357c
 
635f88c
b07f7f0
c5f357c
635f88c
 
620aad6
7ef4508
 
 
7c7849c
 
 
635f88c
d0229ea
52919d7
635f88c
5a19baf
635f88c
 
 
 
 
 
 
 
d0229ea
f102058
d0229ea
f102058
d0229ea
635f88c
 
 
d0229ea
 
 
 
f102058
d0229ea
f102058
d0229ea
635f88c
 
 
 
 
 
 
e1f691a
635f88c
 
 
 
 
 
 
7ef4508
 
 
ab97f41
7ef4508
 
 
 
 
 
7c7849c
6b4b227
7ef4508
 
635f88c
 
6b4b227
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import streamlit as st
from transformers import pipeline
#from diffusers import DiffusionPipeline
from PIL import Image
import requests
import io
from io import BytesIO

@st.cache_resource
def _load_pipeline(task, model):
    """Build a Hugging Face pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline objects avoids reloading the model weights each time.

    Args:
        task: Pipeline task name passed to ``transformers.pipeline``.
        model: Model identifier passed as the ``model`` keyword.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    return pipeline(task, model=model)


# Load the image-to-text pipeline
image_to_text = _load_pipeline("image-to-text", "nlpconnect/vit-gpt2-image-captioning")

# Load the text mask pipeline
generate_mask = _load_pipeline("fill-mask", "google-bert/bert-base-uncased")

# Load the text generation pipeline
extend_text = _load_pipeline("text-generation", "pranavpsv/genre-story-generator-v2")

# Load the text-to-image model
#text_to_image = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")

def main():
    """Render the SmartCart page and run the selected generation flow.

    The user picks either free text or an image URL. For text, the input is
    used directly as the caption; for a URL, the image is fetched and
    captioned first. In both cases the caption is passed to
    ``generate_mask_from_result`` and the caption and generated text are
    shown with ``st.success``.

    Download or image-decoding failures in the URL flow are reported with
    ``st.error`` instead of propagating as an unhandled exception.
    """
    st.title("SmartCart (Product Recommender)")

    # User input for text or URL
    input_option = st.radio("Select input option:", ("Text", "URL"))

    # Input text
    if input_option == "Text":
        text_input = st.text_input("Enter the text:")
        if st.button("Generate Story and Image") and text_input:
            generated_text = generate_mask_from_result(text_input)
            st.success(f'Generated Caption: {text_input}')
            st.success(f'Generated Text: {generated_text}')

    # Input URL
    elif input_option == "URL":
        image_url = st.text_input("Enter the image URL:")
        if st.button("Generate Story and Image") and image_url:
            try:
                image_text = image_to_text_from_url(image_url)
            except (requests.RequestException, OSError) as err:
                # RequestException covers bad URLs and network/HTTP failures;
                # OSError covers PIL failing to decode the downloaded bytes.
                st.error(f"Could not load an image from that URL: {err}")
                return
            generated_text = generate_mask_from_result(image_text)
            st.success(f'Generated Caption: {image_text}')
            st.success(f'Generated Text: {generated_text}')
            

def image_to_text_from_file(uploaded_file):
    """Caption an image supplied as a binary file-like object.

    Args:
        uploaded_file: Object with a ``read()`` method returning the encoded
            image bytes.

    Returns:
        The ``generated_text`` field of the first caption produced by the
        ``image_to_text`` pipeline.

    Raises:
        OSError: If PIL cannot decode the bytes as an image.
    """
    # Decode to a PIL image first, as image_to_text_from_url does: the
    # pipeline is given an image object rather than a raw byte buffer.
    image = Image.open(io.BytesIO(uploaded_file.read()))
    return image_to_text(image)[0]['generated_text']

def image_to_text_from_url(image_url, timeout=10):
    """Download an image and caption it.

    Args:
        image_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot hang the app indefinitely.

    Returns:
        The ``generated_text`` field of the first caption produced by the
        ``image_to_text`` pipeline.

    Raises:
        requests.RequestException: If the URL is invalid, the request fails
            or times out, or the server answers with an error status.
        OSError: If PIL cannot decode the response body as an image.
    """
    # NOTE(review): image_url comes straight from user input; if this app is
    # deployed publicly, consider restricting which hosts may be fetched.
    response = requests.get(image_url, timeout=timeout)
    # Fail on 4xx/5xx here rather than trying to decode an error page.
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))
    return image_to_text(image)[0]['generated_text']

def generate_image(text):
    """Generate an image from a prompt built around *text* and display it.

    The module-level load of the text-to-image model is currently commented
    out, so this function first checks that ``text_to_image`` exists and
    shows a warning instead of raising ``NameError`` when it does not.

    Args:
        text: Item description inserted into the prompt.
    """
    model = globals().get("text_to_image")
    if model is None:
        st.warning("Image generation is unavailable: the text-to-image model is not loaded.")
        return

    # NOTE(review): the literal "[MASK]" is part of the prompt sent to the
    # image model; it looks copied from the fill-mask prompt - confirm intent.
    rephrased_text = "I want to buy " + text + " and [MASK] for my children"
    output = model(rephrased_text)
    # NOTE(review): a diffusers pipeline presumably returns an object with an
    # `images` list rather than a bare image - confirm. Fall back to the raw
    # return value when there is no such attribute.
    images = getattr(output, "images", None)
    generated_image = images[0] if images else output
    st.image(generated_image, caption="Generated Image", use_column_width=True)

def generate_mask_from_result(text):
    """Suggest a companion item for *text* and expand both into a story.

    The ``generate_mask`` fill-mask pipeline proposes a word for the
    ``[MASK]`` slot in a fixed shopping sentence. The first proposal that is
    not simply *text* again is combined with *text* in a prompt for the
    ``extend_text`` text-generation pipeline.

    Args:
        text: Item description (user input or an image caption).

    Returns:
        The ``generated_text`` field of the first ``extend_text`` result.
    """
    candidates = generate_mask(
        f"I want to buy 2 toys for my children. I will buy {text} and [MASK]."
    )

    # If *text* itself contains "[MASK]", the pipeline returns one candidate
    # list per mask; the template's own mask comes last in the sentence.
    if candidates and isinstance(candidates[0], list):
        candidates = candidates[-1]

    # The fill-mask model is an uncased BERT, so compare case-insensitively
    # to avoid suggesting the input item again.
    wanted = text.strip().lower()
    result = next(
        (
            candidate['token_str'].strip()
            for candidate in candidates or []
            if candidate['token_str'].strip().lower() != wanted
        ),
        None,
    )

    # Without a usable suggestion, leave it out rather than writing "None"
    # into the story prompt.
    if result is None:
        prompt = f"A child with {text} "
    else:
        prompt = f"A child with {text} and {result} "

    extended_text = extend_text(prompt)
    return extended_text[0]['generated_text']


# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()