wolf1997 committed on
Commit
1d2a699
·
verified ·
1 Parent(s): b3cb8a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -109
app.py CHANGED
@@ -1,110 +1,110 @@
1
-
2
- from PIL import Image
3
- #importing models
4
- from transformers import BlipProcessor, BlipForConditionalGeneration
5
- from langchain_google_genai import ChatGoogleGenerativeAI
6
- from dotenv import load_dotenv
7
- import os
8
- from langchain.output_parsers import StructuredOutputParser, ResponseSchema
9
- import gradio as gr
10
- from diffusers import DiffusionPipeline,StableDiffusion3Pipeline
11
- from huggingface_hub import login
12
-
13
- load_dotenv()
14
-
15
- Hugging_face_token=os.getenv('huggingface_token')
16
-
17
- login(Hugging_face_token)
18
-
19
-
20
- # loading image captionning model
21
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
22
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
23
-
24
- # Set the model name for our LLMs.
25
- GEMINI_MODEL = "gemini-2.0-flash"
26
-
27
- # Store the API key in a variable.
28
- GEMINI_API_KEY = os.getenv("google_api_key")
29
-
30
- class stable_dif:
31
- def __init__(self,sizes):
32
- self.sizes=sizes
33
-
34
- def model(self):
35
- if self.sizes == 'medium':
36
- pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
37
- elif self.sizes == 'large':
38
- pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-large-turbo")
39
- elif self.sizes == 'small':
40
- pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
41
- return pipe
42
-
43
- stable=stable_dif('small')
44
- pipe=stable.model()
45
-
46
-
47
-
48
- def image_story_generator(image,requirement,style):
49
-
50
- raw_image = Image.open(image)
51
-
52
- # get caption from image
53
- inputs = processor(raw_image, return_tensors="pt")
54
- out = model.generate(**inputs, min_length=20)
55
- model_prompt=processor.decode(out[0], skip_special_tokens=True)
56
-
57
- #load gemnini for creating story
58
- llm = ChatGoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3)
59
-
60
- query =f' Write a 4 chapters story based on {model_prompt} and\
61
- that fits the following requirements: {requirement}. Give a detailed\
62
- description of the charaters appearences.'
63
-
64
- result = llm.invoke(query)
65
- story= result.content.replace('\n',' ')
66
-
67
- # create promts for image gen from story
68
- image_prompt_llm=ChatGoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3)
69
-
70
- # create shemas to format output
71
- schemas=[
72
- ResponseSchema(name='prompt 1', description='the prompt'),
73
- ResponseSchema(name='prompt 2', description='the prompt'),
74
- ResponseSchema(name='prompt 3', description='the prompt'),
75
- ResponseSchema(name='prompt 4', description='the prompt')
76
- ]
77
-
78
- # initialize parser for output
79
- parser=StructuredOutputParser.from_response_schemas(schemas)
80
- instructions=parser.get_format_instructions()
81
-
82
- query = f' Based on this story: {story}. Create 4 prompts for stable diffusion that tells of a maximum of 77 tokens\
83
- what happens in each chapters. Describe the characters everytime their name is mentioned. Each image should be created in the same exact style {style}.\
84
- '+ '\n\n'+instructions
85
-
86
- result=image_prompt_llm.invoke(query)
87
- image_prompts = parser.parse(result.content)
88
-
89
- # iterate through the prompts and generate new images
90
- images=[]
91
- for i in image_prompts.keys():
92
-
93
- image = pipe(image_prompts[i]).images[0]
94
- images.append(image)
95
-
96
-
97
- return images, story
98
-
99
- # gradio
100
- interface = gr.Interface(
101
- fn=image_story_generator,
102
- inputs=[gr.Image(type='filepath'),gr.Textbox('enter story requirements'), gr.Textbox('pick a style for the images')],
103
-
104
- outputs=[gr.Gallery(),
105
- gr.Textbox('story')
106
- ],
107
- description='Upload an image to start the story generation process.'
108
- )
109
-
110
  interface.launch()
 
1
+
2
+ from PIL import Image
3
+ #importing models
4
+ from transformers import BlipProcessor, BlipForConditionalGeneration
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+ from dotenv import load_dotenv
7
+ import os
8
+ from langchain.output_parsers import StructuredOutputParser, ResponseSchema
9
+ import gradio as gr
10
+ from diffusers import DiffusionPipeline,StableDiffusion3Pipeline
11
+ from huggingface_hub import login
12
+
13
# Load variables from a .env file into the process environment.
load_dotenv()

# Hugging Face access token, read from the `HFToken` environment variable.
Hugging_face_token = os.getenv('HFToken')

# Authenticate only when a token is actually configured: calling login() with
# None falls back to an interactive token prompt, which cannot be answered
# when the app runs headless.
if Hugging_face_token:
    login(Hugging_face_token)


# Load the image-captioning model and its matching processor.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

# Set the model name for our LLMs.
GEMINI_MODEL = "gemini-2.0-flash"

# Store the API key in a variable (read from the `google_api_key` environment variable).
GEMINI_API_KEY = os.getenv("google_api_key")
29
+
30
class stable_dif:
    """Select and load a Stable Diffusion pipeline by size label.

    Supported labels are 'small', 'medium' and 'large'.
    """

    def __init__(self, sizes):
        # Size label used by model() to pick the checkpoint.
        self.sizes = sizes

    def model(self):
        """Load and return the pipeline that matches ``self.sizes``.

        Returns:
            The pipeline object produced by ``from_pretrained`` for the
            selected checkpoint.

        Raises:
            ValueError: If ``self.sizes`` is not 'small', 'medium' or 'large'.
        """
        if self.sizes == 'medium':
            return StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
        if self.sizes == 'large':
            return StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-large-turbo")
        if self.sizes == 'small':
            return DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
        # Fail with a clear message instead of falling through to an unbound
        # local variable.
        raise ValueError(
            f"Unknown model size {self.sizes!r}; expected 'small', 'medium' or 'large'."
        )
42
+
43
# Build the 'small' diffusion pipeline once, at import time;
# image_story_generator reads the module-level `pipe`.
stable = stable_dif('small')
pipe = stable.model()
45
+
46
+
47
+
48
def image_story_generator(image, requirement, style):
    """Turn an uploaded image into a four-chapter story with one picture per chapter.

    The image is captioned with the BLIP model, Gemini writes a story from
    that caption, Gemini then produces four Stable Diffusion prompts (one per
    chapter), and each prompt is rendered with the module-level ``pipe``.

    Args:
        image: Path of the uploaded image file (the Gradio input is declared
            with ``type='filepath'``); may be None when nothing was uploaded.
        requirement: Free-text requirements the story has to satisfy.
        style: Visual style requested for every generated image.

    Returns:
        A ``(images, story)`` tuple: the list of four generated images and
        the story text with newlines replaced by spaces.

    Raises:
        gr.Error: If no image was supplied.
    """
    if not image:
        # Surface a readable message in the UI instead of failing inside PIL.
        raise gr.Error('Please upload an image before generating a story.')

    # Close the file handle promptly and normalise to RGB so images with an
    # alpha channel or a palette are captioned like ordinary photos.
    with Image.open(image) as opened:
        raw_image = opened.convert('RGB')

    # Get a caption from the image.
    inputs = processor(raw_image, return_tensors="pt")
    out = model.generate(**inputs, min_length=20)
    model_prompt = processor.decode(out[0], skip_special_tokens=True)

    # One Gemini client serves both requests; the settings are identical.
    llm = ChatGoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3)

    story_query = (
        f' Write a 4 chapters story based on {model_prompt} and'
        f' that fits the following requirements: {requirement}. Give a detailed'
        ' description of the charaters appearences.'
    )
    story = llm.invoke(story_query).content.replace('\n', ' ')

    # Schemas that force the LLM answer into four named prompt fields.
    prompt_names = [f'prompt {number}' for number in range(1, 5)]
    schemas = [ResponseSchema(name=name, description='the prompt') for name in prompt_names]

    # Parser for the structured output, plus the format instructions it needs.
    parser = StructuredOutputParser.from_response_schemas(schemas)
    instructions = parser.get_format_instructions()

    prompt_request = (
        f' Based on this story: {story}. Create 4 prompts for stable diffusion that tells of a maximum of 77 tokens'
        ' what happens in each chapters. Describe the characters everytime their name is mentioned.'
        f' Each image should be created in the same exact style {style}.'
    )
    prompt_query = prompt_request + '\n\n' + instructions

    image_prompts = parser.parse(llm.invoke(prompt_query).content)

    # Read the prompts by schema name so the chapter order is fixed and any
    # extra keys in the LLM's JSON are ignored.
    images = [pipe(image_prompts[name]).images[0] for name in prompt_names]

    return images, story
98
+
99
# Gradio UI: an image and two text fields in, a gallery and the story text out.
# The hint strings are passed as `label=` because the first positional
# argument of gr.Textbox is its initial value, which would pre-fill the boxes
# with the hint text and submit it as real input.
interface = gr.Interface(
    fn=image_story_generator,
    inputs=[
        gr.Image(type='filepath'),
        gr.Textbox(label='enter story requirements'),
        gr.Textbox(label='pick a style for the images'),
    ],
    outputs=[
        gr.Gallery(),
        gr.Textbox(label='story'),
    ],
    description='Upload an image to start the story generation process.',
)

interface.launch()