Spaces:

wolf1997
/

Image_story_generator

Running

App Files Files Community

wolf1997 committed 24 days ago

Commit

1d2a699

verified ·

1 Parent(s): b3cb8a5

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -109

app.py CHANGED Viewed

@@ -1,110 +1,110 @@
-from PIL import Image
-#importing models
-from transformers import BlipProcessor, BlipForConditionalGeneration
-from langchain_google_genai import ChatGoogleGenerativeAI
-from dotenv import load_dotenv
-import os
-from langchain.output_parsers import StructuredOutputParser, ResponseSchema
-import gradio as gr
-from diffusers import DiffusionPipeline,StableDiffusion3Pipeline
-from huggingface_hub import login
-load_dotenv()
-Hugging_face_token=os.getenv('huggingface_token')
-login(Hugging_face_token)
-# loading image captionning model
-processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
-model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
-# Set the model name for our LLMs.
-GEMINI_MODEL = "gemini-2.0-flash"
-# Store the API key in a variable.
-GEMINI_API_KEY = os.getenv("google_api_key")
-class stable_dif:
-  def __init__(self,sizes):
-    self.sizes=sizes
-  def model(self):
-    if self.sizes == 'medium':
-      pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
-    elif self.sizes == 'large':
-      pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-large-turbo")
-    elif self.sizes == 'small':
-      pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
-    return pipe
-stable=stable_dif('small')
-pipe=stable.model()
-def image_story_generator(image,requirement,style):
-  raw_image = Image.open(image)
-  # get caption from image
-  inputs = processor(raw_image, return_tensors="pt")
-  out = model.generate(**inputs, min_length=20)
-  model_prompt=processor.decode(out[0], skip_special_tokens=True)
-  #load gemnini for creating story
-  llm = ChatGoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3)
-  query =f' Write a 4 chapters story based on {model_prompt} and\
-  that fits the following requirements: {requirement}. Give a detailed\
-  description of the charaters appearences.'
-  result = llm.invoke(query)
-  story= result.content.replace('\n',' ')
-  # create promts for image gen from story
-  image_prompt_llm=ChatGoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3)
-  # create shemas to format output
-  schemas=[
-      ResponseSchema(name='prompt 1', description='the prompt'),
-      ResponseSchema(name='prompt 2', description='the prompt'),
-      ResponseSchema(name='prompt 3', description='the prompt'),
-      ResponseSchema(name='prompt 4', description='the prompt')
-  ]
-  # initialize parser for output
-  parser=StructuredOutputParser.from_response_schemas(schemas)
-  instructions=parser.get_format_instructions()
-  query = f' Based on this story: {story}. Create 4 prompts for stable diffusion that tells of a maximum of 77 tokens\
-  what happens in each chapters. Describe the characters everytime their name is mentioned. Each image should be created in the same exact style {style}.\
-  '+ '\n\n'+instructions
-  result=image_prompt_llm.invoke(query)
-  image_prompts = parser.parse(result.content)
-  # iterate through the prompts and generate new images
-  images=[]
-  for i in image_prompts.keys():
-    image = pipe(image_prompts[i]).images[0]
-    images.append(image)
-  return images, story
-# gradio
-interface = gr.Interface(
-    fn=image_story_generator,
-    inputs=[gr.Image(type='filepath'),gr.Textbox('enter story requirements'), gr.Textbox('pick a style for the images')],
-    outputs=[gr.Gallery(),
-        gr.Textbox('story')
-    ],
-    description='Upload an image to start the story generation process.'
-)
 interface.launch()

+from PIL import Image
+#importing models
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from langchain_google_genai import ChatGoogleGenerativeAI
+from dotenv import load_dotenv
+import os
+from langchain.output_parsers import StructuredOutputParser, ResponseSchema
+import gradio as gr
+from diffusers import DiffusionPipeline,StableDiffusion3Pipeline
+from huggingface_hub import login
+# Load variables from a .env file into the environment before reading them.
+load_dotenv()
+# Hugging Face Hub access token, read from the 'HFToken' environment variable.
+Hugging_face_token=os.getenv('HFToken')
+# Authenticate only when a token is configured: calling login() without one
+# falls back to an interactive prompt, which an unattended app cannot answer.
+if Hugging_face_token:
+  login(Hugging_face_token)
+# Load the BLIP image captioning processor and model once, at import time.
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
+# Set the model name for our LLMs.
+GEMINI_MODEL = "gemini-2.0-flash"
+# Store the API key in a variable.
+GEMINI_API_KEY = os.getenv("google_api_key")
+class stable_dif:
+  """Choose a Stable Diffusion checkpoint by size label and load its pipeline."""
+  def __init__(self,sizes):
+    """Remember the requested size label: 'small', 'medium' or 'large'."""
+    self.sizes=sizes
+  def model(self):
+    """Load and return the diffusion pipeline matching ``self.sizes``.
+
+    Raises:
+      ValueError: If ``self.sizes`` is not one of the supported labels.
+    """
+    if self.sizes == 'medium':
+      pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium")
+    elif self.sizes == 'large':
+      pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-large-turbo")
+    elif self.sizes == 'small':
+      pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
+    else:
+      # Without this branch an unknown label left `pipe` unbound and the
+      # return below failed with an unhelpful UnboundLocalError.
+      raise ValueError(f"unknown size {self.sizes!r}; expected 'small', 'medium' or 'large'")
+    return pipe
+stable=stable_dif('small')  # 'small' selects the stable-diffusion-v1-5 checkpoint
+pipe=stable.model()  # loaded once at import time; image_story_generator calls it per prompt
+def image_story_generator(image,requirement,style):
+  """Caption an image, write a 4-chapter story from it and illustrate each chapter.
+
+  Args:
+    image: Path of the uploaded image file (the Gradio input uses type='filepath').
+    requirement: Free-text requirements the story has to satisfy.
+    style: Visual style requested for every generated illustration.
+
+  Returns:
+    A tuple ``(images, story)``: one generated image per prompt schema, in
+    declaration order, and the story text with newlines replaced by spaces.
+
+  Raises:
+    gr.Error: If ``image`` is None, i.e. nothing was uploaded.
+  """
+  if image is None:
+    # Surface a readable message in the UI instead of a traceback from Image.open(None).
+    raise gr.Error('Please upload an image before generating a story.')
+  # get caption from image; the context manager closes the file handle once the
+  # pixels are read, and convert('RGB') follows the BLIP model card usage
+  with Image.open(image) as raw_image:
+    inputs = processor(raw_image.convert('RGB'), return_tensors="pt")
+  out = model.generate(**inputs, min_length=20)
+  model_prompt=processor.decode(out[0], skip_special_tokens=True)
+  # load gemini; the same client serves both the story and the image-prompt request
+  llm = ChatGoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3)
+  query =f' Write a 4 chapters story based on {model_prompt} and\
+  that fits the following requirements: {requirement}. Give a detailed\
+  description of the charaters appearences.'
+  result = llm.invoke(query)
+  story= result.content.replace('\n',' ')
+  # create schemas to format output: one entry per chapter illustration
+  schemas=[
+      ResponseSchema(name='prompt 1', description='the prompt'),
+      ResponseSchema(name='prompt 2', description='the prompt'),
+      ResponseSchema(name='prompt 3', description='the prompt'),
+      ResponseSchema(name='prompt 4', description='the prompt')
+  ]
+  # initialize parser for output
+  parser=StructuredOutputParser.from_response_schemas(schemas)
+  instructions=parser.get_format_instructions()
+  # create prompts for image gen from story
+  query = f' Based on this story: {story}. Create 4 prompts for stable diffusion that tells of a maximum of 77 tokens\
+  what happens in each chapters. Describe the characters everytime their name is mentioned. Each image should be created in the same exact style {style}.\
+  '+ '\n\n'+instructions
+  result=llm.invoke(query)
+  image_prompts = parser.parse(result.content)
+  # generate one image per declared schema, in declaration order, so extra or
+  # reordered keys in the LLM reply cannot change how many images are produced
+  images=[pipe(image_prompts[schema.name]).images[0] for schema in schemas]
+  return images, story
+# gradio UI: an image plus two text inputs go in, a gallery and the story text come out.
+# Textbox takes its initial value as the first positional argument, so the captions
+# are passed as label= to avoid pre-filling the fields with text that would be submitted.
+interface = gr.Interface(
+    fn=image_story_generator,
+    inputs=[
+        gr.Image(type='filepath'),
+        gr.Textbox(label='enter story requirements'),
+        gr.Textbox(label='pick a style for the images'),
+    ],
+    outputs=[
+        gr.Gallery(),
+        gr.Textbox(label='story'),
+    ],
+    description='Upload an image to start the story generation process.'
+)
 interface.launch()