Spaces:

koey811
/

assignment1

Sleeping

App Files Files Community

assignment1 / app.py

koey811

Update app.py

a4cadbe verified about 2 months ago

raw

history blame

2.53 kB

	import streamlit as st
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
	from gtts import gTTS
	import io
	from PIL import Image

	# Make sure PyTorch is importable, installing it on the fly if it is missing.
	try:
	    import torch
	except ImportError:
	    import importlib
	    import subprocess
	    import sys

	    st.warning("PyTorch is not installed. Installing PyTorch...")
	    # Invoke pip through the running interpreter so the package is installed
	    # into the environment this app is actually using; a bare "pip" found on
	    # PATH may belong to a different Python installation.
	    install = subprocess.run([sys.executable, "-m", "pip", "install", "torch"])
	    if install.returncode == 0:
	        st.success("PyTorch has been successfully installed!")
	    else:
	        # Do not claim success when pip failed; the import below will then
	        # raise ImportError, just as it did before this check existed.
	        st.error(f"PyTorch installation failed (pip exit code {install.returncode}).")
	    # Let the import system notice packages that were installed after startup.
	    importlib.invalidate_caches()
	    import torch

	# Streamlit re-runs this script from the top on every interaction, so the
	# pipelines are built inside cached loaders: each model is constructed once
	# per server process and the same object is reused by later reruns.
	@st.cache_resource
	def _load_caption_model():
	    """Build the BLIP image-captioning pipeline."""
	    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


	@st.cache_resource
	def _load_story_generator():
	    """Build the DeepSeek text-generation pipeline used for story writing."""
	    # NOTE(review): trust_remote_code=True allows code shipped in the model
	    # repository to run locally - confirm this is intended for this model.
	    # NOTE(review): verify the host has enough memory for this checkpoint.
	    return pipeline("text-generation", model="deepseek-ai/DeepSeek-R1", trust_remote_code=True)


	# Module-level handles used by generate_caption() and generate_story().
	caption_model = _load_caption_model()
	story_generator = _load_story_generator()

	def generate_caption(image):
	    """Return the caption text produced by the captioning pipeline for ``image``.

	    The pipeline is called with ``image`` and the "generated_text" entry of
	    its first result is returned.
	    """
	    predictions = caption_model(image)
	    top_prediction = predictions[0]
	    return top_prediction["generated_text"]

	def generate_story(caption):
	    """Generate a short children's story from an image caption.

	    Args:
	        caption: Text describing the image; it is embedded in the prompt.

	    Returns:
	        The generated story as a string.
	    """
	    prompt = f"Imagine you are a storyteller for young children. Based on the image described as '{caption}', create a short and interesting story for children aged 3-10. Keep it positive and happy in tone."
	    messages = [{"role": "user", "content": prompt}]
	    generated = story_generator(messages)[0]["generated_text"]
	    if isinstance(generated, str):
	        return generated
	    # When the pipeline is given chat-style messages, "generated_text" is the
	    # whole conversation as a list of message dicts rather than a plain
	    # string; the model's reply is the last message. Returning the list
	    # itself would break the text-to-speech step and the on-page display.
	    return generated[-1]["content"]

	def convert_to_audio(story):
	    """Synthesize ``story`` as English speech with gTTS.

	    Returns an in-memory ``io.BytesIO`` holding the audio, rewound to
	    offset 0 so the caller can read it from the beginning.
	    """
	    speech_buffer = io.BytesIO()
	    gTTS(text=story, lang="en").write_to_fp(speech_buffer)
	    # Rewind: writing leaves the stream position at the end of the data.
	    speech_buffer.seek(0)
	    return speech_buffer

	def main():
	    """Render the page: take a JPG upload, then show its caption, story and audio."""
	    st.title("Storytelling Application")

	    # The uploader only accepts JPG files.
	    upload = st.file_uploader("Upload an image", type=["jpg"])
	    if upload is None:
	        # Nothing has been uploaded yet, so there is nothing more to render.
	        return

	    # Decode the upload into a PIL image and echo it back to the user.
	    picture = Image.open(upload)
	    st.image(picture, caption="Uploaded Image", use_container_width=True)

	    # Step 1: describe the picture.
	    image_caption = generate_caption(picture)
	    st.subheader("Generated Caption:")
	    st.write(image_caption)

	    # Step 2: turn the description into a story.
	    story_text = generate_story(image_caption)
	    st.subheader("Generated Story:")
	    st.write(story_text)

	    # Step 3: narrate the story and offer it in an audio player.
	    narration = convert_to_audio(story_text)
	    st.audio(narration, format="audio/mp3")

	# Start the app only when this file is run as the main script.
	if __name__ == "__main__":
	    main()