slliac committed on
Commit
c67a79d
·
verified ·
1 Parent(s): 3096151

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +272 -30
app.py CHANGED
@@ -7,6 +7,7 @@ from gtts import gTTS
7
  import os
8
  import io
9
 
 
10
  # function part
11
  # img2text
12
  def img2text(url):
@@ -14,24 +15,26 @@ def img2text(url):
14
  text = image_to_text_model(url)[0]["generated_text"]
15
  return text
16
 
 
17
  # text2story
18
  def text2story(text):
19
  # Initialize the text generation pipeline
20
  generator = pipeline('text-generation', model='gpt2')
21
-
22
  # Create a prompt for the story
23
  prompt = f"Create a short story about this scene: {text}\n\nStory:"
24
-
25
  # Generate the story
26
- story = generator(prompt,
27
- max_length=150,
28
- num_return_sequences=1,
29
- temperature=0.7)[0]['generated_text']
30
-
31
  # Clean up the story by removing the prompt
32
  story = story.replace(prompt, "").strip()
33
  return story
34
 
 
35
  def text2audio(text):
36
  try:
37
  # Create a gTTS object
@@ -40,7 +43,7 @@ def text2audio(text):
40
  audio_bytes = io.BytesIO()
41
  tts.write_to_fp(audio_bytes)
42
  audio_bytes.seek(0) # Reset the pointer to the start
43
-
44
  return {
45
  'audio': audio_bytes,
46
  'sampling_rate': 24000 # gTTS default sampling rate
@@ -49,43 +52,282 @@ def text2audio(text):
49
  st.error(f"Error in audio generation: {str(e)}")
50
  return None
51
 
52
- st.set_page_config(page_title="Your Image to Audio Story",
53
- page_icon="🦜")
54
- st.header("Turn Your Image to Audio Story")
55
- uploaded_file = st.file_uploader("Select an Image...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  if uploaded_file is not None:
58
- print(uploaded_file)
59
  bytes_data = uploaded_file.getvalue()
60
  with open(uploaded_file.name, "wb") as file:
61
  file.write(bytes_data)
62
- st.image(uploaded_file, caption="Uploaded Image",
63
- use_column_width=True)
64
 
65
- #Stage 1: Image to Text
66
- st.text('Processing img2text...')
 
 
 
 
 
 
 
 
 
 
 
 
67
  scenario = img2text(uploaded_file.name)
68
- st.write(scenario)
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- #Stage 2: Text to Story
71
- st.text('Generating a story...')
72
  story = text2story(scenario)
73
- st.write(story)
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- #Stage 3: Story to Audio data
76
- st.text('Generating audio data...')
77
  audio_data = text2audio(story)
78
 
79
- # Play button
80
- if st.button("Play Audio"):
81
  if audio_data:
82
  st.audio(audio_data['audio'],
83
- format="audio/wav",
84
- start_time=0,
85
- sample_rate=audio_data['sampling_rate'])
86
  else:
87
- st.error("Failed to generate audio")
 
88
 
89
  # Cleanup: Remove the temporary file
90
  if os.path.exists(uploaded_file.name):
91
- os.remove(uploaded_file.name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import os
8
  import io
9
 
10
+
11
  # function part
12
  # img2text
13
  def img2text(url):
 
15
  text = image_to_text_model(url)[0]["generated_text"]
16
  return text
17
 
18
+
19
  # text2story
20
  def text2story(text):
21
  # Initialize the text generation pipeline
22
  generator = pipeline('text-generation', model='gpt2')
23
+
24
  # Create a prompt for the story
25
  prompt = f"Create a short story about this scene: {text}\n\nStory:"
26
+
27
  # Generate the story
28
+ story = generator(prompt,
29
+ max_length=100,
30
+ num_return_sequences=1,
31
+ temperature=0.7)[0]['generated_text']
32
+
33
  # Clean up the story by removing the prompt
34
  story = story.replace(prompt, "").strip()
35
  return story
36
 
37
+
38
  def text2audio(text):
39
  try:
40
  # Create a gTTS object
 
43
  audio_bytes = io.BytesIO()
44
  tts.write_to_fp(audio_bytes)
45
  audio_bytes.seek(0) # Reset the pointer to the start
46
+
47
  return {
48
  'audio': audio_bytes,
49
  'sampling_rate': 24000 # gTTS default sampling rate
 
52
  st.error(f"Error in audio generation: {str(e)}")
53
  return None
54
 
55
+
56
+ # Apply custom CSS for kid-friendly UI
57
+ st.set_page_config(page_title="Magic Picture Storyteller", page_icon="🧙‍♂️")
58
+
59
+ st.markdown("""
60
+ <style>
61
+ /* Colorful, rounded child-friendly design */
62
+ @import url('https://fonts.googleapis.com/css2?family=Comic+Neue:wght@700&display=swap');
63
+
64
+ .stApp {
65
+ background: linear-gradient(135deg, #ffcce6, #b3ecff);
66
+ font-family: 'Comic Neue', cursive;
67
+ }
68
+
69
+ h1 {
70
+ color: #6a3e8c;
71
+ font-size: 2.5rem;
72
+ text-align: center;
73
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
74
+ background: rgba(255,255,255,0.7);
75
+ padding: 15px;
76
+ border-radius: 20px;
77
+ margin-bottom: 20px;
78
+ border: 3px dashed #ff9933;
79
+ }
80
+
81
+ .uploadfile {
82
+ background: rgba(255,255,255,0.7);
83
+ padding: 20px;
84
+ border-radius: 20px;
85
+ border: 3px solid #ff6b6b;
86
+ margin-bottom: 20px;
87
+ }
88
+
89
+ .stButton > button {
90
+ background-color: #6adb9d;
91
+ color: white;
92
+ font-size: 1.5rem;
93
+ border-radius: 30px;
94
+ padding: 10px 25px;
95
+ font-family: 'Comic Neue', cursive;
96
+ font-weight: bold;
97
+ border: none;
98
+ box-shadow: 0 4px 0 #489d73;
99
+ transition: all 0.2s;
100
+ }
101
+
102
+ .stButton > button:hover {
103
+ background-color: #5bc28d;
104
+ transform: translateY(2px);
105
+ box-shadow: 0 2px 0 #489d73;
106
+ }
107
+
108
+ .stButton > button:active {
109
+ transform: translateY(4px);
110
+ box-shadow: none;
111
+ }
112
+
113
+ .output-box {
114
+ background: rgba(255, 255, 255, 0.8);
115
+ padding: 15px;
116
+ border-radius: 20px;
117
+ margin: 15px 0;
118
+ border: 3px solid #7678ed;
119
+ }
120
+
121
+ .step-title {
122
+ background-color: #ff9e7d;
123
+ color: white;
124
+ padding: 8px 15px;
125
+ border-radius: 15px;
126
+ font-size: 1.2rem;
127
+ margin-bottom: 10px;
128
+ display: inline-block;
129
+ }
130
+
131
+ .story-text {
132
+ font-size: 1.3rem;
133
+ line-height: 1.6;
134
+ color: #333;
135
+ }
136
+
137
+ /* Custom file uploader */
138
+ .stFileUploader > div > div {
139
+ background: #ffea8c;
140
+ border: 2px dashed #ffaa44;
141
+ border-radius: 20px;
142
+ padding: 10px;
143
+ }
144
+
145
+ /* Audio player styling */
146
+ audio {
147
+ width: 100%;
148
+ border-radius: 30px;
149
+ background: #ffcc66;
150
+ }
151
+
152
+ /* Loading animation */
153
+ .loading {
154
+ display: flex;
155
+ justify-content: center;
156
+ margin: 15px 0;
157
+ }
158
+
159
+ .loading div {
160
+ width: 20px;
161
+ height: 20px;
162
+ background-color: #ff6b6b;
163
+ border-radius: 50%;
164
+ margin: 0 5px;
165
+ animation: bounce 0.8s infinite alternate;
166
+ }
167
+
168
+ .loading div:nth-child(2) {
169
+ animation-delay: 0.2s;
170
+ background-color: #ffcc66;
171
+ }
172
+
173
+ .loading div:nth-child(3) {
174
+ animation-delay: 0.4s;
175
+ background-color: #7678ed;
176
+ }
177
+
178
+ @keyframes bounce {
179
+ from { transform: translateY(0); }
180
+ to { transform: translateY(-15px); }
181
+ }
182
+
183
+ /* Fun floating animation for welcome screen */
184
+ @keyframes float {
185
+ 0% { transform: translateY(0); }
186
+ 50% { transform: translateY(-10px); }
187
+ 100% { transform: translateY(0); }
188
+ }
189
+
190
+ .float-animation {
191
+ animation: float 3s infinite ease-in-out;
192
+ }
193
+
194
+ /* Title styling with icons */
195
+ .app-title {
196
+ display: flex;
197
+ align-items: center;
198
+ justify-content: center;
199
+ gap: 10px;
200
+ }
201
+
202
+ .app-title img {
203
+ height: 40px;
204
+ }
205
+
206
+ /* Speech bubble styling */
207
+ .speech-bubble {
208
+ position: relative;
209
+ background: #ffffff;
210
+ border-radius: 20px;
211
+ padding: 15px;
212
+ margin: 20px 0;
213
+ border: 3px solid #ff9e7d;
214
+ }
215
+
216
+ .speech-bubble:after {
217
+ content: '';
218
+ position: absolute;
219
+ bottom: 0;
220
+ left: 30px;
221
+ width: 0;
222
+ height: 0;
223
+ border: 15px solid transparent;
224
+ border-top-color: #ffffff;
225
+ border-bottom: 0;
226
+ margin-left: -15px;
227
+ margin-bottom: -15px;
228
+ }
229
+
230
+ /* Fun emoji decorations */
231
+ .emoji-decoration {
232
+ font-size: 2rem;
233
+ margin: 0 5px;
234
+ display: inline-block;
235
+ animation: wiggle 2s infinite;
236
+ }
237
+
238
+ @keyframes wiggle {
239
+ 0%, 100% { transform: rotate(-5deg); }
240
+ 50% { transform: rotate(5deg); }
241
+ }
242
+ </style>
243
+ """, unsafe_allow_html=True)
244
+
245
+ # App header with kid-friendly design
246
+ st.markdown(
247
+ "<h1><span class='emoji-decoration'>🧙‍♂️</span> Magic Picture Storyteller <span class='emoji-decoration'>🎨</span></h1>",
248
+ unsafe_allow_html=True)
249
+
250
+ # File uploader with kid-friendly styling
251
+ st.markdown("<h3>📷 Let's pick a picture!</h3>", unsafe_allow_html=True)
252
+ uploaded_file = st.file_uploader("Choose a fun picture...", key="upload")
253
 
254
  if uploaded_file is not None:
 
255
  bytes_data = uploaded_file.getvalue()
256
  with open(uploaded_file.name, "wb") as file:
257
  file.write(bytes_data)
 
 
258
 
259
+ st.image(uploaded_file, caption="Your magical picture!", use_column_width=True)
260
+
261
+ # Stage 1: Image to Text
262
+ st.markdown("<div class='step-title'>🔍 Finding the magic in your picture...</div>", unsafe_allow_html=True)
263
+
264
+ # Add loading animation
265
+ st.markdown("""
266
+ <div class="loading">
267
+ <div></div>
268
+ <div></div>
269
+ <div></div>
270
+ </div>
271
+ """, unsafe_allow_html=True)
272
+
273
  scenario = img2text(uploaded_file.name)
274
+ st.markdown(f"<p class='story-text'>{scenario}</p>", unsafe_allow_html=True)
275
+
276
+ # Stage 2: Text to Story
277
+ st.markdown("<div class='step-title'>📝 Creating a magical story...</div>", unsafe_allow_html=True)
278
+
279
+ # Add loading animation
280
+ st.markdown("""
281
+ <div class="loading">
282
+ <div></div>
283
+ <div></div>
284
+ <div></div>
285
+ </div>
286
+ """, unsafe_allow_html=True)
287
 
 
 
288
  story = text2story(scenario)
289
+ st.markdown(f"<p class='story-text'>{story}</p>", unsafe_allow_html=True)
290
+
291
+ # Stage 3: Story to Audio data
292
+ st.markdown("<div class='step-title'>🔊 Let's hear the story!</div>", unsafe_allow_html=True)
293
+
294
+ # Add loading animation
295
+ st.markdown("""
296
+ <div class="loading">
297
+ <div></div>
298
+ <div></div>
299
+ <div></div>
300
+ </div>
301
+ """, unsafe_allow_html=True)
302
 
 
 
303
  audio_data = text2audio(story)
304
 
305
+ # Play button with kid-friendly design
306
+ if st.button("🔊 Play My Story!"):
307
  if audio_data:
308
  st.audio(audio_data['audio'],
309
+ format="audio/wav",
310
+ start_time=0,
311
+ sample_rate=audio_data['sampling_rate'])
312
  else:
313
+ st.error("Oops! The magical voice got lost!")
314
+
315
 
316
  # Cleanup: Remove the temporary file
317
  if os.path.exists(uploaded_file.name):
318
+ os.remove(uploaded_file.name)
319
+ else:
320
+ # Fun welcome screen for kids
321
+ st.markdown("""
322
+ <div style="text-align: center; padding: 30px;" class="float-animation">
323
+ <div style="font-size: 80px; margin-bottom: 20px;">🧙‍♂️</div>
324
+ <h2 style="color: #6a3e8c;">Pick a picture to start the magic!</h2>
325
+ <p style="font-size: 1.2rem;">The wizard is waiting to tell you a story!</p>
326
+ <div style="margin-top: 30px;">
327
+ <span class="emoji-decoration">🌈</span>
328
+ <span class="emoji-decoration">⭐</span>
329
+ <span class="emoji-decoration">🦄</span>
330
+ <span class="emoji-decoration">🎭</span>
331
+ </div>
332
+ </div>
333
+ """, unsafe_allow_html=True)