ai-bedtime-story-server

Paused

jbilcke-hf HF staff committed on Nov 21, 2023

Commit

4156639

•

1 Parent(s): 9df4ebb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -197,6 +197,30 @@ def format_prompt_zephyr(message, history, system_message=system_message):
     print(prompt)
     return prompt
 def generate_local(
     prompt,
     history,
@@ -587,7 +611,7 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
                 wav_bytestream = wav_bytestream.tobytes()
             # Directly encode the WAV bytestream to base64
-            base64_audio = base64.b64encode(wav_bytestream).decode('utf8')
             if audio_stream is not None:
                 return (history, base64_audio)

     print(prompt)
     return prompt
+import struct
+# Generated by GPT-4
+def pcm_to_wav(pcm_data, sample_rate=24000, channels=1, bit_depth=16):
+    """Wrap raw PCM audio bytes in a minimal RIFF/WAVE container.
+
+    Args:
+        pcm_data: Audio payload as ``bytes``/``bytearray``. If it already
+            begins with a RIFF header whose form type is ``WAVE`` it is
+            returned unchanged.
+        sample_rate: Samples per second written to the ``fmt`` chunk.
+        channels: Number of interleaved channels.
+        bit_depth: Bits per sample.
+
+    Returns:
+        The payload prefixed by a 44-byte PCM WAV header (RIFF descriptor,
+        16-byte ``fmt`` chunk with format tag 1, ``data`` chunk header),
+        followed by one zero pad byte when the payload length is odd.
+    """
+    # Only treat the input as WAV when both the container id and the form
+    # type match; "RIFF" alone also matches other RIFF containers or raw PCM
+    # whose first samples happen to spell those four bytes.
+    if pcm_data[:4] == b"RIFF" and pcm_data[8:12] == b"WAVE":
+        return pcm_data
+    # Calculate subchunk sizes
+    fmt_subchunk_size = 16  # fixed size of the PCM 'fmt ' payload
+    data_subchunk_size = len(pcm_data)
+    # RIFF chunks are word-aligned: an odd-sized chunk is followed by one zero
+    # byte that is counted in the enclosing RIFF size but not in the chunk's
+    # own size field.
+    padding = b"\x00" if data_subchunk_size % 2 else b""
+    chunk_size = (4 + (8 + fmt_subchunk_size)
+                  + (8 + data_subchunk_size + len(padding)))
+    # Prepare the WAV file headers (all fields little-endian)
+    wav_header = struct.pack('<4sI4s', b'RIFF', chunk_size, b'WAVE')  # 'RIFF' chunk descriptor
+    fmt_subchunk = struct.pack('<4sIHHIIHH',
+                               b'fmt ', fmt_subchunk_size, 1, channels,
+                               sample_rate, sample_rate * channels * bit_depth // 8,  # byte rate
+                               channels * bit_depth // 8, bit_depth)  # block align, bits per sample
+    data_subchunk = struct.pack('<4sI', b'data', data_subchunk_size)
+    return b"".join((wav_header, fmt_subchunk, data_subchunk, pcm_data, padding))
 def generate_local(
     prompt,
     history,
                 wav_bytestream = wav_bytestream.tobytes()
             # Directly encode the WAV bytestream to base64
+            base64_audio = base64.b64encode(pcm_to_wav(wav_bytestream)).decode('utf8')
             if audio_stream is not None:
                 return (history, base64_audio)