Spaces:

sohojoe
/

project_charles

Runtime error

App Files Files Community

sohojoe committed on Jun 26, 2023

Commit

6dfcd3a

1 Parent(s): d0639dc

basic version that is working

Browse files

Files changed (2) hide show

debug_app.py +60 -87
frames.pkl +0 -3

debug_app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import asyncio
 import io
 import logging
 import traceback
 from typing import List
@@ -17,40 +19,58 @@ from sample_utils.turn import get_ice_servers
 logger = logging.getLogger(__name__)
-class StreamingMP3ToFrames:
-    def __init__(self):
-        self.append = False
-    def process_chunk(self, chunk):
-        audio_frames = []
         try:
-            if self.append:
-                self.bytes_io.write(chunk)
-                self.append = False
-                self.bytes_io.seek(0)
-            else:
-                self.bytes_io = io.BytesIO(chunk)
-            container = av.open(self.bytes_io, 'r', format='mp3')
-            audio_stream = next(s for s in container.streams if s.type == 'audio')
-            for frame in container.decode(audio_stream):
-                # Convert the audio frame to a NumPy array
-                array = frame.to_ndarray()
-                # Now you can use av.AudioFrame.from_ndarray
-                # audio_frame = av.AudioFrame.from_ndarray(array, format='flt', layout='mono')
-                audio_frame = av.AudioFrame.from_ndarray(array, format='fltp', layout='mono')
-                audio_frame.sample_rate = 44100
-                audio_frames.append(audio_frame)
-            return audio_frames
-        except Exception as e:
-            print (e)
-            self.append = True
-            self.bytes_io.seek(0, io.SEEK_END)
-            return audio_frames
 def video_frame_callback(
@@ -59,27 +79,14 @@ def video_frame_callback(
     return frame
-streaming_mp3_to_frames = StreamingMP3ToFrames()
 with open("chunks.pkl", "rb") as f:
     import pickle
     debug_chunks = pickle.load(f)
-    debug_frames = []
-    debug_frame_idx = 0
     for chunk in debug_chunks:
-        new_frames = streaming_mp3_to_frames.process_chunk(chunk)
-        for frame in new_frames:
-            debug_frames.append(frame)
-            # print (frame)
-def dequeue_frame():
-    global debug_frame_idx, debug_frames
-    enqueued_frame = debug_frames[debug_frame_idx]
-    debug_frame_idx += 1
-    if debug_frame_idx >= len(debug_frames):
-        debug_frame_idx = 0
-    return enqueued_frame
 # empty array of type int16
 sample_buffer = np.zeros((0), dtype=np.int16)
@@ -91,38 +98,8 @@ def process_frame(old_frame):
         output_sample_rate = 44100
         required_samples = old_frame.samples
-        global sample_buffer
-        while sample_buffer.shape[0] < required_samples:
-            dequeued_frame = dequeue_frame()
-            if dequeued_frame is None:
-                break
-            # convert dequeued_frame to same format as old_frame
-            float_samples = dequeued_frame.to_ndarray()
-            max_sample = np.max(np.abs(float_samples))
-            min_sample = np.min(np.abs(float_samples))
-            if max_sample > 1.0 or min_sample > 1.0:
-                print(f"WARNING: max_sample: {max_sample}, min_sample: {min_sample}")
-            int_samples = np.int16(float_samples * 32767)
-            sound = pydub.AudioSegment(
-                data=int_samples.tobytes(),
-                sample_width=2,
-                frame_rate=output_sample_rate,
-                channels=len(dequeued_frame.layout.channels),
-            )
-            sound = sound.set_frame_rate(old_frame.sample_rate)
-            samples = np.array(sound.get_array_of_samples(), dtype=np.int16)
-            sample_buffer = np.append(sample_buffer, samples)
-        # handle case where we ran out of frames
-        if sample_buffer.shape[0] < required_samples:
-            empty_samples = np.zeros((required_samples - sample_buffer.shape[0]), dtype=np.int16)
-            sample_buffer = np.append(sample_buffer, empty_samples)
-        # take the first required_samples samples from the buffer
-        samples = sample_buffer[:required_samples]
-        sample_buffer = sample_buffer[required_samples:]
         # Duplicate mono channel for stereo
         if output_channels == 2:
@@ -143,14 +120,10 @@ def process_frame(old_frame):
 def audio_frame_callback(old_frame: av.AudioFrame) -> av.AudioFrame:
-    global debug_frame_idx, debug_frames
     new_frame = process_frame(old_frame)
-    # print (f"new_frames: {len(new_frames)}, frames: {len(frames)}")
-    print (f"frame:     {old_frame}, pts: {old_frame.pts}")
-    print (f"new_frame: {new_frame}, pts: {new_frame.pts}")
     return new_frame
     # return old_frame

 import asyncio
 import io
 import logging
+import select
+import time
 import traceback
 from typing import List
 logger = logging.getLogger(__name__)
+import subprocess
+import os
+class FFMpegConverter:
+    def __init__(self, input_format='mp3', output_format='s16le'):
+        self.input_format = input_format
+        self.output_format = output_format
+        self.input_pipe = None
+        self.output_pipe = None
+        self.process = None
+        self.start_process()
+    def start_process(self):
+        cmd = [
+            'ffmpeg',
+            # '-y',
+            # '-f', self.input_format,
+            '-i', 'pipe:0',  # read from stdin
+            '-f', self.output_format,
+            '-ar', '48000',
+            '-ac', '1',
+            # '-acodec', 'pcm_s16le',  # output format
+            'pipe:1'  # write to stdout
+        ]
+        self.process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        self.input_pipe = self.process.stdin
+        self.output_pipe = self.process.stdout
+    def push_chunk(self, chunk):
         try:
+            self.input_pipe.write(chunk)
+            self.input_pipe.flush()
+        except BrokenPipeError:
+            # If the pipe is broken, restart the process.
+            self.start_process()
+            self.input_pipe.write(chunk)
+            self.input_pipe.flush()
+    def read_output(self, num_bytes=1024):
+        frame = self.output_pipe.read(num_bytes)
+        return frame
+    def has_processed_all_data(self):
+        return self.process.poll() is not None
+    def close(self):
+        self.input_pipe.close()
+        self.output_pipe.close()
+        self.process.wait()
 def video_frame_callback(
     return frame
 with open("chunks.pkl", "rb") as f:
     import pickle
     debug_chunks = pickle.load(f)
+    converter = FFMpegConverter()
     for chunk in debug_chunks:
+        converter.push_chunk(chunk)
 # empty array of type int16
 sample_buffer = np.zeros((0), dtype=np.int16)
         output_sample_rate = 44100
         required_samples = old_frame.samples
+        frame_as_bytes = converter.read_output(required_samples*2*1) # 2 bytes per sample, mono
+        samples = np.frombuffer(frame_as_bytes, dtype=np.int16)
         # Duplicate mono channel for stereo
         if output_channels == 2:
 def audio_frame_callback(old_frame: av.AudioFrame) -> av.AudioFrame:
     new_frame = process_frame(old_frame)
+    # print (f"frame:     {old_frame}, pts: {old_frame.pts}")
+    # print (f"new_frame: {new_frame}, pts: {new_frame.pts}")
     return new_frame
     # return old_frame

frames.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d85ef7ee28d01dab9ee6faa439f791a1647cc937e0c68e3e5a73d5bd2f071d7f
-size 117337