Update app.py
Browse files
app.py
CHANGED
@@ -20,6 +20,7 @@ import boto3
|
|
20 |
from botocore.exceptions import NoCredentialsError
|
21 |
import time
|
22 |
import tempfile
|
|
|
23 |
|
24 |
# Import functions from other modules
|
25 |
from asr import transcribe, ASR_LANGUAGES, ASR_SAMPLING_RATE
|
@@ -81,49 +82,49 @@ def extract_audio_from_file(input_bytes):
|
|
81 |
temp_file_path = temp_file.name
|
82 |
|
83 |
try:
|
84 |
-
#
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
try:
|
89 |
-
# Try to read as a video file
|
90 |
video = VideoFileClip(temp_file_path)
|
91 |
audio = video.audio
|
92 |
if audio is not None:
|
93 |
-
# Extract audio from video
|
94 |
audio_array = audio.to_soundarray()
|
95 |
sample_rate = audio.fps
|
96 |
-
|
97 |
-
# Convert to mono if stereo
|
98 |
-
if len(audio_array.shape) > 1 and audio_array.shape[1] > 1:
|
99 |
-
audio_array = audio_array.mean(axis=1)
|
100 |
-
|
101 |
-
# Ensure audio is float32 and normalized
|
102 |
audio_array = audio_array.astype(np.float32)
|
103 |
audio_array /= np.max(np.abs(audio_array))
|
104 |
-
|
105 |
video.close()
|
|
|
106 |
return audio_array, sample_rate
|
107 |
else:
|
108 |
-
|
109 |
-
except Exception:
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
raise ValueError(f"Unsupported file format: {str(e)}")
|
125 |
finally:
|
126 |
-
# Clean up the temporary file
|
127 |
os.unlink(temp_file_path)
|
128 |
|
129 |
@app.post("/transcribe")
|
@@ -342,7 +343,8 @@ async def identify_language_file(
|
|
342 |
processing_time = time.time() - start_time
|
343 |
return JSONResponse(
|
344 |
status_code=500,
|
345 |
-
content={"message": "An error occurred during language identification", "details": error_details, "processing_time_seconds": processing_time}
|
|
|
346 |
|
347 |
@app.post("/asr_languages")
|
348 |
async def get_asr_languages(request: LanguageRequest, api_key: APIKey = Depends(get_api_key)):
|
@@ -392,12 +394,10 @@ async def get_tts_languages(request: LanguageRequest, api_key: APIKey = Depends(
|
|
392 |
content={"message": "An error occurred while fetching TTS languages", "details": error_details, "processing_time_seconds": processing_time}
|
393 |
)
|
394 |
|
395 |
-
# If you want to add a health check endpoint
|
396 |
@app.get("/health")
|
397 |
async def health_check():
|
398 |
return {"status": "ok"}
|
399 |
|
400 |
-
# You might also want to add a root endpoint that provides basic API information
|
401 |
@app.get("/")
|
402 |
async def root():
|
403 |
return {
|
|
|
20 |
from botocore.exceptions import NoCredentialsError
|
21 |
import time
|
22 |
import tempfile
|
23 |
+
import magic
|
24 |
|
25 |
# Import functions from other modules
|
26 |
from asr import transcribe, ASR_LANGUAGES, ASR_SAMPLING_RATE
|
|
|
82 |
temp_file_path = temp_file.name
|
83 |
|
84 |
try:
|
85 |
+
# Log file info
|
86 |
+
file_info = magic.from_file(temp_file_path, mime=True)
|
87 |
+
logger.info(f"Received file of type: {file_info}")
|
88 |
+
|
89 |
+
# Try reading with soundfile first
|
90 |
+
try:
|
91 |
+
audio_array, sample_rate = sf.read(temp_file_path)
|
92 |
+
logger.info(f"Successfully read audio with soundfile. Shape: {audio_array.shape}, Sample rate: {sample_rate}")
|
93 |
+
return audio_array, sample_rate
|
94 |
+
except Exception as e:
|
95 |
+
logger.info(f"Could not read with soundfile: {str(e)}")
|
96 |
+
|
97 |
+
# Try reading as video
|
98 |
try:
|
|
|
99 |
video = VideoFileClip(temp_file_path)
|
100 |
audio = video.audio
|
101 |
if audio is not None:
|
|
|
102 |
audio_array = audio.to_soundarray()
|
103 |
sample_rate = audio.fps
|
104 |
+
audio_array = audio_array.mean(axis=1) if len(audio_array.shape) > 1 and audio_array.shape[1] > 1 else audio_array
|
|
|
|
|
|
|
|
|
|
|
105 |
audio_array = audio_array.astype(np.float32)
|
106 |
audio_array /= np.max(np.abs(audio_array))
|
|
|
107 |
video.close()
|
108 |
+
logger.info(f"Successfully extracted audio from video. Shape: {audio_array.shape}, Sample rate: {sample_rate}")
|
109 |
return audio_array, sample_rate
|
110 |
else:
|
111 |
+
logger.info("Video file contains no audio")
|
112 |
+
except Exception as e:
|
113 |
+
logger.info(f"Could not read as video: {str(e)}")
|
114 |
+
|
115 |
+
# Try reading with pydub
|
116 |
+
try:
|
117 |
+
audio = AudioSegment.from_file(temp_file_path)
|
118 |
+
audio_array = np.array(audio.get_array_of_samples())
|
119 |
+
audio_array = audio_array.astype(np.float32) / (2**15 if audio.sample_width == 2 else 2**7)
|
120 |
+
audio_array = audio_array.reshape((-1, 2)).mean(axis=1) if audio.channels == 2 else audio_array
|
121 |
+
logger.info(f"Successfully read audio with pydub. Shape: {audio_array.shape}, Sample rate: {audio.frame_rate}")
|
122 |
+
return audio_array, audio.frame_rate
|
123 |
+
except Exception as e:
|
124 |
+
logger.info(f"Could not read with pydub: {str(e)}")
|
125 |
+
|
126 |
+
raise ValueError(f"Unsupported file format: {file_info}")
|
|
|
127 |
finally:
|
|
|
128 |
os.unlink(temp_file_path)
|
129 |
|
130 |
@app.post("/transcribe")
|
|
|
343 |
processing_time = time.time() - start_time
|
344 |
return JSONResponse(
|
345 |
status_code=500,
|
346 |
+
content={"message": "An error occurred during language identification", "details": error_details, "processing_time_seconds": processing_time}
|
347 |
+
)
|
348 |
|
349 |
@app.post("/asr_languages")
|
350 |
async def get_asr_languages(request: LanguageRequest, api_key: APIKey = Depends(get_api_key)):
|
|
|
394 |
content={"message": "An error occurred while fetching TTS languages", "details": error_details, "processing_time_seconds": processing_time}
|
395 |
)
|
396 |
|
|
|
397 |
@app.get("/health")
|
398 |
async def health_check():
|
399 |
return {"status": "ok"}
|
400 |
|
|
|
401 |
@app.get("/")
|
402 |
async def root():
|
403 |
return {
|