Update main.py
main.py CHANGED
@@ -9,6 +9,7 @@ from starlette.middleware.cors import CORSMiddleware
 from pdf2image import convert_from_bytes
 from pydub import AudioSegment
 import numpy as np
+import json
 
 app = FastAPI()
 
@@ -166,21 +167,19 @@ async def transcribe_and_match(
         contents = await file.read()
         audio = AudioSegment.from_file(BytesIO(contents))
 
-        # Convert AudioSegment to
-        # First, export to raw audio format and then load into NumPy
+        # Convert AudioSegment to raw audio format in WAV
         raw_audio = BytesIO()
         audio.export(raw_audio, format="wav")
         raw_audio.seek(0)
 
-        #
+        # Load the raw audio into a NumPy array
        samples = np.array(audio.get_array_of_samples()).astype(np.float64)
 
         # Step 2: Use the speech-to-text model (expecting NumPy array of float64)
-        transcription_result = nlp_speech_to_text(
+        transcription_result = nlp_speech_to_text(raw_audio)
         transcription_text = transcription_result['text']
 
         # Step 3: Parse the field_data (which contains field names/IDs)
-        import json
         fields = json.loads(field_data)
 
         # Step 4: Find the matching field for the transcription
@@ -189,7 +188,7 @@ async def transcribe_and_match(
             field_label = field.get("field_label", "").lower()
             field_id = field.get("field_id", "")
 
-            # Simple matching: if the transcribed text contains the field label
+            # Simple matching: if the transcribed text contains the field label
             if field_label in transcription_text.lower():
                 field_matches[field_id] = transcription_text
 
@@ -200,7 +199,7 @@ async def transcribe_and_match(
         }
 
     except Exception as e:
-        return JSONResponse(content=f"Error processing audio or matching fields: {str(e)}", status_code=500)
+        return JSONResponse(content={"error": f"Error processing audio or matching fields: {str(e)}"}, status_code=500)
 
 # Set up CORS middleware
 origins = ["*"] # or specify your list of allowed origins
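
The commit does not show how nlp_speech_to_text is constructed, only that it is called with audio input and returns a dict with a 'text' key. Assuming it is a Hugging Face transformers automatic-speech-recognition pipeline (an assumption, not confirmed by this diff), a minimal sketch of preparing the pydub samples for it could look like the following; the model name and the normalization step are illustrative.

# Hedged sketch: assumes nlp_speech_to_text is a transformers ASR pipeline.
# The model name below is illustrative and not taken from this repository.
import numpy as np
from pydub import AudioSegment
from transformers import pipeline

nlp_speech_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

def transcribe_segment(audio: AudioSegment) -> str:
    # Collapse to mono and scale the integer samples to floats in [-1, 1],
    # the form the pipeline accepts as a raw NumPy waveform.
    mono = audio.set_channels(1)
    samples = np.array(mono.get_array_of_samples()).astype(np.float32)
    samples /= float(1 << (8 * mono.sample_width - 1))
    result = nlp_speech_to_text({"raw": samples, "sampling_rate": mono.frame_rate})
    return result["text"]

Passing a prepared waveform rather than the exported WAV BytesIO is what the "expecting NumPy array of float64" comment in the diff suggests; whether the deployed pipeline also accepts the in-memory WAV object depends on its implementation, which is outside this commit.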
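For reference, field_data arrives as a string, is parsed with json.loads, and each entry is read via field.get("field_label") and field.get("field_id"), so a client call could look like the sketch below. The route path and the multipart part names are assumptions, since the route decorator is outside this diff.

# Hedged client-side sketch; the URL path and part names are assumptions.
import json
import requests

field_data = json.dumps([
    {"field_id": "first_name", "field_label": "First name"},
    {"field_id": "email", "field_label": "Email"},
])

with open("answer.wav", "rb") as f:
    response = requests.post(
        "http://localhost:8000/transcribe_and_match/",  # assumed route
        files={"file": ("answer.wav", f, "audio/wav")},
        data={"field_data": field_data},
    )

print(response.status_code, response.json())

With this commit, an error response comes back as a JSON object ({"error": "..."}) with status 500 instead of a bare string, which such a client can handle uniformly.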