Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -8,6 +8,7 @@ from starlette.middleware import Middleware
|
|
8 |
from starlette.middleware.cors import CORSMiddleware
|
9 |
from pdf2image import convert_from_bytes
|
10 |
from pydub import AudioSegment
|
|
|
11 |
|
12 |
app = FastAPI()
|
13 |
|
@@ -165,22 +166,25 @@ async def transcribe_and_match(
|
|
165 |
contents = await file.read()
|
166 |
audio = AudioSegment.from_file(BytesIO(contents))
|
167 |
|
168 |
-
#
|
169 |
-
|
170 |
-
|
171 |
-
|
|
|
172 |
|
173 |
-
#
|
174 |
-
|
|
|
|
|
|
|
175 |
transcription_text = transcription_result['text']
|
176 |
|
177 |
-
# Step
|
178 |
import json
|
179 |
fields = json.loads(field_data)
|
180 |
|
181 |
-
# Step
|
182 |
field_matches = {}
|
183 |
-
|
184 |
for field in fields:
|
185 |
field_label = field.get("field_label", "").lower()
|
186 |
field_id = field.get("field_id", "")
|
@@ -189,7 +193,7 @@ async def transcribe_and_match(
|
|
189 |
if field_label in transcription_text.lower():
|
190 |
field_matches[field_id] = transcription_text
|
191 |
|
192 |
-
# Step
|
193 |
return {
|
194 |
"transcription": transcription_text,
|
195 |
"matched_fields": field_matches
|
|
|
8 |
from starlette.middleware.cors import CORSMiddleware
|
9 |
from pdf2image import convert_from_bytes
|
10 |
from pydub import AudioSegment
|
11 |
+
import numpy as np
|
12 |
|
13 |
app = FastAPI()
|
14 |
|
|
|
166 |
contents = await file.read()
|
167 |
audio = AudioSegment.from_file(BytesIO(contents))
|
168 |
|
169 |
+
# Convert AudioSegment to a NumPy array
|
170 |
+
# Export the audio to an in-memory WAV buffer (note: the NumPy conversion below reads samples from `audio` directly, not from this buffer)
|
171 |
+
raw_audio = BytesIO()
|
172 |
+
audio.export(raw_audio, format="wav")
|
173 |
+
raw_audio.seek(0)
|
174 |
|
175 |
+
# Convert audio to samples as NumPy array
|
176 |
+
samples = np.array(audio.get_array_of_samples())
|
177 |
+
|
178 |
+
# Step 2: Use the speech-to-text model (expecting NumPy array)
|
179 |
+
transcription_result = nlp_speech_to_text(samples)
|
180 |
transcription_text = transcription_result['text']
|
181 |
|
182 |
+
# Step 3: Parse the field_data (which contains field names/IDs)
|
183 |
import json
|
184 |
fields = json.loads(field_data)
|
185 |
|
186 |
+
# Step 4: Find the matching field for the transcription
|
187 |
field_matches = {}
|
|
|
188 |
for field in fields:
|
189 |
field_label = field.get("field_label", "").lower()
|
190 |
field_id = field.get("field_id", "")
|
|
|
193 |
if field_label in transcription_text.lower():
|
194 |
field_matches[field_id] = transcription_text
|
195 |
|
196 |
+
# Step 5: Return transcription + matched fields
|
197 |
return {
|
198 |
"transcription": transcription_text,
|
199 |
"matched_fields": field_matches
|