Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -10,6 +10,7 @@ from pdf2image import convert_from_bytes
|
|
10 |
from pydub import AudioSegment
|
11 |
import numpy as np
|
12 |
import json
|
|
|
13 |
|
14 |
app = FastAPI()
|
15 |
|
@@ -167,22 +168,25 @@ async def transcribe_and_match(
|
|
167 |
contents = await file.read()
|
168 |
audio = AudioSegment.from_file(BytesIO(contents))
|
169 |
|
170 |
-
#
|
171 |
-
|
172 |
-
audio.export(
|
173 |
-
|
174 |
|
175 |
-
# Load
|
176 |
-
|
|
|
|
|
|
|
177 |
|
178 |
-
# Step
|
179 |
-
transcription_result = nlp_speech_to_text(
|
180 |
transcription_text = transcription_result['text']
|
181 |
|
182 |
-
# Step
|
183 |
fields = json.loads(field_data)
|
184 |
|
185 |
-
# Step
|
186 |
field_matches = {}
|
187 |
for field in fields:
|
188 |
field_label = field.get("field_label", "").lower()
|
@@ -192,7 +196,7 @@ async def transcribe_and_match(
|
|
192 |
if field_label in transcription_text.lower():
|
193 |
field_matches[field_id] = transcription_text
|
194 |
|
195 |
-
# Step
|
196 |
return {
|
197 |
"transcription": transcription_text,
|
198 |
"matched_fields": field_matches
|
|
|
10 |
from pydub import AudioSegment
|
11 |
import numpy as np
|
12 |
import json
|
13 |
+
import torchaudio
|
14 |
|
15 |
app = FastAPI()
|
16 |
|
|
|
168 |
contents = await file.read()
|
169 |
audio = AudioSegment.from_file(BytesIO(contents))
|
170 |
|
171 |
+
# Step 2: Export to WAV format and load with torchaudio
|
172 |
+
wav_buffer = BytesIO()
|
173 |
+
audio.export(wav_buffer, format="wav")
|
174 |
+
wav_buffer.seek(0)
|
175 |
|
176 |
+
# Load audio using torchaudio
|
177 |
+
waveform, sample_rate = torchaudio.load(wav_buffer)
|
178 |
+
|
179 |
+
# Convert the waveform tensor to a float64 NumPy array
|
180 |
+
samples = waveform.numpy().astype(np.float64)
|
181 |
|
182 |
+
# Step 3: Use the speech-to-text model
|
183 |
+
transcription_result = nlp_speech_to_text(samples, sampling_rate=sample_rate)
|
184 |
transcription_text = transcription_result['text']
|
185 |
|
186 |
+
# Step 4: Parse the field_data (which contains field names/IDs)
|
187 |
fields = json.loads(field_data)
|
188 |
|
189 |
+
# Step 5: Find the matching field for the transcription
|
190 |
field_matches = {}
|
191 |
for field in fields:
|
192 |
field_label = field.get("field_label", "").lower()
|
|
|
196 |
if field_label in transcription_text.lower():
|
197 |
field_matches[field_id] = transcription_text
|
198 |
|
199 |
+
# Step 6: Return transcription + matched fields
|
200 |
return {
|
201 |
"transcription": transcription_text,
|
202 |
"matched_fields": field_matches
|