MJobe committed on
Commit
4556b98
1 Parent(s): a191415

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +6 -7
main.py CHANGED
@@ -9,6 +9,7 @@ from starlette.middleware.cors import CORSMiddleware
9
  from pdf2image import convert_from_bytes
10
  from pydub import AudioSegment
11
  import numpy as np
 
12
 
13
  app = FastAPI()
14
 
@@ -166,21 +167,19 @@ async def transcribe_and_match(
166
  contents = await file.read()
167
  audio = AudioSegment.from_file(BytesIO(contents))
168
 
169
- # Convert AudioSegment to a NumPy array
170
- # First, export to raw audio format and then load into NumPy
171
  raw_audio = BytesIO()
172
  audio.export(raw_audio, format="wav")
173
  raw_audio.seek(0)
174
 
175
- # Convert audio to samples as NumPy array (convert to float64)
176
  samples = np.array(audio.get_array_of_samples()).astype(np.float64)
177
 
178
  # Step 2: Use the speech-to-text model (expecting NumPy array of float64)
179
- transcription_result = nlp_speech_to_text(samples)
180
  transcription_text = transcription_result['text']
181
 
182
  # Step 3: Parse the field_data (which contains field names/IDs)
183
- import json
184
  fields = json.loads(field_data)
185
 
186
  # Step 4: Find the matching field for the transcription
@@ -189,7 +188,7 @@ async def transcribe_and_match(
189
  field_label = field.get("field_label", "").lower()
190
  field_id = field.get("field_id", "")
191
 
192
- # Simple matching: if the transcribed text contains the field label (or something close)
193
  if field_label in transcription_text.lower():
194
  field_matches[field_id] = transcription_text
195
 
@@ -200,7 +199,7 @@ async def transcribe_and_match(
200
  }
201
 
202
  except Exception as e:
203
- return JSONResponse(content=f"Error processing audio or matching fields: {str(e)}", status_code=500)
204
 
205
  # Set up CORS middleware
206
  origins = ["*"] # or specify your list of allowed origins
 
9
  from pdf2image import convert_from_bytes
10
  from pydub import AudioSegment
11
  import numpy as np
12
+ import json
13
 
14
  app = FastAPI()
15
 
 
167
  contents = await file.read()
168
  audio = AudioSegment.from_file(BytesIO(contents))
169
 
170
+ # Convert AudioSegment to raw audio format in WAV
 
171
  raw_audio = BytesIO()
172
  audio.export(raw_audio, format="wav")
173
  raw_audio.seek(0)
174
 
175
+ # Load the raw audio into a NumPy array
176
  samples = np.array(audio.get_array_of_samples()).astype(np.float64)
177
 
178
  # Step 2: Use the speech-to-text model (expecting NumPy array of float64)
179
+ transcription_result = nlp_speech_to_text(raw_audio)
180
  transcription_text = transcription_result['text']
181
 
182
  # Step 3: Parse the field_data (which contains field names/IDs)
 
183
  fields = json.loads(field_data)
184
 
185
  # Step 4: Find the matching field for the transcription
 
188
  field_label = field.get("field_label", "").lower()
189
  field_id = field.get("field_id", "")
190
 
191
+ # Simple matching: if the transcribed text contains the field label
192
  if field_label in transcription_text.lower():
193
  field_matches[field_id] = transcription_text
194
 
 
199
  }
200
 
201
  except Exception as e:
202
+ return JSONResponse(content={"error": f"Error processing audio or matching fields: {str(e)}"}, status_code=500)
203
 
204
  # Set up CORS middleware
205
  origins = ["*"] # or specify your list of allowed origins