Aryan Wadhawan committed on
Commit
a7fd32e
1 Parent(s): 1d71412

lotta changes

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -3,13 +3,13 @@ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
  import torch
4
  import phonemizer
5
  import librosa
 
6
  import base64
7
 
8
 
9
  def lark(audioAsB64):
10
- # convert b64 audio to wav
11
- with open("audio.wav", "wb") as preWaveform:
12
- preWaveform.write(base64.b64encode(audioAsB64))
13
 
14
  # processing
15
  processor = Wav2Vec2Processor.from_pretrained(
@@ -18,7 +18,7 @@ def lark(audioAsB64):
18
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
19
 
20
  waveform, sample_rate = librosa.load(
21
- "audio.wav", sr=16000
22
  ) # Resample to 16kHz (sr=16000)
23
 
24
  input_values = processor(
 
3
  import torch
4
  import phonemizer
5
  import librosa
6
+ import io
7
  import base64
8
 
9
 
10
  def lark(audioAsB64):
11
+ # base64 to wav data conversion
12
+ wav_data = base64.b64decode(audioAsB64.encode("utf-8"))
 
13
 
14
  # processing
15
  processor = Wav2Vec2Processor.from_pretrained(
 
18
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
19
 
20
  waveform, sample_rate = librosa.load(
21
+ io.BytesIO(wav_data), sr=16000
22
  ) # Resample to 16kHz (sr=16000)
23
 
24
  input_values = processor(