MusIre committed
Commit b2593bc · 1 Parent(s): 774e3cf

Update app.py

Files changed (1)
  1. app.py +30 -45
app.py CHANGED
@@ -1,5 +1,5 @@
 import subprocess
-import gradio as gr # Add this import statement
+
 
 subprocess.run(["python", "-m", "pip", "install", "--upgrade", "pip"])
 subprocess.run(["pip", "install", "gradio", "--upgrade"])
@@ -8,50 +8,35 @@ subprocess.run(["pip", "install", "numpy"])
 subprocess.run(["pip", "install", "pydub"])
 subprocess.run(["pip", "install", "openai"])
 
-import gradio as gr
-import openai
-import soundfile as sf
-import numpy as np
-from pydub import AudioSegment
-from io import BytesIO
-
-# Set your OpenAI API key
-openai.api_key = "YOUR_OPENAI_API_KEY"
-
-# Whisper ASR model
-whisper_model = "whisper-small"
-
-# Define the Gradio interface
-iface = gr.Interface(
-    fn=None, # To be defined later
-    inputs=gr.Audio(),
-    outputs=gr.Textbox(),
-    live=True,
-)
-
-# Define the function for ASR
-def transcribe_audio(audio_data):
-    # Convert the audio data to a suitable format
-    audio = AudioSegment.from_file(BytesIO(audio_data), format="wav")
-    audio.export("temp.wav", format="wav")
-
-    # Load the audio file using soundfile
-    audio_array, _ = sf.read("temp.wav")
-
-    # Perform ASR using OpenAI's Whisper
-    response = openai.Completion.create(
-        engine=whisper_model,
-        audio_input=audio_array.tolist(),
-        content_type="audio/wav",
-    )
-
-    # Extract the transcribed text from the response
-    transcription = response["choices"][0]["text"].strip()
-
-    return transcription # This is the transcription result
-
-# Set the function for the Gradio interface
-iface.fn = transcribe_audio
-
-# Launch the Gradio app
-iface.launch()
+import subprocess
+
+subprocess.run(["pip", "install", "datasets"])
+subprocess.run(["pip", "install", "transformers"])
+subprocess.run(["pip", "install", "torch", "torchvision", "torchaudio", "-f", "https://download.pytorch.org/whl/torch_stable.html"])
+
+import gradio as gr
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+
+# Load model and processor
+processor = WhisperProcessor.from_pretrained("openai/whisper-large")
+model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
+model.config.forced_decoder_ids = None
+
+# Custom preprocessing function
+def preprocess_audio(audio_data):
+    # Apply any custom preprocessing to the audio data here if needed
+    return processor(audio_data, return_tensors="pt").input_features
+
+# Function to perform ASR on audio data
+def transcribe_audio(input_features):
+    # Generate token ids
+    predicted_ids = model.generate(input_features)
+
+    # Decode token ids to text
+    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+
+    return transcription[0]
+
+# Create Gradio interface
+audio_input = gr.Audio(preprocess=preprocess_audio)
+gr.Interface(fn=transcribe_audio, inputs=audio_input, outputs="text").launch()
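
Note on the committed version: gr.Audio does not accept a preprocess argument, so preprocess_audio is never called, and WhisperProcessor expects a raw waveform plus an explicit sampling_rate (Whisper checkpoints assume 16 kHz audio); the removed code had a similar problem, since openai.Completion.create takes no audio parameters. Below is a minimal sketch of a variant that wires these pieces together, assuming Gradio's "numpy" audio type, which passes the handler a (sample_rate, array) tuple; the inline resampler is illustrative only, not part of the commit.

import numpy as np
import torch
import gradio as gr
from transformers import WhisperProcessor, WhisperForConditionalGeneration

processor = WhisperProcessor.from_pretrained("openai/whisper-large")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
model.config.forced_decoder_ids = None

TARGET_SR = 16000  # Whisper feature extraction assumes 16 kHz input

def transcribe_audio(audio):
    if audio is None:
        return ""
    sample_rate, data = audio  # gr.Audio(type="numpy") yields (rate, np.ndarray)
    data = data.astype(np.float32)
    if data.ndim > 1:
        data = data.mean(axis=1)  # downmix stereo to mono
    if data.size and np.abs(data).max() > 1.0:
        data = data / 32768.0  # int16 PCM range -> [-1.0, 1.0]
    if sample_rate != TARGET_SR:
        # Crude linear-interpolation resample; librosa or torchaudio would be
        # the usual choice, avoided here only to keep the sketch dependency-light
        duration = data.shape[0] / sample_rate
        n_out = int(duration * TARGET_SR)
        data = np.interp(
            np.linspace(0.0, duration, n_out, endpoint=False),
            np.linspace(0.0, duration, data.shape[0], endpoint=False),
            data,
        ).astype(np.float32)
    inputs = processor(data, sampling_rate=TARGET_SR, return_tensors="pt")
    with torch.no_grad():
        predicted_ids = model.generate(inputs.input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

gr.Interface(fn=transcribe_audio, inputs=gr.Audio(type="numpy"), outputs="text").launch()

Doing feature extraction inside the handler removes the need for a preprocess hook on the component, and the rest of the Interface wiring stays as committed.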