hadxu committed on
Commit cd864c2
1 Parent(s): 65c9185

add openai whisper

Files changed (2):
  1. app.py +16 -12
  2. utils.py +0 -43
app.py CHANGED
@@ -1,11 +1,14 @@
 import gradio as gr
 import yt_dlp
 import os
+from openai import OpenAI

-from faster_whisper import WhisperModel
+client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
+
+# from faster_whisper import WhisperModel
 # tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, or large
-model_name = 'base'
-model = WhisperModel(model_name, device="cpu", download_root="./models")
+# model_name = 'base'
+# model = WhisperModel(model_name, device="cpu", download_root="./models")

 ydl_opts = {
     'outtmpl': 'demo.m4a',
@@ -24,13 +27,15 @@ def download_audio(url):
         code = ydl.download([url])
         assert code == 0, "Failed to download audio"

-    segments, info = model.transcribe("demo.m4a", beam_size=5)
-    print("Transcript:", info.language)
-    partial_message = ""
-    for segment in segments:
-        msg = "[%.2fs -> %.2fs] %s\n" % (segment.start, segment.end, segment.text)
-        partial_message += msg
-        yield partial_message
+def generate_text(url):
+    download_audio(url)
+    with open("demo.m4a", "rb") as f:
+        transcription = client.audio.transcriptions.create(
+            model="whisper-1",
+            file=f,
+            response_format="text"
+        )
+    return transcription.text

 with gr.Blocks() as demo:
     with gr.Column():
@@ -41,10 +46,9 @@ with gr.Blocks() as demo:
         output = gr.TextArea(label="Output")

         button.click(
-            download_audio,
+            generate_text,
             inputs=[name],
             outputs=[output],
         )

-
 demo.launch()
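
Note on the new generate_text path: depending on the installed openai client version, requesting response_format="text" may return the transcript as a plain string rather than an object with a .text attribute, in which case transcription.text would raise AttributeError. The sketch below is illustrative only and not part of this commit; the helper name transcribe_file and the dual return-shape handling are assumptions.

# Illustrative sketch, not part of this commit: tolerate both return shapes
# of the transcription endpoint. Assumes OPENAI_API_KEY is set, as in app.py.
import os
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def transcribe_file(path):
    with open(path, "rb") as f:
        result = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            response_format="text",  # newer clients may return a plain str here
        )
    # Plain string from response_format="text", or an object with a .text field.
    return result if isinstance(result, str) else result.text

Such a helper could be wired to the same Gradio button in place of generate_text without changing the inputs/outputs.
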
utils.py CHANGED
@@ -1,43 +0,0 @@
-import google.generativeai as genai
-
-API_KEY = "AIzaSyCkqv9dWrlbRjv9fHO_O8jBORGfYVPJTnY"
-
-def call_gemini(prompt="", given_text=None, given_image=None, generation_config=None, safety_settings=None):
-    genai.configure(api_key=API_KEY)
-    generation_config = {
-        "temperature": 0.8,
-        "top_p": 1,
-        "top_k": 32,
-        "max_output_tokens": 8192,
-    }
-
-    safety_settings = [
-        {
-            "category": "HARM_CATEGORY_HARASSMENT",
-            "threshold": "BLOCK_ONLY_HIGH"
-        },
-        {
-            "category": "HARM_CATEGORY_HATE_SPEECH",
-            "threshold": "BLOCK_ONLY_HIGH"
-        },
-        {
-            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
-            "threshold": "BLOCK_ONLY_HIGH"
-        },
-        {
-            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
-            "threshold": "BLOCK_ONLY_HIGH"
-        },
-    ]
-
-    model = genai.GenerativeModel(model_name='gemini-pro',
-                                  generation_config=generation_config,
-                                  safety_settings=safety_settings)
-
-    prompt_parts = "如何学习rust语言?"
-
-    response = model.generate_content(prompt_parts)
-    print(response.text)
-
-if __name__ == "__main__":
-    call_gemini()
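
The deleted utils.py hardcoded its Gemini API key and ignored the prompt argument it accepted (it always sent a fixed Chinese prompt, "How do I learn the Rust language?"). For reference only, a minimal sketch of the same helper with the key read from an environment variable and the prompt passed through; the variable name GOOGLE_API_KEY is an assumption, and the google.generativeai calls mirror those in the removed file.

# Illustrative sketch, not part of this commit.
import os
import google.generativeai as genai

def call_gemini(prompt):
    # Read the key from the environment instead of hardcoding it (assumed variable name).
    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
    model = genai.GenerativeModel(model_name="gemini-pro")
    response = model.generate_content(prompt)
    return response.text

if __name__ == "__main__":
    print(call_gemini("How do I learn the Rust language?"))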