add openai whisper
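This change swaps the transcription backend in app.py from a locally downloaded faster-whisper model to OpenAI's hosted whisper-1 API, with the key read from the OPENAI_API_KEY environment variable, and deletes utils.py, a standalone Gemini helper that hard-coded its API key.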
app.py CHANGED
@@ -1,11 +1,14 @@
 import gradio as gr
 import yt_dlp
 import os
+from openai import OpenAI
 
-from faster_whisper import WhisperModel
+client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
+
+# from faster_whisper import WhisperModel
 # tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, or large
-model_name = 'base'
-model = WhisperModel(model_name, device="cpu", download_root="./models")
+# model_name = 'base'
+# model = WhisperModel(model_name, device="cpu", download_root="./models")
 
 ydl_opts = {
     'outtmpl': 'demo.m4a',
@@ -24,13 +27,15 @@ def download_audio(url):
         code = ydl.download([url])
         assert code == 0, "Failed to download audio"
 
-
-
-
-
-
-
-
+def generate_text(url):
+    download_audio(url)
+    with open("demo.m4a", "rb") as f:
+        transcription = client.audio.transcriptions.create(
+            model="whisper-1",
+            file=f,
+            response_format="text"
+        )
+    return transcription.text
 
 with gr.Blocks() as demo:
     with gr.Column():
@@ -41,10 +46,9 @@ with gr.Blocks() as demo:
         output = gr.TextArea(label="Output")
 
     button.click(
-
+        generate_text,
         inputs=[name],
         outputs=[output],
     )
 
-
 demo.launch()
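For context, the new generate_text downloads the audio with yt_dlp and sends demo.m4a to the hosted whisper-1 model. One caveat: in recent openai Python SDKs (1.x), passing response_format="text" to client.audio.transcriptions.create typically returns the transcript as a plain str rather than an object, so the committed return transcription.text would fail at runtime. Below is a minimal sketch under that assumption, returning the string directly; the transcribe_url name and the 'format' download option are illustrative, not part of this commit.

import os

import yt_dlp
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Download options: only 'outtmpl' is visible in the diff; the format selector is assumed.
ydl_opts = {
    "outtmpl": "demo.m4a",
    "format": "m4a/bestaudio/best",
}

def download_audio(url: str) -> None:
    # Fetch the audio track for the given URL into demo.m4a.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        code = ydl.download([url])
        assert code == 0, "Failed to download audio"

def transcribe_url(url: str) -> str:
    download_audio(url)
    with open("demo.m4a", "rb") as f:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",        # hosted Whisper model
            file=f,
            response_format="text",   # SDK returns a plain string in this mode
        )
    return transcription  # already a str; no .text attribute needed

In the Gradio UI this slots into button.click(generate_text, inputs=[name], outputs=[output]), so the URL textbox feeds the helper and the returned string fills the output TextArea.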
utils.py CHANGED
@@ -1,43 +0,0 @@
-import google.generativeai as genai
-
-API_KEY = "AIzaSyCkqv9dWrlbRjv9fHO_O8jBORGfYVPJTnY"
-
-def call_gemini(prompt="", given_text=None, given_image=None, generation_config=None, safety_settings=None):
-    genai.configure(api_key=API_KEY)
-    generation_config = {
-        "temperature": 0.8,
-        "top_p": 1,
-        "top_k": 32,
-        "max_output_tokens": 8192,
-    }
-
-    safety_settings = [
-        {
-            "category": "HARM_CATEGORY_HARASSMENT",
-            "threshold": "BLOCK_ONLY_HIGH"
-        },
-        {
-            "category": "HARM_CATEGORY_HATE_SPEECH",
-            "threshold": "BLOCK_ONLY_HIGH"
-        },
-        {
-            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
-            "threshold": "BLOCK_ONLY_HIGH"
-        },
-        {
-            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
-            "threshold": "BLOCK_ONLY_HIGH"
-        },
-    ]
-
-    model = genai.GenerativeModel(model_name='gemini-pro',
-                                  generation_config=generation_config,
-                                  safety_settings=safety_settings)
-
-    prompt_parts = "如何学习rust语言?"
-
-    response = model.generate_content(prompt_parts)
-    print(response.text)
-
-if __name__ == "__main__":
-    call_gemini()