Spaces:
Running
Running
Update CUDA line code: move the `device` selection out of `transcribe` to module level
Browse files
app.py
CHANGED
@@ -13,6 +13,8 @@ import spaces
|
|
13 |
load_dotenv()
|
14 |
default_gemini_api_key = os.getenv('gemini_api_key')
|
15 |
|
|
|
|
|
16 |
def configure_genai(api_key, model_variant):
|
17 |
genai.configure(api_key=api_key)
|
18 |
return genai.GenerativeModel(model_variant)
|
@@ -73,7 +75,6 @@ def transcribe(youtube_url, audio_file, whisper_model, gemini_api_key, gemini_pr
|
|
73 |
gemini_api_key = default_gemini_api_key
|
74 |
model = configure_genai(gemini_api_key, gemini_model_variant)
|
75 |
|
76 |
-
device = 0 if torch.cuda.is_available() else "cpu"
|
77 |
pipe = pipeline(
|
78 |
task="automatic-speech-recognition",
|
79 |
model=whisper_model,
|
@@ -163,9 +164,9 @@ with gr.Blocks(theme='NoCrypt/miku') as demo:
|
|
163 |
with gr.Column():
|
164 |
input_type = gr.Radio(["YouTube URL", "Audio File"], label="Input Type", value="YouTube URL", info="Please consider using the audio file if you face any issues with the YouTube URL. Currently youtube is banning HuggingFace IP Addresses.")
|
165 |
with gr.Row():
|
166 |
-
youtube_url = gr.Textbox(label="YouTube URL", info="Input the full URL of the YouTube video you want to transcribe and summarize. Example: https://www.youtube.com/watch?v=VIDEO_ID")
|
167 |
-
audio_file = gr.File(label="Upload Audio File", visible=
|
168 |
-
whisper_model = gr.Dropdown(["openai/whisper-tiny", "openai/whisper-base", "openai/whisper-small", "openai/whisper-medium", "openai/whisper-large-v3"], label="Whisper Model", value="
|
169 |
gemini_model_variant = gr.Dropdown(["gemini-1.5-flash", "gemini-1.5-pro"], label="Gemini Model Variant", value="gemini-1.5-pro", info="Gemini-1.5-flash is the fastest model, but it's not the best quality. Gemini-1.5-pro is the best quality, but it's slower")
|
170 |
define_language = gr.Checkbox(label="Define Language", value=False, info="If you want to define the language, check this box")
|
171 |
language = gr.Dropdown(["id","en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"], label="Language", value=None, info="Select the language for transcription", visible=False)
|
|
|
13 |
load_dotenv()
|
14 |
default_gemini_api_key = os.getenv('gemini_api_key')
|
15 |
|
16 |
+
device = 0 if torch.cuda.is_available() else "cpu"
|
17 |
+
|
18 |
def configure_genai(api_key, model_variant):
|
19 |
genai.configure(api_key=api_key)
|
20 |
return genai.GenerativeModel(model_variant)
|
|
|
75 |
gemini_api_key = default_gemini_api_key
|
76 |
model = configure_genai(gemini_api_key, gemini_model_variant)
|
77 |
|
|
|
78 |
pipe = pipeline(
|
79 |
task="automatic-speech-recognition",
|
80 |
model=whisper_model,
|
|
|
164 |
with gr.Column():
|
165 |
input_type = gr.Radio(["YouTube URL", "Audio File"], label="Input Type", value="YouTube URL", info="Please consider using the audio file if you face any issues with the YouTube URL. Currently youtube is banning HuggingFace IP Addresses.")
|
166 |
with gr.Row():
|
167 |
+
youtube_url = gr.Textbox(label="YouTube URL", visible=False, info="Input the full URL of the YouTube video you want to transcribe and summarize. Example: https://www.youtube.com/watch?v=VIDEO_ID")
|
168 |
+
audio_file = gr.File(label="Upload Audio File", visible=True, file_types=['.wav', '.flac', '.mp3'])
|
169 |
+
whisper_model = gr.Dropdown(["openai/whisper-tiny", "openai/whisper-base", "openai/whisper-small", "openai/whisper-medium", "openai/whisper-large-v3", "distil-whisper/distil-large-v3"], label="Whisper Model", value="distil-whisper/distil-large-v3", info="Tiny is the fastest model, but it's not the best quality. large-v3 is the best quality, but it's the slowest model.")
|
170 |
gemini_model_variant = gr.Dropdown(["gemini-1.5-flash", "gemini-1.5-pro"], label="Gemini Model Variant", value="gemini-1.5-pro", info="Gemini-1.5-flash is the fastest model, but it's not the best quality. Gemini-1.5-pro is the best quality, but it's slower")
|
171 |
define_language = gr.Checkbox(label="Define Language", value=False, info="If you want to define the language, check this box")
|
172 |
language = gr.Dropdown(["id","en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"], label="Language", value=None, info="Select the language for transcription", visible=False)
|