Blane187 committed
Commit 0f2423e · verified · 1 Parent(s): 0d122a3

Update app.py

Files changed (1)
  1. app.py +132 -56
app.py CHANGED
@@ -9,12 +9,68 @@ import re
 import gradio as gr
 import groq
 from groq import Groq
+import io
+import soundfile as sf
 
 
 # setup groq
 
 client = Groq(api_key=os.environ.get("Groq_Api_Key"))
 
+
+def transcribe_audio(audio, api_key):
+    if audio is None:
+        return ""
+
+    client = groq.Client(api_key=api_key)  # use the key passed in from the UI
+
+    # Convert audio to the format expected by the model
+    # The model supports mp3, mp4, mpeg, mpga, m4a, wav, and webm file types
+    audio_data = audio[1]  # the numpy samples from the (sample_rate, samples) tuple
+    buffer = io.BytesIO()
+    sf.write(buffer, audio_data, audio[0], format='wav')
+    buffer.seek(0)
+
+    try:
+        # Use Distil-Whisper English powered by Groq for transcription
+        completion = client.audio.transcriptions.create(
+            model="distil-whisper-large-v3-en",
+            file=("audio.wav", buffer),
+            response_format="text"
+        )
+        return completion
+    except Exception as e:
+        return f"Error in transcription: {str(e)}"
+
+
+def generate_assistant_response(transcription, api_key):  # renamed to avoid shadowing the chat tab's generate_response
+    if not transcription:
+        return "No transcription available. Please try speaking again."
+
+    client = groq.Client(api_key=api_key)
+
+    try:
+        # Use Llama 3 70B powered by Groq for text generation
+        completion = client.chat.completions.create(
+            model="llama3-70b-8192",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": transcription}
+            ],
+        )
+        return completion.choices[0].message.content
+    except Exception as e:
+        return f"Error in response generation: {str(e)}"
+
+
+def process_audio(audio, api_key):
+    if not api_key:
+        return "Please enter your Groq API key.", "API key is required."
+    transcription = transcribe_audio(audio, api_key)
+    response = generate_assistant_response(transcription, api_key)
+    return transcription, response
+
+
 def handle_groq_error(e, model_name):
     error_data = e.args[0]
 
@@ -359,7 +415,7 @@ def translate_audio(audio_file_path, model, prompt):
         handle_groq_error(e, model)
 
 
-with gr.Blocks() as interface:
+with gr.Blocks(theme="Hev832/niceandsimple") as interface:
     gr.Markdown(
         """
         # Groq API UI
@@ -430,61 +486,81 @@ with gr.Blocks() as interface:
 
 
     with gr.TabItem("LLMs"):
-        with gr.Row():
-            with gr.Column(scale=1, min_width=250):
-                model = gr.Dropdown(
-                    choices=[
-                        "llama3-70b-8192",
-                        "llama3-8b-8192",
-                        "mixtral-8x7b-32768",
-                        "gemma-7b-it",
-                        "gemma2-9b-it",
-                    ],
-                    value="llama3-70b-8192",
-                    label="Model",
-                )
-                temperature = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.01,
-                    value=0.5,
-                    label="Temperature",
-                    info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative.",
-                )
-                max_tokens = gr.Slider(
-                    minimum=1,
-                    maximum=8192,
-                    step=1,
-                    value=4096,
-                    label="Max Tokens",
-                    info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b.",
-                )
-                top_p = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.01,
-                    value=0.5,
-                    label="Top P",
-                    info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p.",
-                )
-                seed = gr.Number(
-                    precision=0, value=42, label="Seed", info="A starting point to initiate generation, use 0 for random"
-                )
-                model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
-            with gr.Column(scale=1, min_width=400):
-                chatbot = gr.ChatInterface(
-                    fn=generate_response,
-                    chatbot=None,
-                    additional_inputs=[
-                        model,
-                        temperature,
-                        max_tokens,
-                        top_p,
-                        seed,
-                    ],
-                )
-        model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
-
+        with gr.Tab("Chat"):
+            with gr.Row():
+                with gr.Column(scale=1, min_width=250):
+                    model = gr.Dropdown(
+                        choices=[
+                            "llama3-70b-8192",
+                            "llama3-8b-8192",
+                            "mixtral-8x7b-32768",
+                            "gemma-7b-it",
+                            "gemma2-9b-it",
+                        ],
+                        value="llama3-70b-8192",
+                        label="Model",
+                    )
+                    temperature = gr.Slider(
+                        minimum=0.0,
+                        maximum=1.0,
+                        step=0.01,
+                        value=0.5,
+                        label="Temperature",
+                        info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative.",
+                    )
+                    max_tokens = gr.Slider(
+                        minimum=1,
+                        maximum=8192,
+                        step=1,
+                        value=4096,
+                        label="Max Tokens",
+                        info="The maximum number of tokens the model may generate in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama3 8b & 70b; 32k for mixtral 8x7b.",
+                    )
+                    top_p = gr.Slider(
+                        minimum=0.0,
+                        maximum=1.0,
+                        step=0.01,
+                        value=0.5,
+                        label="Top P",
+                        info="Nucleus sampling: the model considers only the most probable next tokens whose cumulative probability reaches p.",
+                    )
+                    seed = gr.Number(
+                        precision=0, value=42, label="Seed", info="A starting point to initiate generation; use 0 for random"
+                    )
+                    model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
+                with gr.Column(scale=1, min_width=400):
+                    chatbot = gr.ChatInterface(
+                        fn=generate_response,
+                        chatbot=None,
+                        additional_inputs=[
+                            model,
+                            temperature,
+                            max_tokens,
+                            top_p,
+                            seed,
+                        ],
+                    )
+            model.change(
+                update_max_tokens,
+                inputs=[
+                    model,
+                ],
+                outputs=max_tokens,
+            )
+        with gr.Tab("Voice-Powered AI Assistant"):
+            with gr.Row():
+                audio_input = gr.Audio(label="Speak!", type="numpy")
+
+            with gr.Row():
+                transcription_output = gr.Textbox(label="Transcription")
+                response_output = gr.Textbox(label="AI Assistant Response")
+            submit_button = gr.Button("Process", variant="primary")
+
+            submit_button.click(
+                process_audio,
+                inputs=[audio_input, api_key_input],
+                outputs=[transcription_output, response_output]
+            )
 
 
 interface.launch(share=True)
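
For reference, a minimal standalone sketch of the transcription path this commit wires up: encode a numpy (sample_rate, samples) tuple, which is the shape gr.Audio(type="numpy") produces, into an in-memory WAV with soundfile, then post it to Groq's transcription endpoint as transcribe_audio does. The synthetic sine tone and the Groq_Api_Key environment variable are stand-in assumptions to keep the example runnable outside the app; a real run needs a valid key and returns whatever Distil-Whisper hears in the clip.

import io
import os

import groq
import numpy as np
import soundfile as sf

# Stand-in for microphone input: gr.Audio(type="numpy") yields (sample_rate, samples).
sample_rate = 16000
t = np.linspace(0, 1.0, sample_rate, endpoint=False)
audio = (sample_rate, (0.2 * np.sin(2 * np.pi * 440 * t)).astype(np.float32))

# Encode the raw samples as WAV in memory, exactly as transcribe_audio does.
buffer = io.BytesIO()
sf.write(buffer, audio[1], audio[0], format="wav")
buffer.seek(0)

# Upload as a (filename, file-like) tuple; no temporary file is written.
client = groq.Client(api_key=os.environ.get("Groq_Api_Key"))
transcription = client.audio.transcriptions.create(
    model="distil-whisper-large-v3-en",
    file=("audio.wav", buffer),
    response_format="text",
)
print(transcription)  # plain text when response_format="text"

Swapping the synthetic tuple for a real capture from the "Speak!" gr.Audio component is the only change needed to exercise the same path the Voice-Powered AI Assistant tab uses.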