File size: 944 Bytes
be0bb46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# llava.py

from speech_to_text import transcribe_audio
from text_to_speech import text_to_speech_file
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Pull variables from a .env file (if any) into the process environment.
# This must run before the os.getenv lookup below.
load_dotenv()

# Configure the Generative AI model
# NOTE(review): GENAI_API_KEY is None when the variable is unset; nothing here
# checks for that before handing it to genai.configure.
GENAI_API_KEY = os.getenv("GENAI_API_KEY")
genai.configure(api_key=GENAI_API_KEY)
# Module-level model handle shared by generate_response().
model = genai.GenerativeModel("gemini-1.5-flash")

def generate_response(prompt: str) -> str:
    """Send *prompt* to the module-level Gemini model and return its reply text.

    Args:
        prompt: Text forwarded unchanged to ``model.generate_content``.

    Returns:
        The reply's ``text``, or an empty string when the reply carries no
        usable text, so callers can detect failure with a truthiness check.

    Errors raised by ``model.generate_content`` itself are not caught here
    and propagate to the caller.
    """
    response = model.generate_content(prompt)
    try:
        return response.text or ""
    except ValueError:
        # The ``text`` accessor raises ValueError when the reply holds no
        # usable text part (for example, a blocked prompt). Report that as
        # "no response" instead of crashing the caller.
        return ""

def main(audio_file: str) -> str:
    """Run the audio -> text -> LLM -> audio pipeline for one input file.

    Args:
        audio_file: Value passed straight to ``transcribe_audio``.

    Returns:
        The result of ``text_to_speech_file`` for the generated reply, or one
        of two fixed messages: ``"Transcription failed."`` when the transcript
        is falsy, ``"Failed to generate response."`` when the reply is falsy.
    """
    # Step 1: speech to text; bail out early if nothing was transcribed.
    spoken_text = transcribe_audio(audio_file)
    if not spoken_text:
        return "Transcription failed."

    # Step 2: ask the LLM; an empty reply is treated as a failure.
    reply = generate_response(spoken_text)
    if not reply:
        return "Failed to generate response."

    # Step 3: text back to speech.
    return text_to_speech_file(reply)