import openai
import whisper
from gtts import gTTS

# Default language for speech synthesis; overwritten by transcribe()
# once Whisper has detected the language of the input audio. Without a
# default, calling speech_synthesis() first would raise a NameError.
language = "en"

model = whisper.load_model("small")


def transcribe(filepath):
    """Transcribe an audio file with Whisper and remember its language."""
    audio = whisper.load_audio(filepath)
    # Pad or trim to the 30-second window Whisper expects.
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language and store it for speech_synthesis().
    _, probs = model.detect_language(mel)
    global language
    language = max(probs, key=probs.get)

    # fp16=False forces float32 decoding, which also works on CPU.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text


def answer_by_chat(
    question,
    role1, content1,
    role2, content2,
    role3, content3,
    role4, content4,
    role5, content5,
    api_key,
):
    """Send the question, plus up to five priming messages, to the
    ChatCompletion endpoint and return the answer as text and as audio.

    Uses the pre-1.0 openai SDK interface (openai.ChatCompletion).
    """
    openai.api_key = api_key

    # Keep only the (role, content) pairs that were actually filled in.
    messages = [
        {"role": role, "content": content}
        for role, content in [
            (role1, content1),
            (role2, content2),
            (role3, content3),
            (role4, content4),
            (role5, content5),
        ]
        if role != "" and content != ""
    ]
    messages.append({"role": "user", "content": question})

    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    response_text = response["choices"][0]["message"]["content"]
    response_audio = speech_synthesis(response_text)
    return response_text, response_audio


def speech_synthesis(sentence):
    """Synthesize speech with gTTS in the language detected by transcribe().

    Note: this assumes Whisper's detected language code is also a valid
    gTTS language code, which holds for common languages such as "en".
    """
    tts = gTTS(sentence, lang=language)
    tts.save("tmp.mp3")
    return "tmp.mp3"
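

# --- Usage sketch (not part of the original module; a minimal example of
# how the three functions fit together). It assumes a local recording
# named "question.wav" exists and that OPENAI_API_KEY is set in the
# environment; both names are illustrative, not from the original code.
if __name__ == "__main__":
    import os

    question_text = transcribe("question.wav")
    answer_text, answer_audio = answer_by_chat(
        question_text,
        "system", "You are a helpful assistant.",
        "", "",  # unused priming slots are skipped by answer_by_chat
        "", "",
        "", "",
        "", "",
        os.environ["OPENAI_API_KEY"],
    )
    print(answer_text)
    print("Spoken answer saved to:", answer_audio)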