# Voice chat demo: Whisper speech recognition -> OpenAI text completion -> Coqui TTS voice reply.
# Standard library
import os
import tempfile

# Third-party
import gradio as gr
import openai
import whisper
from neon_tts_plugin_coqui import CoquiTTS

# Load the Whisper "small" checkpoint once at import time so every
# request reuses the same model instead of reloading it per call.
model = whisper.load_model("small")
class Dost:
    """Voice-assistant pipeline: Whisper ASR -> OpenAI completion -> Coqui TTS.

    One `start` call runs the whole chain for a single recording and
    returns `(conversation_history, wav_path)` for the two Gradio outputs.
    """

    # Languages the Coqui TTS plugin can synthesize (informational).
    LANGUAGES = list(CoquiTTS.langs.keys())
    # Shared TTS engine; constructed once for all instances.
    coquiTTS = CoquiTTS()
    # Read at import time so a missing key fails fast with a KeyError.
    OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
    # FIX: the key was read but never passed to the client, so every
    # Completion.create call would fail to authenticate.
    openai.api_key = OPENAI_API_KEY

    def __init__(self):
        self.convHistory = []  # list of (user_text, ai_text) pairs for the Chatbot
        self.voice = None      # reserved for a future voice selection; unused
        self.result = []       # per-request outputs: [convHistory, wav_path]

    def recognize(self, audio):
        """Transcribe the recording at path `audio`, detect its language,
        and forward the transcript to `response`."""
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)
        # Most probable language according to Whisper's detector.
        _, probs = model.detect_language(mel)
        lang = max(probs, key=probs.get)
        # fp16=False keeps decoding safe on CPU-only machines.
        options = whisper.DecodingOptions(fp16=False)
        result = whisper.decode(model, mel, options)
        print("-------------------RECOGNIZE---------------------")
        print(result)
        self.response(result.text, lang)

    def response(self, prompt, lang):
        """Ask the OpenAI completion endpoint for a reply to `prompt`,
        record the exchange in the history, then synthesize speech."""
        response = openai.Completion.create(
            model="text-davinci-002",
            prompt=f"You: {prompt}Friend: ",
            temperature=0.5,
            max_tokens=60,
            top_p=1.0,
            frequency_penalty=0.5,
            presence_penalty=0.0,
            stop=["You:"],
        )
        choice = response['choices'][0]['text']
        print("-------------------RESPONSE---------------------")
        print(choice)
        self.convHistory.append((prompt, choice))
        # First output slot: the updated history for the Chatbot widget.
        self.result.append(self.convHistory)
        print(self.convHistory[0])
        print(type(self.convHistory[0]))
        self.say(choice, lang)

    def say(self, text, language):
        """Render `text` as speech, falling back to English when the
        detected language is not supported by Coqui."""
        coqui_langs = ['en' ,'es' ,'fr' ,'de' ,'pl' ,'uk' ,'ro' ,'hu' ,'bg' ,'nl' ,'fi' ,'sl' ,'lv' ,'ga']
        if language not in coqui_langs:
            language = 'en'
        # delete=False: Gradio must still be able to read the file after
        # the handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            self.coquiTTS.get_tts(text, fp, speaker={"language": language})
            print("-------------------AUDIOOUTPUT---------------------")
            print("DONE", fp.name)
        # Second output slot: path of the synthesized reply.
        self.result.append(fp.name)

    def start(self, audio, state):
        """Gradio click handler: run the full pipeline for one recording.

        `state` is the per-session history kept by gr.State; returns a
        2-tuple (history, wav_path) matching the declared outputs.
        """
        self.convHistory = state
        self.result = []
        self.recognize(audio)
        print(self.result)
        return tuple(self.result)
# Wire the pipeline into a Gradio Blocks UI.
dost = Dost()

with gr.Blocks() as demo:
    # Per-session conversation history (list of (user, ai) tuples).
    state = gr.State([])
    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(source="microphone", type="filepath")
            btn = gr.Button("Submit")
        conversation = gr.Chatbot(value=dost.convHistory)
        output_audio = gr.Audio(label="AI voice response")
    # start() returns (history, wav_path), matching the two outputs below.
    btn.click(dost.start,
              inputs=[input_audio, state],
              outputs=[conversation, output_audio])

demo.launch(debug=True)