import gradio as gr
import openai
import os
from pydub import AudioSegment

# Optional local ASR path (AI4Bharat Whisper); uncomment to use it instead of the OpenAI API.
#import torch
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#print("Device type:", device)
#from transformers import pipeline
#from transformers import (
#    AutoTokenizer,
#    WhisperProcessor,
#    WhisperForConditionalGeneration,
#)

# OpenAI API key is read from the environment.
api_key = os.getenv("OPEN_API_KEY")
openai.api_key = api_key

# Conversation state shared across Gradio callbacks, in OpenAI chat-message format.
global_history = [{"role": "assistant", "content": "Hi, I am a chatbot. I can converse in English. I can answer your questions about farming in India. Ask me anything!"}]


def get_asr_model_processor(model_id):
    """Load a local Whisper model and processor.

    Only used by the optional local ASR path; requires the torch/transformers
    imports above to be uncommented.
    """
    processor = WhisperProcessor.from_pretrained(model_id, model_max_length=225)
    model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)
    # model.forced_decoder_ids = None
    model.config.max_new_tokens = 200  # cap the number of generated tokens
    return {"model": model, "processor": processor}


#model_proc_dict = get_asr_model_processor("vasista22/whisper-hindi-large-v2")
#asr_pipe = pipeline("automatic-speech-recognition", model=model_proc_dict["model"], tokenizer=model_proc_dict["processor"].tokenizer, feature_extractor=model_proc_dict["processor"].feature_extractor, device=device)


def get_asr_output(audio_path, lang):
    """Transcribe the recorded audio with the OpenAI Whisper API."""
    audio = AudioSegment.from_file(audio_path)
    audio.export("temp.wav", format="wav")
    with open("temp.wav", "rb") as file:
        transcription = openai.Audio.transcribe("whisper-1", file, language=lang)
    op_text = transcription.text
    """
    if lang == "hi":
        op_text = asr_pipe("temp.wav")["text"]
        print("whisper", transcription)
        print("ai4b", op_text)
    """
    return op_text


def add_text(history, audio_path, lang):
    """Transcribe the user's audio and append it to both the UI and chat histories."""
    global global_history
    text = get_asr_output(audio_path, lang_dict[lang])
    history = history + [(text, None)]
    global_history = global_history + [{"role": "user", "content": text}]
    print(global_history)
    return history, ""


def get_chatgpt_response(history):
    """Send the chat history to ChatGPT and append its reply."""
    output = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=history)
    history.append({"role": "assistant", "content": output.choices[0].message.content})
    return output.choices[0].message.content, history


def bot(history):
    """Fill in the assistant's reply for the latest user turn shown in the UI."""
    global global_history
    response, global_history = get_chatgpt_response(global_history)
    history[-1][1] = response
    return history


def clear_history(lang="English"):
    """Reset the conversation, greeting the user in the selected language."""
    global global_history
    global_history = [{"role": "assistant", "content": "Hi, I am a chatbot. I can converse in {}. I can answer your questions about farming in India. Ask me anything!".format(lang)}]
    return None


# Languages supported by the demo, mapped to Whisper language codes.
lang_dict = {
    "English": "en",
    "Hindi": "hi",
    "Bengali": "bn",
    "Gujarati": "gu",
    "Kannada": "kn",
    "Marathi": "mr",
    "Tamil": "ta",
    "Telugu": "te"
}

with gr.Blocks(title="Krishi GPT Demo") as demo:
    lang = gr.Radio(list(lang_dict.keys()), label="Select a Language")
    with gr.Row():
        with gr.Column():
            user_audio = gr.Audio(source="microphone", type="filepath", label="Speak your query")
            txt = gr.Textbox(
                show_label=True,
                placeholder="Enter text and press enter, or Record your Audio",
                visible=False,
            ).style(container=False)
            submit = gr.Button("Submit")
    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=500)
    clear = gr.Button("Clear")

    # Transcribe the audio, show the user turn, then fill in the bot's reply in the chat window.
    submit.click(add_text, [chatbot, user_audio, lang], [chatbot, txt]).then(bot, chatbot, chatbot)
    # Clearing or switching languages resets both the UI and the stored chat history.
    clear.click(clear_history, [lang], chatbot, queue=False)
    lang.change(clear_history, [lang], chatbot, queue=False)

demo.launch(share=False)